Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1 | /* |
Maciej Żenczykowski | 49140b9 | 2024-08-07 15:06:07 -0700 | [diff] [blame] | 2 | * Copyright (C) 2018-2024 The Android Open Source Project |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Maciej Żenczykowski | 49140b9 | 2024-08-07 15:06:07 -0700 | [diff] [blame] | 17 | #define LOG_TAG "NetBpfLoad" |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 18 | |
Maciej Żenczykowski | 49140b9 | 2024-08-07 15:06:07 -0700 | [diff] [blame] | 19 | #include <arpa/inet.h> |
| 20 | #include <cstdlib> |
| 21 | #include <dirent.h> |
| 22 | #include <elf.h> |
| 23 | #include <errno.h> |
| 24 | #include <error.h> |
| 25 | #include <fcntl.h> |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 26 | #include <fstream> |
Maciej Żenczykowski | 49140b9 | 2024-08-07 15:06:07 -0700 | [diff] [blame] | 27 | #include <inttypes.h> |
| 28 | #include <iostream> |
| 29 | #include <linux/bpf.h> |
| 30 | #include <linux/elf.h> |
| 31 | #include <linux/unistd.h> |
| 32 | #include <log/log.h> |
| 33 | #include <net/if.h> |
| 34 | #include <optional> |
| 35 | #include <stdint.h> |
| 36 | #include <stdio.h> |
| 37 | #include <stdlib.h> |
| 38 | #include <string.h> |
| 39 | #include <string> |
| 40 | #include <sys/mman.h> |
| 41 | #include <sys/socket.h> |
| 42 | #include <sys/stat.h> |
| 43 | #include <sys/types.h> |
| 44 | #include <sys/utsname.h> |
| 45 | #include <sys/wait.h> |
| 46 | #include <sysexits.h> |
| 47 | #include <unistd.h> |
| 48 | #include <unordered_map> |
| 49 | #include <vector> |
| 50 | |
| 51 | #include <android-base/cmsg.h> |
| 52 | #include <android-base/file.h> |
| 53 | #include <android-base/logging.h> |
| 54 | #include <android-base/macros.h> |
| 55 | #include <android-base/properties.h> |
| 56 | #include <android-base/stringprintf.h> |
| 57 | #include <android-base/strings.h> |
| 58 | #include <android-base/unique_fd.h> |
| 59 | #include <android/api-level.h> |
| 60 | |
| 61 | #include "BpfSyscallWrappers.h" |
| 62 | #include "bpf/BpfUtils.h" |
| 63 | #include "bpf/bpf_map_def.h" |
| 64 | |
| 65 | using android::base::EndsWith; |
| 66 | using android::base::StartsWith; |
| 67 | using android::base::unique_fd; |
| 68 | using std::ifstream; |
| 69 | using std::ios; |
| 70 | using std::optional; |
| 71 | using std::string; |
| 72 | using std::vector; |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 73 | |
| 74 | namespace android { |
| 75 | namespace bpf { |
| 76 | |
| 77 | // Bpf programs may specify per-program & per-map selinux_context and pin_subdir. |
| 78 | // |
| 79 | // The BpfLoader needs to convert these bpf.o specified strings into an enum |
| 80 | // for internal use (to check that valid values were specified for the specific |
| 81 | // location of the bpf.o file). |
| 82 | // |
| 83 | // It also needs to map selinux_context's into pin_subdir's. |
| 84 | // This is because of how selinux_context is actually implemented via pin+rename. |
| 85 | // |
| 86 | // Thus 'domain' enumerates all selinux_context's/pin_subdir's that the BpfLoader |
| 87 | // is aware of. Thus there currently needs to be a 1:1 mapping between the two. |
| 88 | // |
// Every selinux_context / pin_subdir known to the loader, one enumerator each.
enum class domain : int {
    // No explicit choice was made: use the default for that specific pin location.
    unspecified = 0,
    // (S+) fs_bpf_tethering     /sys/fs/bpf/tethering
    tethering,
    // (T+) fs_bpf_net_private   /sys/fs/bpf/net_private
    net_private,
    // (T+) fs_bpf_net_shared    /sys/fs/bpf/net_shared
    net_shared,
    // (T+) fs_bpf_netd_readonly /sys/fs/bpf/netd_readonly
    netd_readonly,
    // (T+) fs_bpf_netd_shared   /sys/fs/bpf/netd_shared
    netd_shared,
};
| 97 | |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 98 | static constexpr domain AllDomains[] = { |
| 99 | domain::unspecified, |
| 100 | domain::tethering, |
| 101 | domain::net_private, |
| 102 | domain::net_shared, |
| 103 | domain::netd_readonly, |
| 104 | domain::netd_shared, |
| 105 | }; |
| 106 | |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 107 | static constexpr bool specified(domain d) { |
| 108 | return d != domain::unspecified; |
| 109 | } |
| 110 | |
// A pair of immutable C strings; both default to the empty string.
struct Location {
    const char* const dir{""};
    const char* const prefix{""};
};
| 115 | |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 116 | // Returns the build type string (from ro.build.type). |
Maciej Żenczykowski | 49140b9 | 2024-08-07 15:06:07 -0700 | [diff] [blame] | 117 | const std::string& getBuildType() { |
| 118 | static std::string t = android::base::GetProperty("ro.build.type", "unknown"); |
| 119 | return t; |
| 120 | } |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 121 | |
| 122 | // The following functions classify the 3 Android build types. |
| 123 | inline bool isEng() { |
| 124 | return getBuildType() == "eng"; |
| 125 | } |
Maciej Żenczykowski | 49140b9 | 2024-08-07 15:06:07 -0700 | [diff] [blame] | 126 | |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 127 | inline bool isUser() { |
| 128 | return getBuildType() == "user"; |
| 129 | } |
Maciej Żenczykowski | 49140b9 | 2024-08-07 15:06:07 -0700 | [diff] [blame] | 130 | |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 131 | inline bool isUserdebug() { |
| 132 | return getBuildType() == "userdebug"; |
| 133 | } |
| 134 | |
// Base path string (note the trailing slash); the per-domain pin locations listed
// on 'enum class domain' all live beneath it.
#define BPF_FS_PATH "/sys/fs/bpf/"

// Size of the BPF log buffer for verifier logging
#define BPF_LOAD_LOG_SZ 0xfffff

// Result of getpagesize(), captured once during static initialization.
// mapMatchesExpectations() uses it as the minimum max_entries of a ring buffer map.
static unsigned int page_size = static_cast<unsigned int>(getpagesize());
| 141 | |
Maciej Żenczykowski | d9fa1c0 | 2024-08-07 15:46:11 -0700 | [diff] [blame] | 142 | constexpr const char* lookupSelinuxContext(const domain d) { |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 143 | switch (d) { |
Maciej Żenczykowski | d9fa1c0 | 2024-08-07 15:46:11 -0700 | [diff] [blame] | 144 | case domain::unspecified: return ""; |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 145 | case domain::tethering: return "fs_bpf_tethering"; |
| 146 | case domain::net_private: return "fs_bpf_net_private"; |
| 147 | case domain::net_shared: return "fs_bpf_net_shared"; |
| 148 | case domain::netd_readonly: return "fs_bpf_netd_readonly"; |
| 149 | case domain::netd_shared: return "fs_bpf_netd_shared"; |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 150 | } |
| 151 | } |
| 152 | |
| 153 | domain getDomainFromSelinuxContext(const char s[BPF_SELINUX_CONTEXT_CHAR_ARRAY_SIZE]) { |
| 154 | for (domain d : AllDomains) { |
| 155 | // Not sure how to enforce this at compile time, so abort() bpfloader at boot instead |
| 156 | if (strlen(lookupSelinuxContext(d)) >= BPF_SELINUX_CONTEXT_CHAR_ARRAY_SIZE) abort(); |
| 157 | if (!strncmp(s, lookupSelinuxContext(d), BPF_SELINUX_CONTEXT_CHAR_ARRAY_SIZE)) return d; |
| 158 | } |
Maciej Żenczykowski | 6641f2f | 2024-08-07 15:34:24 -0700 | [diff] [blame] | 159 | ALOGE("unrecognized selinux_context '%-32s'", s); |
| 160 | // Note: we *can* just abort() here as we only load bpf .o files shipped |
| 161 | // in the same mainline module / apex as NetBpfLoad itself. |
| 162 | abort(); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 163 | } |
| 164 | |
| 165 | constexpr const char* lookupPinSubdir(const domain d, const char* const unspecified = "") { |
| 166 | switch (d) { |
| 167 | case domain::unspecified: return unspecified; |
| 168 | case domain::tethering: return "tethering/"; |
| 169 | case domain::net_private: return "net_private/"; |
| 170 | case domain::net_shared: return "net_shared/"; |
| 171 | case domain::netd_readonly: return "netd_readonly/"; |
| 172 | case domain::netd_shared: return "netd_shared/"; |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 173 | } |
| 174 | }; |
| 175 | |
| 176 | domain getDomainFromPinSubdir(const char s[BPF_PIN_SUBDIR_CHAR_ARRAY_SIZE]) { |
| 177 | for (domain d : AllDomains) { |
| 178 | // Not sure how to enforce this at compile time, so abort() bpfloader at boot instead |
| 179 | if (strlen(lookupPinSubdir(d)) >= BPF_PIN_SUBDIR_CHAR_ARRAY_SIZE) abort(); |
| 180 | if (!strncmp(s, lookupPinSubdir(d), BPF_PIN_SUBDIR_CHAR_ARRAY_SIZE)) return d; |
| 181 | } |
| 182 | ALOGE("unrecognized pin_subdir '%-32s'", s); |
Maciej Żenczykowski | 6641f2f | 2024-08-07 15:34:24 -0700 | [diff] [blame] | 183 | // Note: we *can* just abort() here as we only load bpf .o files shipped |
| 184 | // in the same mainline module / apex as NetBpfLoad itself. |
| 185 | abort(); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 186 | } |
| 187 | |
// Derives the bpf object name from the path of its .o file:
//   '/some/dir/foo@1.o' -> 'foo'      '/some/dir/bar.o' -> 'bar'
static string pathToObjName(const string& path) {
    // extract everything after the final slash, ie. this is the filename 'foo@1.o' or 'bar.o'
    // (rfind() returns npos when there is no slash and npos + 1 wraps to 0: whole path is used)
    const string filename = path.substr(path.rfind('/') + 1);
    // strip off everything from the final period onwards (strip '.o' suffix), ie. 'foo@1' or 'bar'
    const string name = filename.substr(0, filename.find_last_of('.'));
    // strip any potential @1 suffix, this will leave us with just 'foo' or 'bar'
    // this can be used to provide duplicate programs (mux based on the bpfloader version)
    return name.substr(0, name.find_last_of('@'));
}
| 197 | |
| 198 | typedef struct { |
| 199 | const char* name; |
| 200 | enum bpf_prog_type type; |
Maciej Żenczykowski | 346831c | 2024-08-12 17:49:10 +0000 | [diff] [blame] | 201 | enum bpf_attach_type attach_type; |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 202 | } sectionType; |
| 203 | |
| 204 | /* |
| 205 | * Map section name prefixes to program types, the section name will be: |
| 206 | * SECTION(<prefix>/<name-of-program>) |
| 207 | * For example: |
| 208 | * SECTION("tracepoint/sched_switch_func") where sched_switch_funcs |
| 209 | * is the name of the program, and tracepoint is the type. |
| 210 | * |
| 211 | * However, be aware that you should not be directly using the SECTION() macro. |
| 212 | * Instead use the DEFINE_(BPF|XDP)_(PROG|MAP)... & LICENSE/CRITICAL macros. |
| 213 | * |
| 214 | * Programs shipped inside the tethering apex should be limited to networking stuff, |
| 215 | * as KPROBE, PERF_EVENT, TRACEPOINT are dangerous to use from mainline updatable code, |
| 216 | * since they are less stable abi/api and may conflict with platform uses of bpf. |
| 217 | */ |
| 218 | sectionType sectionNameTypes[] = { |
| 219 | {"bind4/", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND}, |
| 220 | {"bind6/", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND}, |
Maciej Żenczykowski | 346831c | 2024-08-12 17:49:10 +0000 | [diff] [blame] | 221 | {"cgroupskb/", BPF_PROG_TYPE_CGROUP_SKB}, |
| 222 | {"cgroupsock/", BPF_PROG_TYPE_CGROUP_SOCK}, |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 223 | {"cgroupsockcreate/", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE}, |
| 224 | {"cgroupsockrelease/", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE}, |
| 225 | {"connect4/", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT}, |
| 226 | {"connect6/", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT}, |
| 227 | {"egress/", BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_EGRESS}, |
| 228 | {"getsockopt/", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT}, |
| 229 | {"ingress/", BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_INGRESS}, |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 230 | {"postbind4/", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND}, |
| 231 | {"postbind6/", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND}, |
| 232 | {"recvmsg4/", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG}, |
| 233 | {"recvmsg6/", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG}, |
Maciej Żenczykowski | 346831c | 2024-08-12 17:49:10 +0000 | [diff] [blame] | 234 | {"schedact/", BPF_PROG_TYPE_SCHED_ACT}, |
| 235 | {"schedcls/", BPF_PROG_TYPE_SCHED_CLS}, |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 236 | {"sendmsg4/", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG}, |
| 237 | {"sendmsg6/", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG}, |
| 238 | {"setsockopt/", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT}, |
Maciej Żenczykowski | 346831c | 2024-08-12 17:49:10 +0000 | [diff] [blame] | 239 | {"skfilter/", BPF_PROG_TYPE_SOCKET_FILTER}, |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 240 | {"sockops/", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS}, |
| 241 | {"sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL}, |
Maciej Żenczykowski | 346831c | 2024-08-12 17:49:10 +0000 | [diff] [blame] | 242 | {"xdp/", BPF_PROG_TYPE_XDP}, |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 243 | }; |
| 244 | |
| 245 | typedef struct { |
| 246 | enum bpf_prog_type type; |
Maciej Żenczykowski | 346831c | 2024-08-12 17:49:10 +0000 | [diff] [blame] | 247 | enum bpf_attach_type attach_type; |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 248 | string name; |
| 249 | vector<char> data; |
| 250 | vector<char> rel_data; |
| 251 | optional<struct bpf_prog_def> prog_def; |
| 252 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 253 | unique_fd prog_fd; // fd after loading |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 254 | } codeSection; |
| 255 | |
| 256 | static int readElfHeader(ifstream& elfFile, Elf64_Ehdr* eh) { |
| 257 | elfFile.seekg(0); |
| 258 | if (elfFile.fail()) return -1; |
| 259 | |
| 260 | if (!elfFile.read((char*)eh, sizeof(*eh))) return -1; |
| 261 | |
| 262 | return 0; |
| 263 | } |
| 264 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 265 | // Reads all section header tables into an Shdr array |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 266 | static int readSectionHeadersAll(ifstream& elfFile, vector<Elf64_Shdr>& shTable) { |
| 267 | Elf64_Ehdr eh; |
| 268 | int ret = 0; |
| 269 | |
| 270 | ret = readElfHeader(elfFile, &eh); |
| 271 | if (ret) return ret; |
| 272 | |
| 273 | elfFile.seekg(eh.e_shoff); |
| 274 | if (elfFile.fail()) return -1; |
| 275 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 276 | // Read shdr table entries |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 277 | shTable.resize(eh.e_shnum); |
| 278 | |
| 279 | if (!elfFile.read((char*)shTable.data(), (eh.e_shnum * eh.e_shentsize))) return -ENOMEM; |
| 280 | |
| 281 | return 0; |
| 282 | } |
| 283 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 284 | // Read a section by its index - for ex to get sec hdr strtab blob |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 285 | static int readSectionByIdx(ifstream& elfFile, int id, vector<char>& sec) { |
| 286 | vector<Elf64_Shdr> shTable; |
| 287 | int ret = readSectionHeadersAll(elfFile, shTable); |
| 288 | if (ret) return ret; |
| 289 | |
| 290 | elfFile.seekg(shTable[id].sh_offset); |
| 291 | if (elfFile.fail()) return -1; |
| 292 | |
| 293 | sec.resize(shTable[id].sh_size); |
| 294 | if (!elfFile.read(sec.data(), shTable[id].sh_size)) return -1; |
| 295 | |
| 296 | return 0; |
| 297 | } |
| 298 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 299 | // Read whole section header string table |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 300 | static int readSectionHeaderStrtab(ifstream& elfFile, vector<char>& strtab) { |
| 301 | Elf64_Ehdr eh; |
| 302 | int ret = readElfHeader(elfFile, &eh); |
| 303 | if (ret) return ret; |
| 304 | |
| 305 | ret = readSectionByIdx(elfFile, eh.e_shstrndx, strtab); |
| 306 | if (ret) return ret; |
| 307 | |
| 308 | return 0; |
| 309 | } |
| 310 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 311 | // Get name from offset in strtab |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 312 | static int getSymName(ifstream& elfFile, int nameOff, string& name) { |
| 313 | int ret; |
| 314 | vector<char> secStrTab; |
| 315 | |
| 316 | ret = readSectionHeaderStrtab(elfFile, secStrTab); |
| 317 | if (ret) return ret; |
| 318 | |
| 319 | if (nameOff >= (int)secStrTab.size()) return -1; |
| 320 | |
| 321 | name = string((char*)secStrTab.data() + nameOff); |
| 322 | return 0; |
| 323 | } |
| 324 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 325 | // Reads a full section by name - example to get the GPL license |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 326 | static int readSectionByName(const char* name, ifstream& elfFile, vector<char>& data) { |
| 327 | vector<char> secStrTab; |
| 328 | vector<Elf64_Shdr> shTable; |
| 329 | int ret; |
| 330 | |
| 331 | ret = readSectionHeadersAll(elfFile, shTable); |
| 332 | if (ret) return ret; |
| 333 | |
| 334 | ret = readSectionHeaderStrtab(elfFile, secStrTab); |
| 335 | if (ret) return ret; |
| 336 | |
| 337 | for (int i = 0; i < (int)shTable.size(); i++) { |
| 338 | char* secname = secStrTab.data() + shTable[i].sh_name; |
| 339 | if (!secname) continue; |
| 340 | |
| 341 | if (!strcmp(secname, name)) { |
| 342 | vector<char> dataTmp; |
| 343 | dataTmp.resize(shTable[i].sh_size); |
| 344 | |
| 345 | elfFile.seekg(shTable[i].sh_offset); |
| 346 | if (elfFile.fail()) return -1; |
| 347 | |
| 348 | if (!elfFile.read((char*)dataTmp.data(), shTable[i].sh_size)) return -1; |
| 349 | |
| 350 | data = dataTmp; |
| 351 | return 0; |
| 352 | } |
| 353 | } |
| 354 | return -2; |
| 355 | } |
| 356 | |
| 357 | unsigned int readSectionUint(const char* name, ifstream& elfFile, unsigned int defVal) { |
| 358 | vector<char> theBytes; |
| 359 | int ret = readSectionByName(name, elfFile, theBytes); |
| 360 | if (ret) { |
| 361 | ALOGD("Couldn't find section %s (defaulting to %u [0x%x]).", name, defVal, defVal); |
| 362 | return defVal; |
| 363 | } else if (theBytes.size() < sizeof(unsigned int)) { |
| 364 | ALOGE("Section %s too short (defaulting to %u [0x%x]).", name, defVal, defVal); |
| 365 | return defVal; |
| 366 | } else { |
| 367 | // decode first 4 bytes as LE32 uint, there will likely be more bytes due to alignment. |
| 368 | unsigned int value = static_cast<unsigned char>(theBytes[3]); |
| 369 | value <<= 8; |
| 370 | value += static_cast<unsigned char>(theBytes[2]); |
| 371 | value <<= 8; |
| 372 | value += static_cast<unsigned char>(theBytes[1]); |
| 373 | value <<= 8; |
| 374 | value += static_cast<unsigned char>(theBytes[0]); |
| 375 | ALOGI("Section %s value is %u [0x%x]", name, value, value); |
| 376 | return value; |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | static int readSectionByType(ifstream& elfFile, int type, vector<char>& data) { |
| 381 | int ret; |
| 382 | vector<Elf64_Shdr> shTable; |
| 383 | |
| 384 | ret = readSectionHeadersAll(elfFile, shTable); |
| 385 | if (ret) return ret; |
| 386 | |
| 387 | for (int i = 0; i < (int)shTable.size(); i++) { |
| 388 | if ((int)shTable[i].sh_type != type) continue; |
| 389 | |
| 390 | vector<char> dataTmp; |
| 391 | dataTmp.resize(shTable[i].sh_size); |
| 392 | |
| 393 | elfFile.seekg(shTable[i].sh_offset); |
| 394 | if (elfFile.fail()) return -1; |
| 395 | |
| 396 | if (!elfFile.read((char*)dataTmp.data(), shTable[i].sh_size)) return -1; |
| 397 | |
| 398 | data = dataTmp; |
| 399 | return 0; |
| 400 | } |
| 401 | return -2; |
| 402 | } |
| 403 | |
| 404 | static bool symCompare(Elf64_Sym a, Elf64_Sym b) { |
| 405 | return (a.st_value < b.st_value); |
| 406 | } |
| 407 | |
| 408 | static int readSymTab(ifstream& elfFile, int sort, vector<Elf64_Sym>& data) { |
| 409 | int ret, numElems; |
| 410 | Elf64_Sym* buf; |
| 411 | vector<char> secData; |
| 412 | |
| 413 | ret = readSectionByType(elfFile, SHT_SYMTAB, secData); |
| 414 | if (ret) return ret; |
| 415 | |
| 416 | buf = (Elf64_Sym*)secData.data(); |
| 417 | numElems = (secData.size() / sizeof(Elf64_Sym)); |
| 418 | data.assign(buf, buf + numElems); |
| 419 | |
| 420 | if (sort) std::sort(data.begin(), data.end(), symCompare); |
| 421 | return 0; |
| 422 | } |
| 423 | |
| 424 | static enum bpf_prog_type getSectionType(string& name) { |
| 425 | for (auto& snt : sectionNameTypes) |
| 426 | if (StartsWith(name, snt.name)) return snt.type; |
| 427 | |
| 428 | return BPF_PROG_TYPE_UNSPEC; |
| 429 | } |
| 430 | |
Maciej Żenczykowski | e666d85 | 2024-08-15 15:03:38 -0700 | [diff] [blame] | 431 | static int readProgDefs(ifstream& elfFile, vector<struct bpf_prog_def>& pd) { |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 432 | vector<char> pdData; |
| 433 | int ret = readSectionByName("progs", elfFile, pdData); |
| 434 | if (ret) return ret; |
| 435 | |
Maciej Żenczykowski | e666d85 | 2024-08-15 15:03:38 -0700 | [diff] [blame] | 436 | if (pdData.size() % sizeof(struct bpf_prog_def)) { |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 437 | ALOGE("readProgDefs failed due to improper sized progs section, %zu %% %zu != 0", |
Maciej Żenczykowski | e666d85 | 2024-08-15 15:03:38 -0700 | [diff] [blame] | 438 | pdData.size(), sizeof(struct bpf_prog_def)); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 439 | return -1; |
| 440 | }; |
| 441 | |
Maciej Żenczykowski | e666d85 | 2024-08-15 15:03:38 -0700 | [diff] [blame] | 442 | pd.resize(pdData.size() / sizeof(struct bpf_prog_def)); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 443 | |
| 444 | const char* dataPtr = pdData.data(); |
| 445 | for (auto& p : pd) { |
Maciej Żenczykowski | e666d85 | 2024-08-15 15:03:38 -0700 | [diff] [blame] | 446 | // Copy the structure from the ELF file and move to the next one. |
| 447 | memcpy(&p, dataPtr, sizeof(struct bpf_prog_def)); |
| 448 | dataPtr += sizeof(struct bpf_prog_def); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 449 | } |
| 450 | return 0; |
| 451 | } |
| 452 | |
| 453 | static int getSectionSymNames(ifstream& elfFile, const string& sectionName, vector<string>& names, |
| 454 | optional<unsigned> symbolType = std::nullopt) { |
| 455 | int ret; |
| 456 | string name; |
| 457 | vector<Elf64_Sym> symtab; |
| 458 | vector<Elf64_Shdr> shTable; |
| 459 | |
| 460 | ret = readSymTab(elfFile, 1 /* sort */, symtab); |
| 461 | if (ret) return ret; |
| 462 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 463 | // Get index of section |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 464 | ret = readSectionHeadersAll(elfFile, shTable); |
| 465 | if (ret) return ret; |
| 466 | |
| 467 | int sec_idx = -1; |
| 468 | for (int i = 0; i < (int)shTable.size(); i++) { |
| 469 | ret = getSymName(elfFile, shTable[i].sh_name, name); |
| 470 | if (ret) return ret; |
| 471 | |
| 472 | if (!name.compare(sectionName)) { |
| 473 | sec_idx = i; |
| 474 | break; |
| 475 | } |
| 476 | } |
| 477 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 478 | // No section found with matching name |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 479 | if (sec_idx == -1) { |
| 480 | ALOGW("No %s section could be found in elf object", sectionName.c_str()); |
| 481 | return -1; |
| 482 | } |
| 483 | |
| 484 | for (int i = 0; i < (int)symtab.size(); i++) { |
| 485 | if (symbolType.has_value() && ELF_ST_TYPE(symtab[i].st_info) != symbolType) continue; |
| 486 | |
| 487 | if (symtab[i].st_shndx == sec_idx) { |
| 488 | string s; |
| 489 | ret = getSymName(elfFile, symtab[i].st_name, s); |
| 490 | if (ret) return ret; |
| 491 | names.push_back(s); |
| 492 | } |
| 493 | } |
| 494 | |
| 495 | return 0; |
| 496 | } |
| 497 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 498 | // Read a section by its index - for ex to get sec hdr strtab blob |
Maciej Żenczykowski | e666d85 | 2024-08-15 15:03:38 -0700 | [diff] [blame] | 499 | static int readCodeSections(ifstream& elfFile, vector<codeSection>& cs) { |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 500 | vector<Elf64_Shdr> shTable; |
| 501 | int entries, ret = 0; |
| 502 | |
| 503 | ret = readSectionHeadersAll(elfFile, shTable); |
| 504 | if (ret) return ret; |
| 505 | entries = shTable.size(); |
| 506 | |
| 507 | vector<struct bpf_prog_def> pd; |
Maciej Żenczykowski | e666d85 | 2024-08-15 15:03:38 -0700 | [diff] [blame] | 508 | ret = readProgDefs(elfFile, pd); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 509 | if (ret) return ret; |
| 510 | vector<string> progDefNames; |
| 511 | ret = getSectionSymNames(elfFile, "progs", progDefNames); |
| 512 | if (!pd.empty() && ret) return ret; |
| 513 | |
| 514 | for (int i = 0; i < entries; i++) { |
| 515 | string name; |
| 516 | codeSection cs_temp; |
| 517 | cs_temp.type = BPF_PROG_TYPE_UNSPEC; |
| 518 | |
| 519 | ret = getSymName(elfFile, shTable[i].sh_name, name); |
| 520 | if (ret) return ret; |
| 521 | |
| 522 | enum bpf_prog_type ptype = getSectionType(name); |
| 523 | |
| 524 | if (ptype == BPF_PROG_TYPE_UNSPEC) continue; |
| 525 | |
| 526 | // This must be done before '/' is replaced with '_'. |
Maciej Żenczykowski | 346831c | 2024-08-12 17:49:10 +0000 | [diff] [blame] | 527 | for (auto& snt : sectionNameTypes) |
| 528 | if (StartsWith(name, snt.name)) cs_temp.attach_type = snt.attach_type; |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 529 | |
| 530 | string oldName = name; |
| 531 | |
| 532 | // convert all slashes to underscores |
| 533 | std::replace(name.begin(), name.end(), '/', '_'); |
| 534 | |
| 535 | cs_temp.type = ptype; |
| 536 | cs_temp.name = name; |
| 537 | |
| 538 | ret = readSectionByIdx(elfFile, i, cs_temp.data); |
| 539 | if (ret) return ret; |
| 540 | ALOGV("Loaded code section %d (%s)", i, name.c_str()); |
| 541 | |
| 542 | vector<string> csSymNames; |
| 543 | ret = getSectionSymNames(elfFile, oldName, csSymNames, STT_FUNC); |
| 544 | if (ret || !csSymNames.size()) return ret; |
| 545 | for (size_t i = 0; i < progDefNames.size(); ++i) { |
| 546 | if (!progDefNames[i].compare(csSymNames[0] + "_def")) { |
| 547 | cs_temp.prog_def = pd[i]; |
| 548 | break; |
| 549 | } |
| 550 | } |
| 551 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 552 | // Check for rel section |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 553 | if (cs_temp.data.size() > 0 && i < entries) { |
| 554 | ret = getSymName(elfFile, shTable[i + 1].sh_name, name); |
| 555 | if (ret) return ret; |
| 556 | |
| 557 | if (name == (".rel" + oldName)) { |
| 558 | ret = readSectionByIdx(elfFile, i + 1, cs_temp.rel_data); |
| 559 | if (ret) return ret; |
| 560 | ALOGV("Loaded relo section %d (%s)", i, name.c_str()); |
| 561 | } |
| 562 | } |
| 563 | |
| 564 | if (cs_temp.data.size() > 0) { |
| 565 | cs.push_back(std::move(cs_temp)); |
| 566 | ALOGV("Adding section %d to cs list", i); |
| 567 | } |
| 568 | } |
| 569 | return 0; |
| 570 | } |
| 571 | |
| 572 | static int getSymNameByIdx(ifstream& elfFile, int index, string& name) { |
| 573 | vector<Elf64_Sym> symtab; |
| 574 | int ret = 0; |
| 575 | |
| 576 | ret = readSymTab(elfFile, 0 /* !sort */, symtab); |
| 577 | if (ret) return ret; |
| 578 | |
| 579 | if (index >= (int)symtab.size()) return -1; |
| 580 | |
| 581 | return getSymName(elfFile, symtab[index].st_name, name); |
| 582 | } |
| 583 | |
| 584 | static bool mapMatchesExpectations(const unique_fd& fd, const string& mapName, |
| 585 | const struct bpf_map_def& mapDef, const enum bpf_map_type type) { |
| 586 | // bpfGetFd... family of functions require at minimum a 4.14 kernel, |
| 587 | // so on 4.9-T kernels just pretend the map matches our expectations. |
| 588 | // Additionally we'll get almost equivalent test coverage on newer devices/kernels. |
| 589 | // This is because the primary failure mode we're trying to detect here |
| 590 | // is either a source code misconfiguration (which is likely kernel independent) |
| 591 | // or a newly introduced kernel feature/bug (which is unlikely to get backported to 4.9). |
| 592 | if (!isAtLeastKernelVersion(4, 14, 0)) return true; |
| 593 | |
| 594 | // Assuming fd is a valid Bpf Map file descriptor then |
| 595 | // all the following should always succeed on a 4.14+ kernel. |
| 596 | // If they somehow do fail, they'll return -1 (and set errno), |
| 597 | // which should then cause (among others) a key_size mismatch. |
| 598 | int fd_type = bpfGetFdMapType(fd); |
| 599 | int fd_key_size = bpfGetFdKeySize(fd); |
| 600 | int fd_value_size = bpfGetFdValueSize(fd); |
| 601 | int fd_max_entries = bpfGetFdMaxEntries(fd); |
| 602 | int fd_map_flags = bpfGetFdMapFlags(fd); |
| 603 | |
| 604 | // DEVMAPs are readonly from the bpf program side's point of view, as such |
| 605 | // the kernel in kernel/bpf/devmap.c dev_map_init_map() will set the flag |
| 606 | int desired_map_flags = (int)mapDef.map_flags; |
| 607 | if (type == BPF_MAP_TYPE_DEVMAP || type == BPF_MAP_TYPE_DEVMAP_HASH) |
| 608 | desired_map_flags |= BPF_F_RDONLY_PROG; |
| 609 | |
| 610 | // The .h file enforces that this is a power of two, and page size will |
| 611 | // also always be a power of two, so this logic is actually enough to |
| 612 | // force it to be a multiple of the page size, as required by the kernel. |
| 613 | unsigned int desired_max_entries = mapDef.max_entries; |
| 614 | if (type == BPF_MAP_TYPE_RINGBUF) { |
| 615 | if (desired_max_entries < page_size) desired_max_entries = page_size; |
| 616 | } |
| 617 | |
| 618 | // The following checks should *never* trigger, if one of them somehow does, |
| 619 | // it probably means a bpf .o file has been changed/replaced at runtime |
| 620 | // and bpfloader was manually rerun (normally it should only run *once* |
| 621 | // early during the boot process). |
| 622 | // Another possibility is that something is misconfigured in the code: |
| 623 | // most likely a shared map is declared twice differently. |
| 624 | // But such a change should never be checked into the source tree... |
| 625 | if ((fd_type == type) && |
| 626 | (fd_key_size == (int)mapDef.key_size) && |
| 627 | (fd_value_size == (int)mapDef.value_size) && |
| 628 | (fd_max_entries == (int)desired_max_entries) && |
| 629 | (fd_map_flags == desired_map_flags)) { |
| 630 | return true; |
| 631 | } |
| 632 | |
| 633 | ALOGE("bpf map name %s mismatch: desired/found: " |
| 634 | "type:%d/%d key:%u/%d value:%u/%d entries:%u/%d flags:%u/%d", |
| 635 | mapName.c_str(), type, fd_type, mapDef.key_size, fd_key_size, mapDef.value_size, |
| 636 | fd_value_size, mapDef.max_entries, fd_max_entries, desired_map_flags, fd_map_flags); |
| 637 | return false; |
| 638 | } |
| 639 | |
| 640 | static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>& mapFds, |
Maciej Żenczykowski | acd4aa8 | 2024-08-15 15:16:48 -0700 | [diff] [blame^] | 641 | const char* prefix, const unsigned int bpfloader_ver) { |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 642 | int ret; |
| 643 | vector<char> mdData; |
| 644 | vector<struct bpf_map_def> md; |
| 645 | vector<string> mapNames; |
| 646 | string objName = pathToObjName(string(elfPath)); |
| 647 | |
| 648 | ret = readSectionByName("maps", elfFile, mdData); |
| 649 | if (ret == -2) return 0; // no maps to read |
| 650 | if (ret) return ret; |
| 651 | |
Maciej Żenczykowski | acd4aa8 | 2024-08-15 15:16:48 -0700 | [diff] [blame^] | 652 | if (mdData.size() % sizeof(struct bpf_map_def)) { |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 653 | ALOGE("createMaps failed due to improper sized maps section, %zu %% %zu != 0", |
Maciej Żenczykowski | acd4aa8 | 2024-08-15 15:16:48 -0700 | [diff] [blame^] | 654 | mdData.size(), sizeof(struct bpf_map_def)); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 655 | return -1; |
| 656 | }; |
| 657 | |
Maciej Żenczykowski | acd4aa8 | 2024-08-15 15:16:48 -0700 | [diff] [blame^] | 658 | md.resize(mdData.size() / sizeof(struct bpf_map_def)); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 659 | |
| 660 | const char* dataPtr = mdData.data(); |
| 661 | for (auto& m : md) { |
Maciej Żenczykowski | acd4aa8 | 2024-08-15 15:16:48 -0700 | [diff] [blame^] | 662 | // Copy the structure from the ELF file and move to the next one. |
| 663 | memcpy(&m, dataPtr, sizeof(struct bpf_map_def)); |
| 664 | dataPtr += sizeof(struct bpf_map_def); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 665 | } |
| 666 | |
| 667 | ret = getSectionSymNames(elfFile, "maps", mapNames); |
| 668 | if (ret) return ret; |
| 669 | |
| 670 | unsigned kvers = kernelVersion(); |
| 671 | |
| 672 | for (int i = 0; i < (int)mapNames.size(); i++) { |
| 673 | if (md[i].zero != 0) abort(); |
| 674 | |
| 675 | if (bpfloader_ver < md[i].bpfloader_min_ver) { |
| 676 | ALOGI("skipping map %s which requires bpfloader min ver 0x%05x", mapNames[i].c_str(), |
| 677 | md[i].bpfloader_min_ver); |
| 678 | mapFds.push_back(unique_fd()); |
| 679 | continue; |
| 680 | } |
| 681 | |
| 682 | if (bpfloader_ver >= md[i].bpfloader_max_ver) { |
| 683 | ALOGI("skipping map %s which requires bpfloader max ver 0x%05x", mapNames[i].c_str(), |
| 684 | md[i].bpfloader_max_ver); |
| 685 | mapFds.push_back(unique_fd()); |
| 686 | continue; |
| 687 | } |
| 688 | |
| 689 | if (kvers < md[i].min_kver) { |
| 690 | ALOGI("skipping map %s which requires kernel version 0x%x >= 0x%x", |
| 691 | mapNames[i].c_str(), kvers, md[i].min_kver); |
| 692 | mapFds.push_back(unique_fd()); |
| 693 | continue; |
| 694 | } |
| 695 | |
| 696 | if (kvers >= md[i].max_kver) { |
| 697 | ALOGI("skipping map %s which requires kernel version 0x%x < 0x%x", |
| 698 | mapNames[i].c_str(), kvers, md[i].max_kver); |
| 699 | mapFds.push_back(unique_fd()); |
| 700 | continue; |
| 701 | } |
| 702 | |
| 703 | if ((md[i].ignore_on_eng && isEng()) || (md[i].ignore_on_user && isUser()) || |
| 704 | (md[i].ignore_on_userdebug && isUserdebug())) { |
| 705 | ALOGI("skipping map %s which is ignored on %s builds", mapNames[i].c_str(), |
| 706 | getBuildType().c_str()); |
| 707 | mapFds.push_back(unique_fd()); |
| 708 | continue; |
| 709 | } |
| 710 | |
| 711 | if ((isArm() && isKernel32Bit() && md[i].ignore_on_arm32) || |
| 712 | (isArm() && isKernel64Bit() && md[i].ignore_on_aarch64) || |
| 713 | (isX86() && isKernel32Bit() && md[i].ignore_on_x86_32) || |
| 714 | (isX86() && isKernel64Bit() && md[i].ignore_on_x86_64) || |
| 715 | (isRiscV() && md[i].ignore_on_riscv64)) { |
| 716 | ALOGI("skipping map %s which is ignored on %s", mapNames[i].c_str(), |
| 717 | describeArch()); |
| 718 | mapFds.push_back(unique_fd()); |
| 719 | continue; |
| 720 | } |
| 721 | |
| 722 | enum bpf_map_type type = md[i].type; |
| 723 | if (type == BPF_MAP_TYPE_DEVMAP && !isAtLeastKernelVersion(4, 14, 0)) { |
| 724 | // On Linux Kernels older than 4.14 this map type doesn't exist, but it can kind |
| 725 | // of be approximated: ARRAY has the same userspace api, though it is not usable |
| 726 | // by the same ebpf programs. However, that's okay because the bpf_redirect_map() |
| 727 | // helper doesn't exist on 4.9-T anyway (so the bpf program would fail to load, |
| 728 | // and thus needs to be tagged as 4.14+ either way), so there's nothing useful you |
| 729 | // could do with a DEVMAP anyway (that isn't already provided by an ARRAY)... |
| 730 | // Hence using an ARRAY instead of a DEVMAP simply makes life easier for userspace. |
| 731 | type = BPF_MAP_TYPE_ARRAY; |
| 732 | } |
| 733 | if (type == BPF_MAP_TYPE_DEVMAP_HASH && !isAtLeastKernelVersion(5, 4, 0)) { |
| 734 | // On Linux Kernels older than 5.4 this map type doesn't exist, but it can kind |
| 735 | // of be approximated: HASH has the same userspace visible api. |
| 736 | // However it cannot be used by ebpf programs in the same way. |
| 737 | // Since bpf_redirect_map() only requires 4.14, a program using a DEVMAP_HASH map |
| 738 | // would fail to load (due to trying to redirect to a HASH instead of DEVMAP_HASH). |
| 739 | // One must thus tag any BPF_MAP_TYPE_DEVMAP_HASH + bpf_redirect_map() using |
| 740 | // programs as being 5.4+... |
| 741 | type = BPF_MAP_TYPE_HASH; |
| 742 | } |
| 743 | |
| 744 | // The .h file enforces that this is a power of two, and page size will |
| 745 | // also always be a power of two, so this logic is actually enough to |
| 746 | // force it to be a multiple of the page size, as required by the kernel. |
| 747 | unsigned int max_entries = md[i].max_entries; |
| 748 | if (type == BPF_MAP_TYPE_RINGBUF) { |
| 749 | if (max_entries < page_size) max_entries = page_size; |
| 750 | } |
| 751 | |
| 752 | domain selinux_context = getDomainFromSelinuxContext(md[i].selinux_context); |
| 753 | if (specified(selinux_context)) { |
Maciej Żenczykowski | 27b535a | 2024-08-15 19:46:46 +0000 | [diff] [blame] | 754 | ALOGV("map %s selinux_context [%-32s] -> %d -> '%s' (%s)", mapNames[i].c_str(), |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 755 | md[i].selinux_context, static_cast<int>(selinux_context), |
| 756 | lookupSelinuxContext(selinux_context), lookupPinSubdir(selinux_context)); |
| 757 | } |
| 758 | |
| 759 | domain pin_subdir = getDomainFromPinSubdir(md[i].pin_subdir); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 760 | if (specified(pin_subdir)) { |
Maciej Żenczykowski | 27b535a | 2024-08-15 19:46:46 +0000 | [diff] [blame] | 761 | ALOGV("map %s pin_subdir [%-32s] -> %d -> '%s'", mapNames[i].c_str(), md[i].pin_subdir, |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 762 | static_cast<int>(pin_subdir), lookupPinSubdir(pin_subdir)); |
| 763 | } |
| 764 | |
| 765 | // Format of pin location is /sys/fs/bpf/<pin_subdir|prefix>map_<objName>_<mapName> |
| 766 | // except that maps shared across .o's have empty <objName> |
| 767 | // Note: <objName> refers to the extension-less basename of the .o file (without @ suffix). |
| 768 | string mapPinLoc = string(BPF_FS_PATH) + lookupPinSubdir(pin_subdir, prefix) + "map_" + |
| 769 | (md[i].shared ? "" : objName) + "_" + mapNames[i]; |
| 770 | bool reuse = false; |
| 771 | unique_fd fd; |
| 772 | int saved_errno; |
| 773 | |
| 774 | if (access(mapPinLoc.c_str(), F_OK) == 0) { |
| 775 | fd.reset(mapRetrieveRO(mapPinLoc.c_str())); |
| 776 | saved_errno = errno; |
| 777 | ALOGD("bpf_create_map reusing map %s, ret: %d", mapNames[i].c_str(), fd.get()); |
| 778 | reuse = true; |
| 779 | } else { |
| 780 | union bpf_attr req = { |
| 781 | .map_type = type, |
| 782 | .key_size = md[i].key_size, |
| 783 | .value_size = md[i].value_size, |
| 784 | .max_entries = max_entries, |
| 785 | .map_flags = md[i].map_flags, |
| 786 | }; |
| 787 | if (isAtLeastKernelVersion(4, 15, 0)) |
| 788 | strlcpy(req.map_name, mapNames[i].c_str(), sizeof(req.map_name)); |
| 789 | fd.reset(bpf(BPF_MAP_CREATE, req)); |
| 790 | saved_errno = errno; |
| 791 | ALOGD("bpf_create_map name %s, ret: %d", mapNames[i].c_str(), fd.get()); |
| 792 | } |
| 793 | |
| 794 | if (!fd.ok()) return -saved_errno; |
| 795 | |
| 796 | // When reusing a pinned map, we need to check the map type/sizes/etc match, but for |
| 797 | // safety (since reuse code path is rare) run these checks even if we just created it. |
| 798 | // We assume failure is due to pinned map mismatch, hence the 'NOT UNIQUE' return code. |
| 799 | if (!mapMatchesExpectations(fd, mapNames[i], md[i], type)) return -ENOTUNIQ; |
| 800 | |
| 801 | if (!reuse) { |
| 802 | if (specified(selinux_context)) { |
| 803 | string createLoc = string(BPF_FS_PATH) + lookupPinSubdir(selinux_context) + |
| 804 | "tmp_map_" + objName + "_" + mapNames[i]; |
| 805 | ret = bpfFdPin(fd, createLoc.c_str()); |
| 806 | if (ret) { |
| 807 | int err = errno; |
| 808 | ALOGE("create %s -> %d [%d:%s]", createLoc.c_str(), ret, err, strerror(err)); |
| 809 | return -err; |
| 810 | } |
| 811 | ret = renameat2(AT_FDCWD, createLoc.c_str(), |
| 812 | AT_FDCWD, mapPinLoc.c_str(), RENAME_NOREPLACE); |
| 813 | if (ret) { |
| 814 | int err = errno; |
| 815 | ALOGE("rename %s %s -> %d [%d:%s]", createLoc.c_str(), mapPinLoc.c_str(), ret, |
| 816 | err, strerror(err)); |
| 817 | return -err; |
| 818 | } |
| 819 | } else { |
| 820 | ret = bpfFdPin(fd, mapPinLoc.c_str()); |
| 821 | if (ret) { |
| 822 | int err = errno; |
| 823 | ALOGE("pin %s -> %d [%d:%s]", mapPinLoc.c_str(), ret, err, strerror(err)); |
| 824 | return -err; |
| 825 | } |
| 826 | } |
| 827 | ret = chmod(mapPinLoc.c_str(), md[i].mode); |
| 828 | if (ret) { |
| 829 | int err = errno; |
| 830 | ALOGE("chmod(%s, 0%o) = %d [%d:%s]", mapPinLoc.c_str(), md[i].mode, ret, err, |
| 831 | strerror(err)); |
| 832 | return -err; |
| 833 | } |
| 834 | ret = chown(mapPinLoc.c_str(), (uid_t)md[i].uid, (gid_t)md[i].gid); |
| 835 | if (ret) { |
| 836 | int err = errno; |
| 837 | ALOGE("chown(%s, %u, %u) = %d [%d:%s]", mapPinLoc.c_str(), md[i].uid, md[i].gid, |
| 838 | ret, err, strerror(err)); |
| 839 | return -err; |
| 840 | } |
| 841 | } |
| 842 | |
| 843 | int mapId = bpfGetFdMapId(fd); |
| 844 | if (mapId == -1) { |
| 845 | ALOGE("bpfGetFdMapId failed, ret: %d [%d]", mapId, errno); |
| 846 | } else { |
| 847 | ALOGI("map %s id %d", mapPinLoc.c_str(), mapId); |
| 848 | } |
| 849 | |
| 850 | mapFds.push_back(std::move(fd)); |
| 851 | } |
| 852 | |
| 853 | return ret; |
| 854 | } |
| 855 | |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 856 | static void applyRelo(void* insnsPtr, Elf64_Addr offset, int fd) { |
| 857 | int insnIndex; |
| 858 | struct bpf_insn *insn, *insns; |
| 859 | |
| 860 | insns = (struct bpf_insn*)(insnsPtr); |
| 861 | |
| 862 | insnIndex = offset / sizeof(struct bpf_insn); |
| 863 | insn = &insns[insnIndex]; |
| 864 | |
| 865 | // Occasionally might be useful for relocation debugging, but pretty spammy |
| 866 | if (0) { |
| 867 | ALOGV("applying relo to instruction at byte offset: %llu, " |
| 868 | "insn offset %d, insn %llx", |
| 869 | (unsigned long long)offset, insnIndex, *(unsigned long long*)insn); |
| 870 | } |
| 871 | |
| 872 | if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 873 | ALOGE("invalid relo for insn %d: code 0x%x", insnIndex, insn->code); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 874 | return; |
| 875 | } |
| 876 | |
| 877 | insn->imm = fd; |
| 878 | insn->src_reg = BPF_PSEUDO_MAP_FD; |
| 879 | } |
| 880 | |
| 881 | static void applyMapRelo(ifstream& elfFile, vector<unique_fd> &mapFds, vector<codeSection>& cs) { |
| 882 | vector<string> mapNames; |
| 883 | |
| 884 | int ret = getSectionSymNames(elfFile, "maps", mapNames); |
| 885 | if (ret) return; |
| 886 | |
| 887 | for (int k = 0; k != (int)cs.size(); k++) { |
| 888 | Elf64_Rel* rel = (Elf64_Rel*)(cs[k].rel_data.data()); |
| 889 | int n_rel = cs[k].rel_data.size() / sizeof(*rel); |
| 890 | |
| 891 | for (int i = 0; i < n_rel; i++) { |
| 892 | int symIndex = ELF64_R_SYM(rel[i].r_info); |
| 893 | string symName; |
| 894 | |
| 895 | ret = getSymNameByIdx(elfFile, symIndex, symName); |
| 896 | if (ret) return; |
| 897 | |
Maciej Żenczykowski | b4bade9 | 2024-08-14 23:06:54 +0000 | [diff] [blame] | 898 | // Find the map fd and apply relo |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 899 | for (int j = 0; j < (int)mapNames.size(); j++) { |
| 900 | if (!mapNames[j].compare(symName)) { |
| 901 | applyRelo(cs[k].data.data(), rel[i].r_offset, mapFds[j]); |
| 902 | break; |
| 903 | } |
| 904 | } |
| 905 | } |
| 906 | } |
| 907 | } |
| 908 | |
| 909 | static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const string& license, |
| 910 | const char* prefix, const unsigned int bpfloader_ver) { |
| 911 | unsigned kvers = kernelVersion(); |
| 912 | |
| 913 | if (!kvers) { |
| 914 | ALOGE("unable to get kernel version"); |
| 915 | return -EINVAL; |
| 916 | } |
| 917 | |
| 918 | string objName = pathToObjName(string(elfPath)); |
| 919 | |
| 920 | for (int i = 0; i < (int)cs.size(); i++) { |
| 921 | unique_fd& fd = cs[i].prog_fd; |
| 922 | int ret; |
| 923 | string name = cs[i].name; |
| 924 | |
| 925 | if (!cs[i].prog_def.has_value()) { |
| 926 | ALOGE("[%d] '%s' missing program definition! bad bpf.o build?", i, name.c_str()); |
| 927 | return -EINVAL; |
| 928 | } |
| 929 | |
| 930 | unsigned min_kver = cs[i].prog_def->min_kver; |
| 931 | unsigned max_kver = cs[i].prog_def->max_kver; |
| 932 | ALOGD("cs[%d].name:%s min_kver:%x .max_kver:%x (kvers:%x)", i, name.c_str(), min_kver, |
| 933 | max_kver, kvers); |
| 934 | if (kvers < min_kver) continue; |
| 935 | if (kvers >= max_kver) continue; |
| 936 | |
| 937 | unsigned bpfMinVer = cs[i].prog_def->bpfloader_min_ver; |
| 938 | unsigned bpfMaxVer = cs[i].prog_def->bpfloader_max_ver; |
| 939 | domain selinux_context = getDomainFromSelinuxContext(cs[i].prog_def->selinux_context); |
| 940 | domain pin_subdir = getDomainFromPinSubdir(cs[i].prog_def->pin_subdir); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 941 | |
| 942 | ALOGD("cs[%d].name:%s requires bpfloader version [0x%05x,0x%05x)", i, name.c_str(), |
| 943 | bpfMinVer, bpfMaxVer); |
| 944 | if (bpfloader_ver < bpfMinVer) continue; |
| 945 | if (bpfloader_ver >= bpfMaxVer) continue; |
| 946 | |
| 947 | if ((cs[i].prog_def->ignore_on_eng && isEng()) || |
| 948 | (cs[i].prog_def->ignore_on_user && isUser()) || |
| 949 | (cs[i].prog_def->ignore_on_userdebug && isUserdebug())) { |
| 950 | ALOGD("cs[%d].name:%s is ignored on %s builds", i, name.c_str(), |
| 951 | getBuildType().c_str()); |
| 952 | continue; |
| 953 | } |
| 954 | |
| 955 | if ((isArm() && isKernel32Bit() && cs[i].prog_def->ignore_on_arm32) || |
| 956 | (isArm() && isKernel64Bit() && cs[i].prog_def->ignore_on_aarch64) || |
| 957 | (isX86() && isKernel32Bit() && cs[i].prog_def->ignore_on_x86_32) || |
| 958 | (isX86() && isKernel64Bit() && cs[i].prog_def->ignore_on_x86_64) || |
| 959 | (isRiscV() && cs[i].prog_def->ignore_on_riscv64)) { |
| 960 | ALOGD("cs[%d].name:%s is ignored on %s", i, name.c_str(), describeArch()); |
| 961 | continue; |
| 962 | } |
| 963 | |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 964 | if (specified(selinux_context)) { |
Maciej Żenczykowski | 27b535a | 2024-08-15 19:46:46 +0000 | [diff] [blame] | 965 | ALOGV("prog %s selinux_context [%-32s] -> %d -> '%s' (%s)", name.c_str(), |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 966 | cs[i].prog_def->selinux_context, static_cast<int>(selinux_context), |
| 967 | lookupSelinuxContext(selinux_context), lookupPinSubdir(selinux_context)); |
| 968 | } |
| 969 | |
| 970 | if (specified(pin_subdir)) { |
Maciej Żenczykowski | 27b535a | 2024-08-15 19:46:46 +0000 | [diff] [blame] | 971 | ALOGV("prog %s pin_subdir [%-32s] -> %d -> '%s'", name.c_str(), |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 972 | cs[i].prog_def->pin_subdir, static_cast<int>(pin_subdir), |
| 973 | lookupPinSubdir(pin_subdir)); |
| 974 | } |
| 975 | |
| 976 | // strip any potential $foo suffix |
| 977 | // this can be used to provide duplicate programs |
| 978 | // conditionally loaded based on running kernel version |
| 979 | name = name.substr(0, name.find_last_of('$')); |
| 980 | |
| 981 | bool reuse = false; |
| 982 | // Format of pin location is |
| 983 | // /sys/fs/bpf/<prefix>prog_<objName>_<progName> |
| 984 | string progPinLoc = string(BPF_FS_PATH) + lookupPinSubdir(pin_subdir, prefix) + "prog_" + |
| 985 | objName + '_' + string(name); |
| 986 | if (access(progPinLoc.c_str(), F_OK) == 0) { |
| 987 | fd.reset(retrieveProgram(progPinLoc.c_str())); |
| 988 | ALOGD("New bpf prog load reusing prog %s, ret: %d (%s)", progPinLoc.c_str(), fd.get(), |
| 989 | (!fd.ok() ? std::strerror(errno) : "no error")); |
| 990 | reuse = true; |
| 991 | } else { |
| 992 | vector<char> log_buf(BPF_LOAD_LOG_SZ, 0); |
| 993 | |
| 994 | union bpf_attr req = { |
| 995 | .prog_type = cs[i].type, |
| 996 | .kern_version = kvers, |
| 997 | .license = ptr_to_u64(license.c_str()), |
| 998 | .insns = ptr_to_u64(cs[i].data.data()), |
| 999 | .insn_cnt = static_cast<__u32>(cs[i].data.size() / sizeof(struct bpf_insn)), |
| 1000 | .log_level = 1, |
| 1001 | .log_buf = ptr_to_u64(log_buf.data()), |
| 1002 | .log_size = static_cast<__u32>(log_buf.size()), |
Maciej Żenczykowski | 346831c | 2024-08-12 17:49:10 +0000 | [diff] [blame] | 1003 | .expected_attach_type = cs[i].attach_type, |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 1004 | }; |
| 1005 | if (isAtLeastKernelVersion(4, 15, 0)) |
| 1006 | strlcpy(req.prog_name, cs[i].name.c_str(), sizeof(req.prog_name)); |
| 1007 | fd.reset(bpf(BPF_PROG_LOAD, req)); |
| 1008 | |
| 1009 | ALOGD("BPF_PROG_LOAD call for %s (%s) returned fd: %d (%s)", elfPath, |
| 1010 | cs[i].name.c_str(), fd.get(), (!fd.ok() ? std::strerror(errno) : "no error")); |
| 1011 | |
| 1012 | if (!fd.ok()) { |
| 1013 | vector<string> lines = android::base::Split(log_buf.data(), "\n"); |
| 1014 | |
| 1015 | ALOGW("BPF_PROG_LOAD - BEGIN log_buf contents:"); |
| 1016 | for (const auto& line : lines) ALOGW("%s", line.c_str()); |
| 1017 | ALOGW("BPF_PROG_LOAD - END log_buf contents."); |
| 1018 | |
| 1019 | if (cs[i].prog_def->optional) { |
| 1020 | ALOGW("failed program is marked optional - continuing..."); |
| 1021 | continue; |
| 1022 | } |
| 1023 | ALOGE("non-optional program failed to load."); |
| 1024 | } |
| 1025 | } |
| 1026 | |
| 1027 | if (!fd.ok()) return fd.get(); |
| 1028 | |
| 1029 | if (!reuse) { |
| 1030 | if (specified(selinux_context)) { |
| 1031 | string createLoc = string(BPF_FS_PATH) + lookupPinSubdir(selinux_context) + |
| 1032 | "tmp_prog_" + objName + '_' + string(name); |
| 1033 | ret = bpfFdPin(fd, createLoc.c_str()); |
| 1034 | if (ret) { |
| 1035 | int err = errno; |
| 1036 | ALOGE("create %s -> %d [%d:%s]", createLoc.c_str(), ret, err, strerror(err)); |
| 1037 | return -err; |
| 1038 | } |
| 1039 | ret = renameat2(AT_FDCWD, createLoc.c_str(), |
| 1040 | AT_FDCWD, progPinLoc.c_str(), RENAME_NOREPLACE); |
| 1041 | if (ret) { |
| 1042 | int err = errno; |
| 1043 | ALOGE("rename %s %s -> %d [%d:%s]", createLoc.c_str(), progPinLoc.c_str(), ret, |
| 1044 | err, strerror(err)); |
| 1045 | return -err; |
| 1046 | } |
| 1047 | } else { |
| 1048 | ret = bpfFdPin(fd, progPinLoc.c_str()); |
| 1049 | if (ret) { |
| 1050 | int err = errno; |
| 1051 | ALOGE("create %s -> %d [%d:%s]", progPinLoc.c_str(), ret, err, strerror(err)); |
| 1052 | return -err; |
| 1053 | } |
| 1054 | } |
| 1055 | if (chmod(progPinLoc.c_str(), 0440)) { |
| 1056 | int err = errno; |
| 1057 | ALOGE("chmod %s 0440 -> [%d:%s]", progPinLoc.c_str(), err, strerror(err)); |
| 1058 | return -err; |
| 1059 | } |
| 1060 | if (chown(progPinLoc.c_str(), (uid_t)cs[i].prog_def->uid, |
| 1061 | (gid_t)cs[i].prog_def->gid)) { |
| 1062 | int err = errno; |
| 1063 | ALOGE("chown %s %d %d -> [%d:%s]", progPinLoc.c_str(), cs[i].prog_def->uid, |
| 1064 | cs[i].prog_def->gid, err, strerror(err)); |
| 1065 | return -err; |
| 1066 | } |
| 1067 | } |
| 1068 | |
| 1069 | int progId = bpfGetFdProgId(fd); |
| 1070 | if (progId == -1) { |
| 1071 | ALOGE("bpfGetFdProgId failed, ret: %d [%d]", progId, errno); |
| 1072 | } else { |
| 1073 | ALOGI("prog %s id %d", progPinLoc.c_str(), progId); |
| 1074 | } |
| 1075 | } |
| 1076 | |
| 1077 | return 0; |
| 1078 | } |
| 1079 | |
| 1080 | int loadProg(const char* const elfPath, bool* const isCritical, const unsigned int bpfloader_ver, |
| 1081 | const Location& location) { |
| 1082 | vector<char> license; |
| 1083 | vector<char> critical; |
| 1084 | vector<codeSection> cs; |
| 1085 | vector<unique_fd> mapFds; |
| 1086 | int ret; |
| 1087 | |
| 1088 | if (!isCritical) return -1; |
| 1089 | *isCritical = false; |
| 1090 | |
| 1091 | ifstream elfFile(elfPath, ios::in | ios::binary); |
| 1092 | if (!elfFile.is_open()) return -1; |
| 1093 | |
| 1094 | ret = readSectionByName("critical", elfFile, critical); |
| 1095 | *isCritical = !ret; |
| 1096 | |
| 1097 | ret = readSectionByName("license", elfFile, license); |
| 1098 | if (ret) { |
| 1099 | ALOGE("Couldn't find license in %s", elfPath); |
| 1100 | return ret; |
| 1101 | } else { |
| 1102 | ALOGD("Loading %s%s ELF object %s with license %s", |
| 1103 | *isCritical ? "critical for " : "optional", *isCritical ? (char*)critical.data() : "", |
| 1104 | elfPath, (char*)license.data()); |
| 1105 | } |
| 1106 | |
| 1107 | // the following default values are for bpfloader V0.0 format which does not include them |
| 1108 | unsigned int bpfLoaderMinVer = |
| 1109 | readSectionUint("bpfloader_min_ver", elfFile, DEFAULT_BPFLOADER_MIN_VER); |
| 1110 | unsigned int bpfLoaderMaxVer = |
| 1111 | readSectionUint("bpfloader_max_ver", elfFile, DEFAULT_BPFLOADER_MAX_VER); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 1112 | |
| 1113 | // inclusive lower bound check |
| 1114 | if (bpfloader_ver < bpfLoaderMinVer) { |
| 1115 | ALOGI("BpfLoader version 0x%05x ignoring ELF object %s with min ver 0x%05x", |
| 1116 | bpfloader_ver, elfPath, bpfLoaderMinVer); |
| 1117 | return 0; |
| 1118 | } |
| 1119 | |
| 1120 | // exclusive upper bound check |
| 1121 | if (bpfloader_ver >= bpfLoaderMaxVer) { |
| 1122 | ALOGI("BpfLoader version 0x%05x ignoring ELF object %s with max ver 0x%05x", |
| 1123 | bpfloader_ver, elfPath, bpfLoaderMaxVer); |
| 1124 | return 0; |
| 1125 | } |
| 1126 | |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 1127 | ALOGI("BpfLoader version 0x%05x processing ELF object %s with ver [0x%05x,0x%05x)", |
| 1128 | bpfloader_ver, elfPath, bpfLoaderMinVer, bpfLoaderMaxVer); |
| 1129 | |
Maciej Żenczykowski | e666d85 | 2024-08-15 15:03:38 -0700 | [diff] [blame] | 1130 | ret = readCodeSections(elfFile, cs); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 1131 | if (ret) { |
| 1132 | ALOGE("Couldn't read all code sections in %s", elfPath); |
| 1133 | return ret; |
| 1134 | } |
| 1135 | |
Maciej Żenczykowski | acd4aa8 | 2024-08-15 15:16:48 -0700 | [diff] [blame^] | 1136 | ret = createMaps(elfPath, elfFile, mapFds, location.prefix, bpfloader_ver); |
Maciej Żenczykowski | 6e1b425 | 2024-08-07 15:03:44 -0700 | [diff] [blame] | 1137 | if (ret) { |
| 1138 | ALOGE("Failed to create maps: (ret=%d) in %s", ret, elfPath); |
| 1139 | return ret; |
| 1140 | } |
| 1141 | |
| 1142 | for (int i = 0; i < (int)mapFds.size(); i++) |
| 1143 | ALOGV("map_fd found at %d is %d in %s", i, mapFds[i].get(), elfPath); |
| 1144 | |
| 1145 | applyMapRelo(elfFile, mapFds, cs); |
| 1146 | |
| 1147 | ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix, bpfloader_ver); |
| 1148 | if (ret) ALOGE("Failed to load programs, loadCodeSections ret=%d", ret); |
| 1149 | |
| 1150 | return ret; |
| 1151 | } |
| 1152 | |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1153 | static bool exists(const char* const path) { |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1154 | int v = access(path, F_OK); |
Maciej Żenczykowski | 731acfe | 2024-04-30 10:09:57 +0000 | [diff] [blame] | 1155 | if (!v) return true; |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1156 | if (errno == ENOENT) return false; |
| 1157 | ALOGE("FATAL: access(%s, F_OK) -> %d [%d:%s]", path, v, errno, strerror(errno)); |
| 1158 | abort(); // can only hit this if permissions (likely selinux) are screwed up |
| 1159 | } |
| 1160 | |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1161 | |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1162 | const Location locations[] = { |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1163 | // S+ Tethering mainline module (network_stack): tether offload |
| 1164 | { |
| 1165 | .dir = "/apex/com.android.tethering/etc/bpf/", |
| 1166 | .prefix = "tethering/", |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1167 | }, |
| 1168 | // T+ Tethering mainline module (shared with netd & system server) |
| 1169 | // netutils_wrapper (for iptables xt_bpf) has access to programs |
| 1170 | { |
| 1171 | .dir = "/apex/com.android.tethering/etc/bpf/netd_shared/", |
| 1172 | .prefix = "netd_shared/", |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1173 | }, |
| 1174 | // T+ Tethering mainline module (shared with netd & system server) |
| 1175 | // netutils_wrapper has no access, netd has read only access |
| 1176 | { |
| 1177 | .dir = "/apex/com.android.tethering/etc/bpf/netd_readonly/", |
| 1178 | .prefix = "netd_readonly/", |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1179 | }, |
| 1180 | // T+ Tethering mainline module (shared with system server) |
| 1181 | { |
| 1182 | .dir = "/apex/com.android.tethering/etc/bpf/net_shared/", |
| 1183 | .prefix = "net_shared/", |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1184 | }, |
| 1185 | // T+ Tethering mainline module (not shared, just network_stack) |
| 1186 | { |
| 1187 | .dir = "/apex/com.android.tethering/etc/bpf/net_private/", |
| 1188 | .prefix = "net_private/", |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1189 | }, |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1190 | }; |
| 1191 | |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1192 | static int loadAllElfObjects(const unsigned int bpfloader_ver, const Location& location) { |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1193 | int retVal = 0; |
| 1194 | DIR* dir; |
| 1195 | struct dirent* ent; |
| 1196 | |
| 1197 | if ((dir = opendir(location.dir)) != NULL) { |
| 1198 | while ((ent = readdir(dir)) != NULL) { |
| 1199 | string s = ent->d_name; |
| 1200 | if (!EndsWith(s, ".o")) continue; |
| 1201 | |
| 1202 | string progPath(location.dir); |
| 1203 | progPath += s; |
| 1204 | |
| 1205 | bool critical; |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1206 | int ret = loadProg(progPath.c_str(), &critical, bpfloader_ver, location); |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1207 | if (ret) { |
| 1208 | if (critical) retVal = ret; |
| 1209 | ALOGE("Failed to load object: %s, ret: %s", progPath.c_str(), std::strerror(-ret)); |
| 1210 | } else { |
Maciej Żenczykowski | 5c057ed | 2024-04-30 11:59:13 +0000 | [diff] [blame] | 1211 | ALOGD("Loaded object: %s", progPath.c_str()); |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1212 | } |
| 1213 | } |
| 1214 | closedir(dir); |
| 1215 | } |
| 1216 | return retVal; |
| 1217 | } |
| 1218 | |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1219 | static int createSysFsBpfSubDir(const char* const prefix) { |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1220 | if (*prefix) { |
| 1221 | mode_t prevUmask = umask(0); |
| 1222 | |
| 1223 | string s = "/sys/fs/bpf/"; |
| 1224 | s += prefix; |
| 1225 | |
| 1226 | errno = 0; |
| 1227 | int ret = mkdir(s.c_str(), S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO); |
| 1228 | if (ret && errno != EEXIST) { |
| 1229 | const int err = errno; |
| 1230 | ALOGE("Failed to create directory: %s, ret: %s", s.c_str(), std::strerror(err)); |
| 1231 | return -err; |
| 1232 | } |
| 1233 | |
| 1234 | umask(prevUmask); |
| 1235 | } |
| 1236 | return 0; |
| 1237 | } |
| 1238 | |
| 1239 | // Technically 'value' doesn't need to be newline terminated, but it's best |
| 1240 | // to include a newline to match 'echo "value" > /proc/sys/...foo' behaviour, |
| 1241 | // which is usually how kernel devs test the actual sysctl interfaces. |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1242 | static int writeProcSysFile(const char *filename, const char *value) { |
| 1243 | base::unique_fd fd(open(filename, O_WRONLY | O_CLOEXEC)); |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1244 | if (fd < 0) { |
| 1245 | const int err = errno; |
| 1246 | ALOGE("open('%s', O_WRONLY | O_CLOEXEC) -> %s", filename, strerror(err)); |
| 1247 | return -err; |
| 1248 | } |
| 1249 | int len = strlen(value); |
| 1250 | int v = write(fd, value, len); |
| 1251 | if (v < 0) { |
| 1252 | const int err = errno; |
| 1253 | ALOGE("write('%s', '%s', %d) -> %s", filename, value, len, strerror(err)); |
| 1254 | return -err; |
| 1255 | } |
| 1256 | if (v != len) { |
| 1257 | // In practice, due to us only using this for /proc/sys/... files, this can't happen. |
| 1258 | ALOGE("write('%s', '%s', %d) -> short write [%d]", filename, value, len, v); |
| 1259 | return -EINVAL; |
| 1260 | } |
| 1261 | return 0; |
| 1262 | } |
| 1263 | |
Maciej Żenczykowski | b60599b | 2024-02-09 12:30:52 -0800 | [diff] [blame] | 1264 | #define APEX_MOUNT_POINT "/apex/com.android.tethering" |
Maciej Żenczykowski | 2fe2db5 | 2024-02-07 01:23:58 +0000 | [diff] [blame] | 1265 | const char * const platformBpfLoader = "/system/bin/bpfloader"; |
Maciej Żenczykowski | b60599b | 2024-02-09 12:30:52 -0800 | [diff] [blame] | 1266 | |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1267 | static int logTetheringApexVersion(void) { |
Maciej Żenczykowski | b60599b | 2024-02-09 12:30:52 -0800 | [diff] [blame] | 1268 | char * found_blockdev = NULL; |
| 1269 | FILE * f = NULL; |
| 1270 | char buf[4096]; |
| 1271 | |
| 1272 | f = fopen("/proc/mounts", "re"); |
| 1273 | if (!f) return 1; |
| 1274 | |
| 1275 | // /proc/mounts format: block_device [space] mount_point [space] other stuff... newline |
| 1276 | while (fgets(buf, sizeof(buf), f)) { |
| 1277 | char * blockdev = buf; |
| 1278 | char * space = strchr(blockdev, ' '); |
| 1279 | if (!space) continue; |
| 1280 | *space = '\0'; |
| 1281 | char * mntpath = space + 1; |
| 1282 | space = strchr(mntpath, ' '); |
| 1283 | if (!space) continue; |
| 1284 | *space = '\0'; |
| 1285 | if (strcmp(mntpath, APEX_MOUNT_POINT)) continue; |
| 1286 | found_blockdev = strdup(blockdev); |
| 1287 | break; |
| 1288 | } |
| 1289 | fclose(f); |
| 1290 | f = NULL; |
| 1291 | |
| 1292 | if (!found_blockdev) return 2; |
Maciej Żenczykowski | 5c057ed | 2024-04-30 11:59:13 +0000 | [diff] [blame] | 1293 | ALOGV("Found Tethering Apex mounted from blockdev %s", found_blockdev); |
Maciej Żenczykowski | b60599b | 2024-02-09 12:30:52 -0800 | [diff] [blame] | 1294 | |
| 1295 | f = fopen("/proc/mounts", "re"); |
| 1296 | if (!f) { free(found_blockdev); return 3; } |
| 1297 | |
| 1298 | while (fgets(buf, sizeof(buf), f)) { |
| 1299 | char * blockdev = buf; |
| 1300 | char * space = strchr(blockdev, ' '); |
| 1301 | if (!space) continue; |
| 1302 | *space = '\0'; |
| 1303 | char * mntpath = space + 1; |
| 1304 | space = strchr(mntpath, ' '); |
| 1305 | if (!space) continue; |
| 1306 | *space = '\0'; |
| 1307 | if (strcmp(blockdev, found_blockdev)) continue; |
| 1308 | if (strncmp(mntpath, APEX_MOUNT_POINT "@", strlen(APEX_MOUNT_POINT "@"))) continue; |
| 1309 | char * at = strchr(mntpath, '@'); |
| 1310 | if (!at) continue; |
| 1311 | char * ver = at + 1; |
| 1312 | ALOGI("Tethering APEX version %s", ver); |
| 1313 | } |
| 1314 | fclose(f); |
| 1315 | free(found_blockdev); |
| 1316 | return 0; |
| 1317 | } |
Maciej Żenczykowski | 2fe2db5 | 2024-02-07 01:23:58 +0000 | [diff] [blame] | 1318 | |
Maciej Żenczykowski | 68eab89 | 2024-05-24 03:17:59 -0700 | [diff] [blame] | 1319 | static bool hasGSM() { |
| 1320 | static string ph = base::GetProperty("gsm.current.phone-type", ""); |
| 1321 | static bool gsm = (ph != ""); |
| 1322 | static bool logged = false; |
| 1323 | if (!logged) { |
| 1324 | logged = true; |
| 1325 | ALOGI("hasGSM(gsm.current.phone-type='%s'): %s", ph.c_str(), gsm ? "true" : "false"); |
| 1326 | } |
| 1327 | return gsm; |
| 1328 | } |
| 1329 | |
| 1330 | static bool isTV() { |
| 1331 | if (hasGSM()) return false; // TVs don't do GSM |
| 1332 | |
| 1333 | static string key = base::GetProperty("ro.oem.key1", ""); |
| 1334 | static bool tv = StartsWith(key, "ATV00"); |
| 1335 | static bool logged = false; |
| 1336 | if (!logged) { |
| 1337 | logged = true; |
| 1338 | ALOGI("isTV(ro.oem.key1='%s'): %s.", key.c_str(), tv ? "true" : "false"); |
| 1339 | } |
| 1340 | return tv; |
| 1341 | } |
| 1342 | |
Maciej Żenczykowski | 6e6b209 | 2024-06-24 23:57:41 +0000 | [diff] [blame] | 1343 | static bool isWear() { |
| 1344 | static string wearSdkStr = base::GetProperty("ro.cw_build.wear_sdk.version", ""); |
| 1345 | static int wearSdkInt = base::GetIntProperty("ro.cw_build.wear_sdk.version", 0); |
| 1346 | static string buildChars = base::GetProperty("ro.build.characteristics", ""); |
| 1347 | static vector<string> v = base::Tokenize(buildChars, ","); |
| 1348 | static bool watch = (std::find(v.begin(), v.end(), "watch") != v.end()); |
| 1349 | static bool wear = (wearSdkInt > 0) || watch; |
| 1350 | static bool logged = false; |
| 1351 | if (!logged) { |
| 1352 | logged = true; |
| 1353 | ALOGI("isWear(ro.cw_build.wear_sdk.version=%d[%s] ro.build.characteristics='%s'): %s", |
| 1354 | wearSdkInt, wearSdkStr.c_str(), buildChars.c_str(), wear ? "true" : "false"); |
| 1355 | } |
| 1356 | return wear; |
| 1357 | } |
| 1358 | |
Maciej Żenczykowski | 6d151ef | 2024-04-30 23:55:57 -0700 | [diff] [blame] | 1359 | static int doLoad(char** argv, char * const envp[]) { |
Maciej Żenczykowski | 15f9731 | 2024-06-13 14:11:28 -0700 | [diff] [blame] | 1360 | const bool runningAsRoot = !getuid(); // true iff U QPR3 or V+ |
Maciej Żenczykowski | 7b95d99 | 2024-06-13 18:18:11 -0700 | [diff] [blame] | 1361 | |
Maciej Żenczykowski | 686f6ac | 2024-06-14 14:42:06 -0700 | [diff] [blame] | 1362 | // Any released device will have codename REL instead of a 'real' codename. |
| 1363 | // For safety: default to 'REL' so we default to unreleased=false on failure. |
| 1364 | const bool unreleased = (base::GetProperty("ro.build.version.codename", "REL") != "REL"); |
| 1365 | |
| 1366 | // goog/main device_api_level is bumped *way* before aosp/main api level |
| 1367 | // (the latter only gets bumped during the push of goog/main to aosp/main) |
| 1368 | // |
| 1369 | // Since we develop in AOSP, we want it to behave as if it was bumped too. |
| 1370 | // |
| 1371 | // Note that AOSP doesn't really have a good api level (for example during |
| 1372 | // early V dev cycle, it would have *all* of T, some but not all of U, and some V). |
| 1373 | // One could argue that for our purposes AOSP api level should be infinite or 10000. |
| 1374 | // |
| 1375 | // This could also cause api to be increased in goog/main or other branches, |
| 1376 | // but I can't imagine a case where this would be a problem: the problem |
| 1377 | // is rather a too low api level, rather than some ill defined high value. |
| 1378 | // For example as I write this aosp is 34/U, and goog is 35/V, |
| 1379 | // we want to treat both goog & aosp as 35/V, but it's harmless if we |
| 1380 | // treat goog as 36 because that value isn't yet defined to mean anything, |
| 1381 | // and we thus never compare against it. |
| 1382 | // |
| 1383 | // Also note that 'android_get_device_api_level()' is what the |
| 1384 | // //system/core/init/apex_init_util.cpp |
| 1385 | // apex init .XXrc parsing code uses for XX filtering. |
| 1386 | // |
| 1387 | // That code has a hack to bump <35 to 35 (to force aosp/main to parse .35rc), |
| 1388 | // but could (should?) perhaps be adjusted to match this. |
| 1389 | const int effective_api_level = android_get_device_api_level() + (int)unreleased; |
| 1390 | const bool isAtLeastT = (effective_api_level >= __ANDROID_API_T__); |
| 1391 | const bool isAtLeastU = (effective_api_level >= __ANDROID_API_U__); |
| 1392 | const bool isAtLeastV = (effective_api_level >= __ANDROID_API_V__); |
Maciej Żenczykowski | 041be52 | 2023-10-23 23:34:52 -0700 | [diff] [blame] | 1393 | |
Maciej Żenczykowski | 03ef12c | 2024-02-10 21:34:22 +0000 | [diff] [blame] | 1394 | // last in U QPR2 beta1 |
| 1395 | const bool has_platform_bpfloader_rc = exists("/system/etc/init/bpfloader.rc"); |
| 1396 | // first in U QPR2 beta~2 |
| 1397 | const bool has_platform_netbpfload_rc = exists("/system/etc/init/netbpfload.rc"); |
| 1398 | |
Maciej Żenczykowski | 6295614 | 2024-06-13 15:32:57 -0700 | [diff] [blame] | 1399 | // Version of Network BpfLoader depends on the Android OS version |
Maciej Żenczykowski | 1a3b54f | 2024-06-13 15:35:46 -0700 | [diff] [blame] | 1400 | unsigned int bpfloader_ver = 42u; // [42] BPFLOADER_MAINLINE_VERSION |
| 1401 | if (isAtLeastT) ++bpfloader_ver; // [43] BPFLOADER_MAINLINE_T_VERSION |
| 1402 | if (isAtLeastU) ++bpfloader_ver; // [44] BPFLOADER_MAINLINE_U_VERSION |
| 1403 | if (runningAsRoot) ++bpfloader_ver; // [45] BPFLOADER_MAINLINE_U_QPR3_VERSION |
| 1404 | if (isAtLeastV) ++bpfloader_ver; // [46] BPFLOADER_MAINLINE_V_VERSION |
Maciej Żenczykowski | 6295614 | 2024-06-13 15:32:57 -0700 | [diff] [blame] | 1405 | |
Maciej Żenczykowski | 7b95d99 | 2024-06-13 18:18:11 -0700 | [diff] [blame] | 1406 | ALOGI("NetBpfLoad v0.%u (%s) api:%d/%d kver:%07x (%s) uid:%d rc:%d%d", |
Maciej Żenczykowski | 686f6ac | 2024-06-14 14:42:06 -0700 | [diff] [blame] | 1407 | bpfloader_ver, argv[0], android_get_device_api_level(), effective_api_level, |
Maciej Żenczykowski | 7b95d99 | 2024-06-13 18:18:11 -0700 | [diff] [blame] | 1408 | kernelVersion(), describeArch(), getuid(), |
Maciej Żenczykowski | 03ef12c | 2024-02-10 21:34:22 +0000 | [diff] [blame] | 1409 | has_platform_bpfloader_rc, has_platform_netbpfload_rc); |
Maciej Żenczykowski | 041be52 | 2023-10-23 23:34:52 -0700 | [diff] [blame] | 1410 | |
Maciej Żenczykowski | 03ef12c | 2024-02-10 21:34:22 +0000 | [diff] [blame] | 1411 | if (!has_platform_bpfloader_rc && !has_platform_netbpfload_rc) { |
| 1412 | ALOGE("Unable to find platform's bpfloader & netbpfload init scripts."); |
| 1413 | return 1; |
| 1414 | } |
| 1415 | |
| 1416 | if (has_platform_bpfloader_rc && has_platform_netbpfload_rc) { |
| 1417 | ALOGE("Platform has *both* bpfloader & netbpfload init scripts."); |
| 1418 | return 1; |
| 1419 | } |
| 1420 | |
Maciej Żenczykowski | b60599b | 2024-02-09 12:30:52 -0800 | [diff] [blame] | 1421 | logTetheringApexVersion(); |
| 1422 | |
Maciej Żenczykowski | 11141da | 2024-03-15 18:21:33 -0700 | [diff] [blame] | 1423 | if (!isAtLeastT) { |
| 1424 | ALOGE("Impossible - not reachable on Android <T."); |
Maciej Żenczykowski | 03ef12c | 2024-02-10 21:34:22 +0000 | [diff] [blame] | 1425 | return 1; |
| 1426 | } |
| 1427 | |
Maciej Żenczykowski | c834fdb | 2024-06-02 22:24:01 +0000 | [diff] [blame] | 1428 | // both S and T require kernel 4.9 (and eBpf support) |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1429 | if (isAtLeastT && !isAtLeastKernelVersion(4, 9, 0)) { |
Maciej Żenczykowski | 041be52 | 2023-10-23 23:34:52 -0700 | [diff] [blame] | 1430 | ALOGE("Android T requires kernel 4.9."); |
| 1431 | return 1; |
| 1432 | } |
| 1433 | |
Maciej Żenczykowski | c834fdb | 2024-06-02 22:24:01 +0000 | [diff] [blame] | 1434 | // U bumps the kernel requirement up to 4.14 |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1435 | if (isAtLeastU && !isAtLeastKernelVersion(4, 14, 0)) { |
Maciej Żenczykowski | 041be52 | 2023-10-23 23:34:52 -0700 | [diff] [blame] | 1436 | ALOGE("Android U requires kernel 4.14."); |
| 1437 | return 1; |
| 1438 | } |
| 1439 | |
Maciej Żenczykowski | c834fdb | 2024-06-02 22:24:01 +0000 | [diff] [blame] | 1440 | // V bumps the kernel requirement up to 4.19 |
| 1441 | // see also: //system/netd/tests/kernel_test.cpp TestKernel419 |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1442 | if (isAtLeastV && !isAtLeastKernelVersion(4, 19, 0)) { |
Maciej Żenczykowski | 041be52 | 2023-10-23 23:34:52 -0700 | [diff] [blame] | 1443 | ALOGE("Android V requires kernel 4.19."); |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1444 | return 1; |
| 1445 | } |
| 1446 | |
Maciej Żenczykowski | c834fdb | 2024-06-02 22:24:01 +0000 | [diff] [blame] | 1447 | // Technically already required by U, but only enforce on V+ |
| 1448 | // see also: //system/netd/tests/kernel_test.cpp TestKernel64Bit |
| 1449 | if (isAtLeastV && isKernel32Bit() && isAtLeastKernelVersion(5, 16, 0)) { |
| 1450 | ALOGE("Android V+ platform with 32 bit kernel version >= 5.16.0 is unsupported"); |
| 1451 | if (!isTV()) return 1; |
| 1452 | } |
| 1453 | |
| 1454 | // Various known ABI layout issues, particularly wrt. bpf and ipsec/xfrm. |
| 1455 | if (isAtLeastV && isKernel32Bit() && isX86()) { |
Maciej Żenczykowski | 7f6a426 | 2024-02-17 00:42:42 +0000 | [diff] [blame] | 1456 | ALOGE("Android V requires X86 kernel to be 64-bit."); |
Maciej Żenczykowski | 68eab89 | 2024-05-24 03:17:59 -0700 | [diff] [blame] | 1457 | if (!isTV()) return 1; |
Maciej Żenczykowski | 7f6a426 | 2024-02-17 00:42:42 +0000 | [diff] [blame] | 1458 | } |
| 1459 | |
Maciej Żenczykowski | c982a4b | 2024-04-25 23:04:09 -0700 | [diff] [blame] | 1460 | if (isAtLeastV) { |
| 1461 | bool bad = false; |
| 1462 | |
| 1463 | if (!isLtsKernel()) { |
| 1464 | ALOGW("Android V only supports LTS kernels."); |
| 1465 | bad = true; |
| 1466 | } |
| 1467 | |
| 1468 | #define REQUIRE(maj, min, sub) \ |
| 1469 | if (isKernelVersion(maj, min) && !isAtLeastKernelVersion(maj, min, sub)) { \ |
| 1470 | ALOGW("Android V requires %d.%d kernel to be %d.%d.%d+.", maj, min, maj, min, sub); \ |
| 1471 | bad = true; \ |
| 1472 | } |
| 1473 | |
| 1474 | REQUIRE(4, 19, 236) |
| 1475 | REQUIRE(5, 4, 186) |
| 1476 | REQUIRE(5, 10, 199) |
| 1477 | REQUIRE(5, 15, 136) |
| 1478 | REQUIRE(6, 1, 57) |
| 1479 | REQUIRE(6, 6, 0) |
| 1480 | |
| 1481 | #undef REQUIRE |
| 1482 | |
Maciej Żenczykowski | 4a0838c | 2024-06-14 20:22:20 +0000 | [diff] [blame] | 1483 | if (bad) { |
Maciej Żenczykowski | c982a4b | 2024-04-25 23:04:09 -0700 | [diff] [blame] | 1484 | ALOGE("Unsupported kernel version (%07x).", kernelVersion()); |
| 1485 | } |
| 1486 | } |
| 1487 | |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1488 | if (isUserspace32bit() && isAtLeastKernelVersion(6, 2, 0)) { |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1489 | /* Android 14/U should only launch on 64-bit kernels |
| 1490 | * T launches on 5.10/5.15 |
| 1491 | * U launches on 5.15/6.1 |
| 1492 | * So >=5.16 implies isKernel64Bit() |
| 1493 | * |
| 1494 | * We thus added a test to V VTS which requires 5.16+ devices to use 64-bit kernels. |
| 1495 | * |
| 1496 | * Starting with Android V, which is the first to support a post 6.1 Linux Kernel, |
| 1497 | * we also require 64-bit userspace. |
| 1498 | * |
| 1499 | * There are various known issues with 32-bit userspace talking to various |
| 1500 | * kernel interfaces (especially CAP_NET_ADMIN ones) on a 64-bit kernel. |
| 1501 | * Some of these have userspace or kernel workarounds/hacks. |
| 1502 | * Some of them don't... |
| 1503 | * We're going to be removing the hacks. |
Maciej Żenczykowski | c834fdb | 2024-06-02 22:24:01 +0000 | [diff] [blame] | 1504 | * (for example "ANDROID: xfrm: remove in_compat_syscall() checks"). |
| 1505 | * Note: this check/enforcement only applies to *system* userspace code, |
| 1506 | * it does not affect unprivileged apps, the 32-on-64 compatibility |
| 1507 | * problems are AFAIK limited to various CAP_NET_ADMIN protected interfaces. |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1508 | * |
| 1509 | * Additionally the 32-bit kernel jit support is poor, |
| 1510 | * and 32-bit userspace on 64-bit kernel bpf ringbuffer compatibility is broken. |
| 1511 | */ |
| 1512 | ALOGE("64-bit userspace required on 6.2+ kernels."); |
Maciej Żenczykowski | 6e6b209 | 2024-06-24 23:57:41 +0000 | [diff] [blame] | 1513 | // Stuff won't work reliably, but exempt TVs & Arm Wear devices |
| 1514 | if (!isTV() && !(isWear() && isArm())) return 1; |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1515 | } |
| 1516 | |
| 1517 | // Ensure we can determine the Android build type. |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1518 | if (!isEng() && !isUser() && !isUserdebug()) { |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1519 | ALOGE("Failed to determine the build type: got %s, want 'eng', 'user', or 'userdebug'", |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1520 | getBuildType().c_str()); |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1521 | return 1; |
| 1522 | } |
| 1523 | |
Maciej Żenczykowski | 48e476b | 2024-06-13 14:06:49 -0700 | [diff] [blame] | 1524 | if (runningAsRoot) { |
| 1525 | // Note: writing this proc file requires being root (always the case on V+) |
| 1526 | |
Maciej Żenczykowski | f33f128 | 2023-10-24 04:41:54 -0700 | [diff] [blame] | 1527 | // Linux 5.16-rc1 changed the default to 2 (disabled but changeable), |
| 1528 | // but we need 0 (enabled) |
| 1529 | // (this writeFile is known to fail on at least 4.19, but always defaults to 0 on |
| 1530 | // pre-5.13, on 5.13+ it depends on CONFIG_BPF_UNPRIV_DEFAULT_OFF) |
| 1531 | if (writeProcSysFile("/proc/sys/kernel/unprivileged_bpf_disabled", "0\n") && |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1532 | isAtLeastKernelVersion(5, 13, 0)) return 1; |
Maciej Żenczykowski | 732a141 | 2024-03-14 00:17:18 -0700 | [diff] [blame] | 1533 | } |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1534 | |
Maciej Żenczykowski | 732a141 | 2024-03-14 00:17:18 -0700 | [diff] [blame] | 1535 | if (isAtLeastU) { |
Maciej Żenczykowski | 48e476b | 2024-06-13 14:06:49 -0700 | [diff] [blame] | 1536 | // Note: writing these proc files requires CAP_NET_ADMIN |
| 1537 | // and sepolicy which is only present on U+, |
| 1538 | // on Android T and earlier versions they're written from the 'load_bpf_programs' |
| 1539 | // trigger (ie. by init itself) instead. |
| 1540 | |
Maciej Żenczykowski | f33f128 | 2023-10-24 04:41:54 -0700 | [diff] [blame] | 1541 | // Enable the eBPF JIT -- but do note that on 64-bit kernels it is likely |
| 1542 | // already force enabled by the kernel config option BPF_JIT_ALWAYS_ON. |
| 1543 | // (Note: this (open) will fail with ENOENT 'No such file or directory' if |
| 1544 | // kernel does not have CONFIG_BPF_JIT=y) |
| 1545 | // BPF_JIT is required by R VINTF (which means 4.14/4.19/5.4 kernels), |
| 1546 | // but 4.14/4.19 were released with P & Q, and only 5.4 is new in R+. |
| 1547 | if (writeProcSysFile("/proc/sys/net/core/bpf_jit_enable", "1\n")) return 1; |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1548 | |
Maciej Żenczykowski | f33f128 | 2023-10-24 04:41:54 -0700 | [diff] [blame] | 1549 | // Enable JIT kallsyms export for privileged users only |
| 1550 | // (Note: this (open) will fail with ENOENT 'No such file or directory' if |
| 1551 | // kernel does not have CONFIG_HAVE_EBPF_JIT=y) |
| 1552 | if (writeProcSysFile("/proc/sys/net/core/bpf_jit_kallsyms", "1\n")) return 1; |
| 1553 | } |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1554 | |
| 1555 | // Create all the pin subdirectories |
| 1556 | // (this must be done first to allow selinux_context and pin_subdir functionality, |
| 1557 | // which could otherwise fail with ENOENT during object pinning or renaming, |
| 1558 | // due to ordering issues) |
| 1559 | for (const auto& location : locations) { |
| 1560 | if (createSysFsBpfSubDir(location.prefix)) return 1; |
| 1561 | } |
| 1562 | |
Maciej Żenczykowski | a9209da | 2024-02-29 02:01:20 +0000 | [diff] [blame] | 1563 | // Note: there's no actual src dir for fs_bpf_loader .o's, |
| 1564 | // so it is not listed in 'locations[].prefix'. |
| 1565 | // This is because this is primarily meant for triggering genfscon rules, |
| 1566 | // and as such this will likely always be the case. |
| 1567 | // Thus we need to manually create the /sys/fs/bpf/loader subdirectory. |
| 1568 | if (createSysFsBpfSubDir("loader")) return 1; |
| 1569 | |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1570 | // Load all ELF objects, create programs and maps, and pin them |
| 1571 | for (const auto& location : locations) { |
Maciej Żenczykowski | 221b248 | 2024-03-18 14:33:10 -0700 | [diff] [blame] | 1572 | if (loadAllElfObjects(bpfloader_ver, location) != 0) { |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1573 | ALOGE("=== CRITICAL FAILURE LOADING BPF PROGRAMS FROM %s ===", location.dir); |
| 1574 | ALOGE("If this triggers reliably, you're probably missing kernel options or patches."); |
| 1575 | ALOGE("If this triggers randomly, you might be hitting some memory allocation " |
| 1576 | "problems or startup script race."); |
| 1577 | ALOGE("--- DO NOT EXPECT SYSTEM TO BOOT SUCCESSFULLY ---"); |
| 1578 | sleep(20); |
| 1579 | return 2; |
| 1580 | } |
| 1581 | } |
| 1582 | |
| 1583 | int key = 1; |
| 1584 | int value = 123; |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1585 | base::unique_fd map( |
| 1586 | createMap(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2, 0)); |
| 1587 | if (writeToMapEntry(map, &key, &value, BPF_ANY)) { |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1588 | ALOGE("Critical kernel bug - failure to write into index 1 of 2 element bpf map array."); |
| 1589 | return 1; |
| 1590 | } |
| 1591 | |
Maciej Żenczykowski | 15f9731 | 2024-06-13 14:11:28 -0700 | [diff] [blame] | 1592 | // leave a flag that we're done |
| 1593 | if (createSysFsBpfSubDir("netd_shared/mainline_done")) return 1; |
Maciej Żenczykowski | 58c1822 | 2023-10-20 14:40:16 -0700 | [diff] [blame] | 1594 | |
Maciej Żenczykowski | 15f9731 | 2024-06-13 14:11:28 -0700 | [diff] [blame] | 1595 | // platform bpfloader will only succeed when run as root |
| 1596 | if (!runningAsRoot) { |
| 1597 | // unreachable on U QPR3+ which always runs netbpfload as root |
| 1598 | |
| 1599 | ALOGI("mainline done, no need to transfer control to platform bpf loader."); |
| 1600 | return 0; |
Maciej Żenczykowski | 732a141 | 2024-03-14 00:17:18 -0700 | [diff] [blame] | 1601 | } |
| 1602 | |
Maciej Żenczykowski | 15f9731 | 2024-06-13 14:11:28 -0700 | [diff] [blame] | 1603 | // unreachable before U QPR3 |
| 1604 | ALOGI("done, transferring control to platform bpfloader."); |
| 1605 | |
| 1606 | // platform BpfLoader *needs* to run as root |
| 1607 | const char * args[] = { platformBpfLoader, NULL, }; |
| 1608 | execve(args[0], (char**)args, envp); |
| 1609 | ALOGE("FATAL: execve('%s'): %d[%s]", platformBpfLoader, errno, strerror(errno)); |
| 1610 | return 1; |
Maciej Żenczykowski | 60c159f | 2023-10-02 14:54:48 -0700 | [diff] [blame] | 1611 | } |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1612 | |
| 1613 | } // namespace bpf |
| 1614 | } // namespace android |
| 1615 | |
Maciej Żenczykowski | 6d151ef | 2024-04-30 23:55:57 -0700 | [diff] [blame] | 1616 | int main(int argc, char** argv, char * const envp[]) { |
| 1617 | android::base::InitLogging(argv, &android::base::KernelLogger); |
| 1618 | |
| 1619 | if (argc == 2 && !strcmp(argv[1], "done")) { |
| 1620 | // we're being re-exec'ed from platform bpfloader to 'finalize' things |
| 1621 | if (!android::base::SetProperty("bpf.progs_loaded", "1")) { |
| 1622 | ALOGE("Failed to set bpf.progs_loaded property to 1."); |
| 1623 | return 125; |
| 1624 | } |
Maciej Żenczykowski | 66f1629 | 2024-05-06 23:52:33 -0700 | [diff] [blame] | 1625 | ALOGI("success."); |
Maciej Żenczykowski | 6d151ef | 2024-04-30 23:55:57 -0700 | [diff] [blame] | 1626 | return 0; |
| 1627 | } |
| 1628 | |
| 1629 | return android::bpf::doLoad(argv, envp); |
Maciej Żenczykowski | 75c2def | 2024-04-25 14:19:14 -0700 | [diff] [blame] | 1630 | } |