Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2019 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "service_utils.h" |
| 18 | |
Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 19 | #include <fcntl.h> |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 20 | #include <grp.h> |
Suren Baghdasaryan | 746ede9 | 2022-03-31 21:15:11 +0000 | [diff] [blame] | 21 | #include <map> |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 22 | #include <sys/mount.h> |
| 23 | #include <sys/prctl.h> |
| 24 | #include <sys/wait.h> |
Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 25 | #include <unistd.h> |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 26 | |
| 27 | #include <android-base/file.h> |
| 28 | #include <android-base/logging.h> |
| 29 | #include <android-base/properties.h> |
| 30 | #include <android-base/stringprintf.h> |
| 31 | #include <android-base/strings.h> |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 32 | #include <cutils/android_get_control_file.h> |
| 33 | #include <cutils/sockets.h> |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 34 | #include <processgroup/processgroup.h> |
| 35 | |
| 36 | #include "mount_namespace.h" |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 37 | #include "util.h" |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 38 | |
| 39 | using android::base::GetProperty; |
| 40 | using android::base::StartsWith; |
| 41 | using android::base::StringPrintf; |
| 42 | using android::base::unique_fd; |
| 43 | using android::base::WriteStringToFile; |
| 44 | |
| 45 | namespace android { |
| 46 | namespace init { |
| 47 | |
| 48 | namespace { |
| 49 | |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 50 | Result<void> EnterNamespace(int nstype, const char* path) { |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 51 | auto fd = unique_fd{open(path, O_RDONLY | O_CLOEXEC)}; |
| 52 | if (fd == -1) { |
| 53 | return ErrnoError() << "Could not open namespace at " << path; |
| 54 | } |
| 55 | if (setns(fd, nstype) == -1) { |
| 56 | return ErrnoError() << "Could not setns() namespace at " << path; |
| 57 | } |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 58 | return {}; |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 59 | } |
| 60 | |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 61 | Result<void> SetUpMountNamespace(bool remount_proc, bool remount_sys) { |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 62 | constexpr unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID; |
| 63 | |
Elliott Hughes | e79b8c2 | 2020-07-28 11:09:03 -0700 | [diff] [blame] | 64 | // Recursively remount / as MS_SLAVE like zygote does so that |
| 65 | // unmounting and mounting /proc doesn't interfere with the parent |
| 66 | // namespace's /proc mount. This will also prevent any other |
| 67 | // mounts/unmounts initiated by the service from interfering with the |
| 68 | // parent namespace but will still allow mount events from the parent |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 69 | // namespace to propagate to the child. |
| 70 | if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) { |
Elliott Hughes | e79b8c2 | 2020-07-28 11:09:03 -0700 | [diff] [blame] | 71 | return ErrnoError() << "Could not remount(/) recursively as MS_SLAVE"; |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 72 | } |
| 73 | |
| 74 | // umount() then mount() /proc and/or /sys |
| 75 | // Note that it is not sufficient to mount with MS_REMOUNT. |
| 76 | if (remount_proc) { |
| 77 | if (umount("/proc") == -1) { |
| 78 | return ErrnoError() << "Could not umount(/proc)"; |
| 79 | } |
| 80 | if (mount("", "/proc", "proc", kSafeFlags, "") == -1) { |
| 81 | return ErrnoError() << "Could not mount(/proc)"; |
| 82 | } |
| 83 | } |
| 84 | if (remount_sys) { |
| 85 | if (umount2("/sys", MNT_DETACH) == -1) { |
| 86 | return ErrnoError() << "Could not umount(/sys)"; |
| 87 | } |
| 88 | if (mount("", "/sys", "sysfs", kSafeFlags, "") == -1) { |
| 89 | return ErrnoError() << "Could not mount(/sys)"; |
| 90 | } |
| 91 | } |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 92 | return {}; |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 93 | } |
| 94 | |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 95 | Result<void> SetUpPidNamespace(const char* name) { |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 96 | if (prctl(PR_SET_NAME, name) == -1) { |
| 97 | return ErrnoError() << "Could not set name"; |
| 98 | } |
| 99 | |
| 100 | pid_t child_pid = fork(); |
| 101 | if (child_pid == -1) { |
| 102 | return ErrnoError() << "Could not fork init inside the PID namespace"; |
| 103 | } |
| 104 | |
| 105 | if (child_pid > 0) { |
| 106 | // So that we exit with the right status. |
| 107 | static int init_exitstatus = 0; |
| 108 | signal(SIGTERM, [](int) { _exit(init_exitstatus); }); |
| 109 | |
| 110 | pid_t waited_pid; |
| 111 | int status; |
| 112 | while ((waited_pid = wait(&status)) > 0) { |
| 113 | // This loop will end when there are no processes left inside the |
| 114 | // PID namespace or when the init process inside the PID namespace |
| 115 | // gets a signal. |
| 116 | if (waited_pid == child_pid) { |
| 117 | init_exitstatus = status; |
| 118 | } |
| 119 | } |
| 120 | if (!WIFEXITED(init_exitstatus)) { |
| 121 | _exit(EXIT_FAILURE); |
| 122 | } |
| 123 | _exit(WEXITSTATUS(init_exitstatus)); |
| 124 | } |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 125 | return {}; |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 126 | } |
| 127 | |
Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 128 | void SetupStdio(bool stdio_to_kmsg) { |
Tom Cherry | 247ffbf | 2019-07-08 15:09:36 -0700 | [diff] [blame] | 129 | auto fd = unique_fd{open("/dev/null", O_RDWR | O_CLOEXEC)}; |
Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 130 | dup2(fd, STDIN_FILENO); |
| 131 | if (stdio_to_kmsg) { |
| 132 | fd.reset(open("/dev/kmsg_debug", O_WRONLY | O_CLOEXEC)); |
| 133 | if (fd == -1) fd.reset(open("/dev/null", O_WRONLY | O_CLOEXEC)); |
| 134 | } |
| 135 | dup2(fd, STDOUT_FILENO); |
| 136 | dup2(fd, STDERR_FILENO); |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 137 | } |
| 138 | |
| 139 | void OpenConsole(const std::string& console) { |
Tom Cherry | 247ffbf | 2019-07-08 15:09:36 -0700 | [diff] [blame] | 140 | auto fd = unique_fd{open(console.c_str(), O_RDWR | O_CLOEXEC)}; |
| 141 | if (fd == -1) fd.reset(open("/dev/null", O_RDWR | O_CLOEXEC)); |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 142 | ioctl(fd, TIOCSCTTY, 0); |
| 143 | dup2(fd, 0); |
| 144 | dup2(fd, 1); |
| 145 | dup2(fd, 2); |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 146 | } |
| 147 | |
Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 148 | } // namespace |
| 149 | |
| 150 | void Descriptor::Publish() const { |
| 151 | auto published_name = name_; |
| 152 | |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 153 | for (auto& c : published_name) { |
| 154 | c = isalnum(c) ? c : '_'; |
| 155 | } |
| 156 | |
Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 157 | int fd = fd_.get(); |
| 158 | // For safety, the FD is created as CLOEXEC, so that must be removed before publishing. |
| 159 | auto fd_flags = fcntl(fd, F_GETFD); |
| 160 | fd_flags &= ~FD_CLOEXEC; |
| 161 | if (fcntl(fd, F_SETFD, fd_flags) != 0) { |
| 162 | PLOG(ERROR) << "Failed to remove CLOEXEC from '" << published_name << "'"; |
| 163 | } |
| 164 | |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 165 | std::string val = std::to_string(fd); |
| 166 | setenv(published_name.c_str(), val.c_str(), 1); |
| 167 | } |
| 168 | |
Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 169 | Result<Descriptor> SocketDescriptor::Create(const std::string& global_context) const { |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 170 | const auto& socket_context = context.empty() ? global_context : context; |
Adam Langley | ecc14a5 | 2022-05-11 22:32:47 +0000 | [diff] [blame] | 171 | auto result = CreateSocket(name, type | SOCK_CLOEXEC, passcred, listen, perm, uid, gid, |
| 172 | socket_context); |
Bernie Innocenti | cecebbb | 2020-02-06 03:49:33 +0900 | [diff] [blame] | 173 | if (!result.ok()) { |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 174 | return result.error(); |
| 175 | } |
| 176 | |
Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 177 | return Descriptor(ANDROID_SOCKET_ENV_PREFIX + name, unique_fd(*result)); |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 178 | } |
| 179 | |
Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 180 | Result<Descriptor> FileDescriptor::Create() const { |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 181 | int flags = (type == "r") ? O_RDONLY : (type == "w") ? O_WRONLY : O_RDWR; |
| 182 | |
| 183 | // Make sure we do not block on open (eg: devices can chose to block on carrier detect). Our |
| 184 | // intention is never to delay launch of a service for such a condition. The service can |
| 185 | // perform its own blocking on carrier detect. |
Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 186 | unique_fd fd(TEMP_FAILURE_RETRY(open(name.c_str(), flags | O_NONBLOCK | O_CLOEXEC))); |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 187 | |
| 188 | if (fd < 0) { |
| 189 | return ErrnoError() << "Failed to open file '" << name << "'"; |
| 190 | } |
| 191 | |
| 192 | // Fixup as we set O_NONBLOCK for open, the intent for fd is to block reads. |
| 193 | fcntl(fd, F_SETFL, flags); |
| 194 | |
Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 195 | return Descriptor(ANDROID_FILE_ENV_PREFIX + name, std::move(fd)); |
Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 196 | } |
| 197 | |
Jooyung Han | 4f23d5a | 2020-06-09 13:44:17 +0900 | [diff] [blame] | 198 | Result<void> EnterNamespaces(const NamespaceInfo& info, const std::string& name, |
| 199 | std::optional<MountNamespace> override_mount_namespace) { |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 200 | for (const auto& [nstype, path] : info.namespaces_to_enter) { |
Bernie Innocenti | cecebbb | 2020-02-06 03:49:33 +0900 | [diff] [blame] | 201 | if (auto result = EnterNamespace(nstype, path.c_str()); !result.ok()) { |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 202 | return result; |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | #if defined(__ANDROID__) |
Jooyung Han | 4f23d5a | 2020-06-09 13:44:17 +0900 | [diff] [blame] | 207 | if (override_mount_namespace.has_value()) { |
| 208 | if (auto result = SwitchToMountNamespaceIfNeeded(override_mount_namespace.value()); |
| 209 | !result.ok()) { |
| 210 | return result; |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 211 | } |
| 212 | } |
| 213 | #endif |
| 214 | |
| 215 | if (info.flags & CLONE_NEWNS) { |
| 216 | bool remount_proc = info.flags & CLONE_NEWPID; |
| 217 | bool remount_sys = |
| 218 | std::any_of(info.namespaces_to_enter.begin(), info.namespaces_to_enter.end(), |
| 219 | [](const auto& entry) { return entry.first == CLONE_NEWNET; }); |
Bernie Innocenti | cecebbb | 2020-02-06 03:49:33 +0900 | [diff] [blame] | 220 | if (auto result = SetUpMountNamespace(remount_proc, remount_sys); !result.ok()) { |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 221 | return result; |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | if (info.flags & CLONE_NEWPID) { |
| 226 | // This will fork again to run an init process inside the PID namespace. |
Bernie Innocenti | cecebbb | 2020-02-06 03:49:33 +0900 | [diff] [blame] | 227 | if (auto result = SetUpPidNamespace(name.c_str()); !result.ok()) { |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 228 | return result; |
| 229 | } |
| 230 | } |
| 231 | |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 232 | return {}; |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 233 | } |
| 234 | |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 235 | Result<void> SetProcessAttributes(const ProcessAttributes& attr) { |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 236 | if (attr.ioprio_class != IoSchedClass_NONE) { |
| 237 | if (android_set_ioprio(getpid(), attr.ioprio_class, attr.ioprio_pri)) { |
| 238 | PLOG(ERROR) << "failed to set pid " << getpid() << " ioprio=" << attr.ioprio_class |
| 239 | << "," << attr.ioprio_pri; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | if (!attr.console.empty()) { |
| 244 | setsid(); |
| 245 | OpenConsole(attr.console); |
| 246 | } else { |
| 247 | if (setpgid(0, getpid()) == -1) { |
| 248 | return ErrnoError() << "setpgid failed"; |
| 249 | } |
Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 250 | SetupStdio(attr.stdio_to_kmsg); |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 251 | } |
| 252 | |
| 253 | for (const auto& rlimit : attr.rlimits) { |
| 254 | if (setrlimit(rlimit.first, &rlimit.second) == -1) { |
Tom Cherry | 57b9428 | 2020-02-04 15:20:27 -0800 | [diff] [blame] | 255 | return ErrnoErrorf("setrlimit({}, {{rlim_cur={}, rlim_max={}}}) failed", rlimit.first, |
| 256 | rlimit.second.rlim_cur, rlimit.second.rlim_max); |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 257 | } |
| 258 | } |
| 259 | |
| 260 | if (attr.gid) { |
| 261 | if (setgid(attr.gid) != 0) { |
| 262 | return ErrnoError() << "setgid failed"; |
| 263 | } |
| 264 | } |
| 265 | if (setgroups(attr.supp_gids.size(), const_cast<gid_t*>(&attr.supp_gids[0])) != 0) { |
| 266 | return ErrnoError() << "setgroups failed"; |
| 267 | } |
| 268 | if (attr.uid) { |
| 269 | if (setuid(attr.uid) != 0) { |
| 270 | return ErrnoError() << "setuid failed"; |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | if (attr.priority != 0) { |
| 275 | if (setpriority(PRIO_PROCESS, 0, attr.priority) != 0) { |
| 276 | return ErrnoError() << "setpriority failed"; |
| 277 | } |
| 278 | } |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 279 | return {}; |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 280 | } |
| 281 | |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 282 | Result<void> WritePidToFiles(std::vector<std::string>* files) { |
Nikita Ioffe | c2b1654 | 2022-10-20 14:14:39 +0100 | [diff] [blame^] | 283 | if (files->empty()) { |
| 284 | // No files to write pid to, exit early. |
| 285 | return {}; |
| 286 | } |
| 287 | |
| 288 | if (!CgroupsAvailable()) { |
| 289 | return Error() << "cgroups are not available"; |
| 290 | } |
| 291 | |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 292 | // See if there were "writepid" instructions to write to files under cpuset path. |
| 293 | std::string cpuset_path; |
| 294 | if (CgroupGetControllerPath("cpuset", &cpuset_path)) { |
| 295 | auto cpuset_predicate = [&cpuset_path](const std::string& path) { |
| 296 | return StartsWith(path, cpuset_path + "/"); |
| 297 | }; |
| 298 | auto iter = std::find_if(files->begin(), files->end(), cpuset_predicate); |
| 299 | if (iter == files->end()) { |
| 300 | // There were no "writepid" instructions for cpusets, check if the system default |
| 301 | // cpuset is specified to be used for the process. |
| 302 | std::string default_cpuset = GetProperty("ro.cpuset.default", ""); |
| 303 | if (!default_cpuset.empty()) { |
| 304 | // Make sure the cpuset name starts and ends with '/'. |
| 305 | // A single '/' means the 'root' cpuset. |
| 306 | if (default_cpuset.front() != '/') { |
| 307 | default_cpuset.insert(0, 1, '/'); |
| 308 | } |
| 309 | if (default_cpuset.back() != '/') { |
| 310 | default_cpuset.push_back('/'); |
| 311 | } |
| 312 | files->push_back( |
| 313 | StringPrintf("%s%stasks", cpuset_path.c_str(), default_cpuset.c_str())); |
| 314 | } |
| 315 | } |
| 316 | } else { |
| 317 | LOG(ERROR) << "cpuset cgroup controller is not mounted!"; |
| 318 | } |
Suren Baghdasaryan | 746ede9 | 2022-03-31 21:15:11 +0000 | [diff] [blame] | 319 | |
| 320 | // Issue a warning whenever writepid is being used with a cgroup. This can't be done during |
| 321 | // command parsing because cgroups might not be configured at the time or parsing. |
| 322 | for (const auto& file : *files) { |
| 323 | if (CgroupGetControllerFromPath(file, nullptr)) { |
| 324 | LOG(WARNING) << "writepid usage with cgroups path '" << file |
| 325 | << "' is obsolete, please use task_profiles!"; |
| 326 | } |
| 327 | } |
| 328 | |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 329 | std::string pid_str = std::to_string(getpid()); |
| 330 | for (const auto& file : *files) { |
| 331 | if (!WriteStringToFile(pid_str, file)) { |
| 332 | return ErrnoError() << "couldn't write " << pid_str << " to " << file; |
| 333 | } |
| 334 | } |
Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 335 | return {}; |
Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 336 | } |
| 337 | |
| 338 | } // namespace init |
| 339 | } // namespace android |