| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2019 The Android Open Source Project | 
|  | 3 | * | 
|  | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | * you may not use this file except in compliance with the License. | 
|  | 6 | * You may obtain a copy of the License at | 
|  | 7 | * | 
|  | 8 | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | * | 
|  | 10 | * Unless required by applicable law or agreed to in writing, software | 
|  | 11 | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | * See the License for the specific language governing permissions and | 
|  | 14 | * limitations under the License. | 
|  | 15 | */ | 
|  | 16 |  | 
|  | 17 | #include "service_utils.h" | 
|  | 18 |  | 
| Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 19 | #include <fcntl.h> | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 20 | #include <grp.h> | 
|  | 21 | #include <sys/mount.h> | 
|  | 22 | #include <sys/prctl.h> | 
|  | 23 | #include <sys/wait.h> | 
| Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 24 | #include <unistd.h> | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 25 |  | 
|  | 26 | #include <android-base/file.h> | 
|  | 27 | #include <android-base/logging.h> | 
|  | 28 | #include <android-base/properties.h> | 
|  | 29 | #include <android-base/stringprintf.h> | 
|  | 30 | #include <android-base/strings.h> | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 31 | #include <cutils/android_get_control_file.h> | 
|  | 32 | #include <cutils/sockets.h> | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 33 | #include <processgroup/processgroup.h> | 
|  | 34 |  | 
|  | 35 | #include "mount_namespace.h" | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 36 | #include "util.h" | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 37 |  | 
|  | 38 | using android::base::GetProperty; | 
|  | 39 | using android::base::StartsWith; | 
|  | 40 | using android::base::StringPrintf; | 
|  | 41 | using android::base::unique_fd; | 
|  | 42 | using android::base::WriteStringToFile; | 
|  | 43 |  | 
|  | 44 | namespace android { | 
|  | 45 | namespace init { | 
|  | 46 |  | 
|  | 47 | namespace { | 
|  | 48 |  | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 49 | Result<void> EnterNamespace(int nstype, const char* path) { | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 50 | auto fd = unique_fd{open(path, O_RDONLY | O_CLOEXEC)}; | 
|  | 51 | if (fd == -1) { | 
|  | 52 | return ErrnoError() << "Could not open namespace at " << path; | 
|  | 53 | } | 
|  | 54 | if (setns(fd, nstype) == -1) { | 
|  | 55 | return ErrnoError() << "Could not setns() namespace at " << path; | 
|  | 56 | } | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 57 | return {}; | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 58 | } | 
|  | 59 |  | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 60 | Result<void> SetUpMountNamespace(bool remount_proc, bool remount_sys) { | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 61 | constexpr unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID; | 
|  | 62 |  | 
|  | 63 | // Recursively remount / as slave like zygote does so unmounting and mounting /proc | 
|  | 64 | // doesn't interfere with the parent namespace's /proc mount. This will also | 
|  | 65 | // prevent any other mounts/unmounts initiated by the service from interfering | 
|  | 66 | // with the parent namespace but will still allow mount events from the parent | 
|  | 67 | // namespace to propagate to the child. | 
|  | 68 | if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) { | 
|  | 69 | return ErrnoError() << "Could not remount(/) recursively as slave"; | 
|  | 70 | } | 
|  | 71 |  | 
|  | 72 | // umount() then mount() /proc and/or /sys | 
|  | 73 | // Note that it is not sufficient to mount with MS_REMOUNT. | 
|  | 74 | if (remount_proc) { | 
|  | 75 | if (umount("/proc") == -1) { | 
|  | 76 | return ErrnoError() << "Could not umount(/proc)"; | 
|  | 77 | } | 
|  | 78 | if (mount("", "/proc", "proc", kSafeFlags, "") == -1) { | 
|  | 79 | return ErrnoError() << "Could not mount(/proc)"; | 
|  | 80 | } | 
|  | 81 | } | 
|  | 82 | if (remount_sys) { | 
|  | 83 | if (umount2("/sys", MNT_DETACH) == -1) { | 
|  | 84 | return ErrnoError() << "Could not umount(/sys)"; | 
|  | 85 | } | 
|  | 86 | if (mount("", "/sys", "sysfs", kSafeFlags, "") == -1) { | 
|  | 87 | return ErrnoError() << "Could not mount(/sys)"; | 
|  | 88 | } | 
|  | 89 | } | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 90 | return {}; | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 91 | } | 
|  | 92 |  | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 93 | Result<void> SetUpPidNamespace(const char* name) { | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 94 | if (prctl(PR_SET_NAME, name) == -1) { | 
|  | 95 | return ErrnoError() << "Could not set name"; | 
|  | 96 | } | 
|  | 97 |  | 
|  | 98 | pid_t child_pid = fork(); | 
|  | 99 | if (child_pid == -1) { | 
|  | 100 | return ErrnoError() << "Could not fork init inside the PID namespace"; | 
|  | 101 | } | 
|  | 102 |  | 
|  | 103 | if (child_pid > 0) { | 
|  | 104 | // So that we exit with the right status. | 
|  | 105 | static int init_exitstatus = 0; | 
|  | 106 | signal(SIGTERM, [](int) { _exit(init_exitstatus); }); | 
|  | 107 |  | 
|  | 108 | pid_t waited_pid; | 
|  | 109 | int status; | 
|  | 110 | while ((waited_pid = wait(&status)) > 0) { | 
|  | 111 | // This loop will end when there are no processes left inside the | 
|  | 112 | // PID namespace or when the init process inside the PID namespace | 
|  | 113 | // gets a signal. | 
|  | 114 | if (waited_pid == child_pid) { | 
|  | 115 | init_exitstatus = status; | 
|  | 116 | } | 
|  | 117 | } | 
|  | 118 | if (!WIFEXITED(init_exitstatus)) { | 
|  | 119 | _exit(EXIT_FAILURE); | 
|  | 120 | } | 
|  | 121 | _exit(WEXITSTATUS(init_exitstatus)); | 
|  | 122 | } | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 123 | return {}; | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 124 | } | 
|  | 125 |  | 
| Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 126 | void SetupStdio(bool stdio_to_kmsg) { | 
| Tom Cherry | 247ffbf | 2019-07-08 15:09:36 -0700 | [diff] [blame] | 127 | auto fd = unique_fd{open("/dev/null", O_RDWR | O_CLOEXEC)}; | 
| Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 128 | dup2(fd, STDIN_FILENO); | 
|  | 129 | if (stdio_to_kmsg) { | 
|  | 130 | fd.reset(open("/dev/kmsg_debug", O_WRONLY | O_CLOEXEC)); | 
|  | 131 | if (fd == -1) fd.reset(open("/dev/null", O_WRONLY | O_CLOEXEC)); | 
|  | 132 | } | 
|  | 133 | dup2(fd, STDOUT_FILENO); | 
|  | 134 | dup2(fd, STDERR_FILENO); | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 135 | } | 
|  | 136 |  | 
|  | 137 | void OpenConsole(const std::string& console) { | 
| Tom Cherry | 247ffbf | 2019-07-08 15:09:36 -0700 | [diff] [blame] | 138 | auto fd = unique_fd{open(console.c_str(), O_RDWR | O_CLOEXEC)}; | 
|  | 139 | if (fd == -1) fd.reset(open("/dev/null", O_RDWR | O_CLOEXEC)); | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 140 | ioctl(fd, TIOCSCTTY, 0); | 
|  | 141 | dup2(fd, 0); | 
|  | 142 | dup2(fd, 1); | 
|  | 143 | dup2(fd, 2); | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 144 | } | 
|  | 145 |  | 
| Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 146 | }  // namespace | 
|  | 147 |  | 
|  | 148 | void Descriptor::Publish() const { | 
|  | 149 | auto published_name = name_; | 
|  | 150 |  | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 151 | for (auto& c : published_name) { | 
|  | 152 | c = isalnum(c) ? c : '_'; | 
|  | 153 | } | 
|  | 154 |  | 
| Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 155 | int fd = fd_.get(); | 
|  | 156 | // For safety, the FD is created as CLOEXEC, so that must be removed before publishing. | 
|  | 157 | auto fd_flags = fcntl(fd, F_GETFD); | 
|  | 158 | fd_flags &= ~FD_CLOEXEC; | 
|  | 159 | if (fcntl(fd, F_SETFD, fd_flags) != 0) { | 
|  | 160 | PLOG(ERROR) << "Failed to remove CLOEXEC from '" << published_name << "'"; | 
|  | 161 | } | 
|  | 162 |  | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 163 | std::string val = std::to_string(fd); | 
|  | 164 | setenv(published_name.c_str(), val.c_str(), 1); | 
|  | 165 | } | 
|  | 166 |  | 
| Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 167 | Result<Descriptor> SocketDescriptor::Create(const std::string& global_context) const { | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 168 | const auto& socket_context = context.empty() ? global_context : context; | 
| Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 169 | auto result = CreateSocket(name, type | SOCK_CLOEXEC, passcred, perm, uid, gid, socket_context); | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 170 | if (!result) { | 
|  | 171 | return result.error(); | 
|  | 172 | } | 
|  | 173 |  | 
| Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 174 | return Descriptor(ANDROID_SOCKET_ENV_PREFIX + name, unique_fd(*result)); | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 175 | } | 
|  | 176 |  | 
| Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 177 | Result<Descriptor> FileDescriptor::Create() const { | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 178 | int flags = (type == "r") ? O_RDONLY : (type == "w") ? O_WRONLY : O_RDWR; | 
|  | 179 |  | 
|  | 180 | // Make sure we do not block on open (eg: devices can chose to block on carrier detect).  Our | 
|  | 181 | // intention is never to delay launch of a service for such a condition.  The service can | 
|  | 182 | // perform its own blocking on carrier detect. | 
| Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 183 | unique_fd fd(TEMP_FAILURE_RETRY(open(name.c_str(), flags | O_NONBLOCK | O_CLOEXEC))); | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 184 |  | 
|  | 185 | if (fd < 0) { | 
|  | 186 | return ErrnoError() << "Failed to open file '" << name << "'"; | 
|  | 187 | } | 
|  | 188 |  | 
|  | 189 | // Fixup as we set O_NONBLOCK for open, the intent for fd is to block reads. | 
|  | 190 | fcntl(fd, F_SETFL, flags); | 
|  | 191 |  | 
|  | 192 | LOG(INFO) << "Opened file '" << name << "', flags " << flags; | 
|  | 193 |  | 
| Tom Cherry | 5241d10 | 2019-09-10 14:20:35 -0700 | [diff] [blame] | 194 | return Descriptor(ANDROID_FILE_ENV_PREFIX + name, std::move(fd)); | 
| Tom Cherry | 2e4c85f | 2019-07-09 13:33:36 -0700 | [diff] [blame] | 195 | } | 
|  | 196 |  | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 197 | Result<void> EnterNamespaces(const NamespaceInfo& info, const std::string& name, bool pre_apexd) { | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 198 | for (const auto& [nstype, path] : info.namespaces_to_enter) { | 
|  | 199 | if (auto result = EnterNamespace(nstype, path.c_str()); !result) { | 
|  | 200 | return result; | 
|  | 201 | } | 
|  | 202 | } | 
|  | 203 |  | 
|  | 204 | #if defined(__ANDROID__) | 
|  | 205 | if (pre_apexd) { | 
|  | 206 | if (!SwitchToBootstrapMountNamespaceIfNeeded()) { | 
|  | 207 | return Error() << "could not enter into the bootstrap mount namespace"; | 
|  | 208 | } | 
|  | 209 | } | 
|  | 210 | #endif | 
|  | 211 |  | 
|  | 212 | if (info.flags & CLONE_NEWNS) { | 
|  | 213 | bool remount_proc = info.flags & CLONE_NEWPID; | 
|  | 214 | bool remount_sys = | 
|  | 215 | std::any_of(info.namespaces_to_enter.begin(), info.namespaces_to_enter.end(), | 
|  | 216 | [](const auto& entry) { return entry.first == CLONE_NEWNET; }); | 
|  | 217 | if (auto result = SetUpMountNamespace(remount_proc, remount_sys); !result) { | 
|  | 218 | return result; | 
|  | 219 | } | 
|  | 220 | } | 
|  | 221 |  | 
|  | 222 | if (info.flags & CLONE_NEWPID) { | 
|  | 223 | // This will fork again to run an init process inside the PID namespace. | 
|  | 224 | if (auto result = SetUpPidNamespace(name.c_str()); !result) { | 
|  | 225 | return result; | 
|  | 226 | } | 
|  | 227 | } | 
|  | 228 |  | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 229 | return {}; | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 230 | } | 
|  | 231 |  | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 232 | Result<void> SetProcessAttributes(const ProcessAttributes& attr) { | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 233 | if (attr.ioprio_class != IoSchedClass_NONE) { | 
|  | 234 | if (android_set_ioprio(getpid(), attr.ioprio_class, attr.ioprio_pri)) { | 
|  | 235 | PLOG(ERROR) << "failed to set pid " << getpid() << " ioprio=" << attr.ioprio_class | 
|  | 236 | << "," << attr.ioprio_pri; | 
|  | 237 | } | 
|  | 238 | } | 
|  | 239 |  | 
|  | 240 | if (!attr.console.empty()) { | 
|  | 241 | setsid(); | 
|  | 242 | OpenConsole(attr.console); | 
|  | 243 | } else { | 
|  | 244 | if (setpgid(0, getpid()) == -1) { | 
|  | 245 | return ErrnoError() << "setpgid failed"; | 
|  | 246 | } | 
| Tom Cherry | f74b7f5 | 2019-09-23 16:16:54 -0700 | [diff] [blame] | 247 | SetupStdio(attr.stdio_to_kmsg); | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 248 | } | 
|  | 249 |  | 
|  | 250 | for (const auto& rlimit : attr.rlimits) { | 
|  | 251 | if (setrlimit(rlimit.first, &rlimit.second) == -1) { | 
|  | 252 | return ErrnoError() << StringPrintf( | 
|  | 253 | "setrlimit(%d, {rlim_cur=%ld, rlim_max=%ld}) failed", rlimit.first, | 
|  | 254 | rlimit.second.rlim_cur, rlimit.second.rlim_max); | 
|  | 255 | } | 
|  | 256 | } | 
|  | 257 |  | 
|  | 258 | if (attr.gid) { | 
|  | 259 | if (setgid(attr.gid) != 0) { | 
|  | 260 | return ErrnoError() << "setgid failed"; | 
|  | 261 | } | 
|  | 262 | } | 
|  | 263 | if (setgroups(attr.supp_gids.size(), const_cast<gid_t*>(&attr.supp_gids[0])) != 0) { | 
|  | 264 | return ErrnoError() << "setgroups failed"; | 
|  | 265 | } | 
|  | 266 | if (attr.uid) { | 
|  | 267 | if (setuid(attr.uid) != 0) { | 
|  | 268 | return ErrnoError() << "setuid failed"; | 
|  | 269 | } | 
|  | 270 | } | 
|  | 271 |  | 
|  | 272 | if (attr.priority != 0) { | 
|  | 273 | if (setpriority(PRIO_PROCESS, 0, attr.priority) != 0) { | 
|  | 274 | return ErrnoError() << "setpriority failed"; | 
|  | 275 | } | 
|  | 276 | } | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 277 | return {}; | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 278 | } | 
|  | 279 |  | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 280 | Result<void> WritePidToFiles(std::vector<std::string>* files) { | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 281 | // See if there were "writepid" instructions to write to files under cpuset path. | 
|  | 282 | std::string cpuset_path; | 
|  | 283 | if (CgroupGetControllerPath("cpuset", &cpuset_path)) { | 
|  | 284 | auto cpuset_predicate = [&cpuset_path](const std::string& path) { | 
|  | 285 | return StartsWith(path, cpuset_path + "/"); | 
|  | 286 | }; | 
|  | 287 | auto iter = std::find_if(files->begin(), files->end(), cpuset_predicate); | 
|  | 288 | if (iter == files->end()) { | 
|  | 289 | // There were no "writepid" instructions for cpusets, check if the system default | 
|  | 290 | // cpuset is specified to be used for the process. | 
|  | 291 | std::string default_cpuset = GetProperty("ro.cpuset.default", ""); | 
|  | 292 | if (!default_cpuset.empty()) { | 
|  | 293 | // Make sure the cpuset name starts and ends with '/'. | 
|  | 294 | // A single '/' means the 'root' cpuset. | 
|  | 295 | if (default_cpuset.front() != '/') { | 
|  | 296 | default_cpuset.insert(0, 1, '/'); | 
|  | 297 | } | 
|  | 298 | if (default_cpuset.back() != '/') { | 
|  | 299 | default_cpuset.push_back('/'); | 
|  | 300 | } | 
|  | 301 | files->push_back( | 
|  | 302 | StringPrintf("%s%stasks", cpuset_path.c_str(), default_cpuset.c_str())); | 
|  | 303 | } | 
|  | 304 | } | 
|  | 305 | } else { | 
|  | 306 | LOG(ERROR) << "cpuset cgroup controller is not mounted!"; | 
|  | 307 | } | 
|  | 308 | std::string pid_str = std::to_string(getpid()); | 
|  | 309 | for (const auto& file : *files) { | 
|  | 310 | if (!WriteStringToFile(pid_str, file)) { | 
|  | 311 | return ErrnoError() << "couldn't write " << pid_str << " to " << file; | 
|  | 312 | } | 
|  | 313 | } | 
| Tom Cherry | bbcbc2f | 2019-06-10 11:08:01 -0700 | [diff] [blame] | 314 | return {}; | 
| Vic Yang | e01ca4d | 2019-05-29 15:58:32 -0700 | [diff] [blame] | 315 | } | 
|  | 316 |  | 
|  | 317 | }  // namespace init | 
|  | 318 | }  // namespace android |