blob: 7004d8dc9a813a2783fa2d6c1f0266072d5c1470 [file] [log] [blame]
Vic Yange01ca4d2019-05-29 15:58:32 -07001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "service_utils.h"
18
Tom Cherryf74b7f52019-09-23 16:16:54 -070019#include <fcntl.h>
Vic Yange01ca4d2019-05-29 15:58:32 -070020#include <grp.h>
Suren Baghdasaryan746ede92022-03-31 21:15:11 +000021#include <map>
Vic Yange01ca4d2019-05-29 15:58:32 -070022#include <sys/mount.h>
23#include <sys/prctl.h>
24#include <sys/wait.h>
Tom Cherryf74b7f52019-09-23 16:16:54 -070025#include <unistd.h>
Vic Yange01ca4d2019-05-29 15:58:32 -070026
27#include <android-base/file.h>
28#include <android-base/logging.h>
29#include <android-base/properties.h>
30#include <android-base/stringprintf.h>
31#include <android-base/strings.h>
Tom Cherry2e4c85f2019-07-09 13:33:36 -070032#include <cutils/android_get_control_file.h>
33#include <cutils/sockets.h>
Vic Yange01ca4d2019-05-29 15:58:32 -070034#include <processgroup/processgroup.h>
35
36#include "mount_namespace.h"
Tom Cherry2e4c85f2019-07-09 13:33:36 -070037#include "util.h"
Vic Yange01ca4d2019-05-29 15:58:32 -070038
39using android::base::GetProperty;
40using android::base::StartsWith;
41using android::base::StringPrintf;
42using android::base::unique_fd;
43using android::base::WriteStringToFile;
44
45namespace android {
46namespace init {
47
48namespace {
49
Tom Cherrybbcbc2f2019-06-10 11:08:01 -070050Result<void> EnterNamespace(int nstype, const char* path) {
Vic Yange01ca4d2019-05-29 15:58:32 -070051 auto fd = unique_fd{open(path, O_RDONLY | O_CLOEXEC)};
52 if (fd == -1) {
53 return ErrnoError() << "Could not open namespace at " << path;
54 }
Bart Van Asscheaee2ec82022-12-02 18:48:15 -080055 if (setns(fd.get(), nstype) == -1) {
Vic Yange01ca4d2019-05-29 15:58:32 -070056 return ErrnoError() << "Could not setns() namespace at " << path;
57 }
Tom Cherrybbcbc2f2019-06-10 11:08:01 -070058 return {};
Vic Yange01ca4d2019-05-29 15:58:32 -070059}
60
Tom Cherrybbcbc2f2019-06-10 11:08:01 -070061Result<void> SetUpMountNamespace(bool remount_proc, bool remount_sys) {
Vic Yange01ca4d2019-05-29 15:58:32 -070062 constexpr unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID;
63
Elliott Hughese79b8c22020-07-28 11:09:03 -070064 // Recursively remount / as MS_SLAVE like zygote does so that
65 // unmounting and mounting /proc doesn't interfere with the parent
66 // namespace's /proc mount. This will also prevent any other
67 // mounts/unmounts initiated by the service from interfering with the
68 // parent namespace but will still allow mount events from the parent
Vic Yange01ca4d2019-05-29 15:58:32 -070069 // namespace to propagate to the child.
70 if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
Elliott Hughese79b8c22020-07-28 11:09:03 -070071 return ErrnoError() << "Could not remount(/) recursively as MS_SLAVE";
Vic Yange01ca4d2019-05-29 15:58:32 -070072 }
73
74 // umount() then mount() /proc and/or /sys
75 // Note that it is not sufficient to mount with MS_REMOUNT.
76 if (remount_proc) {
77 if (umount("/proc") == -1) {
78 return ErrnoError() << "Could not umount(/proc)";
79 }
80 if (mount("", "/proc", "proc", kSafeFlags, "") == -1) {
81 return ErrnoError() << "Could not mount(/proc)";
82 }
83 }
84 if (remount_sys) {
85 if (umount2("/sys", MNT_DETACH) == -1) {
86 return ErrnoError() << "Could not umount(/sys)";
87 }
88 if (mount("", "/sys", "sysfs", kSafeFlags, "") == -1) {
89 return ErrnoError() << "Could not mount(/sys)";
90 }
91 }
Tom Cherrybbcbc2f2019-06-10 11:08:01 -070092 return {};
Vic Yange01ca4d2019-05-29 15:58:32 -070093}
94
Tom Cherrybbcbc2f2019-06-10 11:08:01 -070095Result<void> SetUpPidNamespace(const char* name) {
Vic Yange01ca4d2019-05-29 15:58:32 -070096 if (prctl(PR_SET_NAME, name) == -1) {
97 return ErrnoError() << "Could not set name";
98 }
99
100 pid_t child_pid = fork();
101 if (child_pid == -1) {
102 return ErrnoError() << "Could not fork init inside the PID namespace";
103 }
104
105 if (child_pid > 0) {
106 // So that we exit with the right status.
107 static int init_exitstatus = 0;
108 signal(SIGTERM, [](int) { _exit(init_exitstatus); });
109
110 pid_t waited_pid;
111 int status;
112 while ((waited_pid = wait(&status)) > 0) {
113 // This loop will end when there are no processes left inside the
114 // PID namespace or when the init process inside the PID namespace
115 // gets a signal.
116 if (waited_pid == child_pid) {
117 init_exitstatus = status;
118 }
119 }
120 if (!WIFEXITED(init_exitstatus)) {
121 _exit(EXIT_FAILURE);
122 }
123 _exit(WEXITSTATUS(init_exitstatus));
124 }
Tom Cherrybbcbc2f2019-06-10 11:08:01 -0700125 return {};
Vic Yange01ca4d2019-05-29 15:58:32 -0700126}
127
Tom Cherryf74b7f52019-09-23 16:16:54 -0700128void SetupStdio(bool stdio_to_kmsg) {
Tom Cherry247ffbf2019-07-08 15:09:36 -0700129 auto fd = unique_fd{open("/dev/null", O_RDWR | O_CLOEXEC)};
Bart Van Asscheaee2ec82022-12-02 18:48:15 -0800130 dup2(fd.get(), STDIN_FILENO);
Tom Cherryf74b7f52019-09-23 16:16:54 -0700131 if (stdio_to_kmsg) {
132 fd.reset(open("/dev/kmsg_debug", O_WRONLY | O_CLOEXEC));
133 if (fd == -1) fd.reset(open("/dev/null", O_WRONLY | O_CLOEXEC));
134 }
Bart Van Asscheaee2ec82022-12-02 18:48:15 -0800135 dup2(fd.get(), STDOUT_FILENO);
136 dup2(fd.get(), STDERR_FILENO);
Vic Yange01ca4d2019-05-29 15:58:32 -0700137}
138
139void OpenConsole(const std::string& console) {
Tom Cherry247ffbf2019-07-08 15:09:36 -0700140 auto fd = unique_fd{open(console.c_str(), O_RDWR | O_CLOEXEC)};
141 if (fd == -1) fd.reset(open("/dev/null", O_RDWR | O_CLOEXEC));
Bart Van Asscheaee2ec82022-12-02 18:48:15 -0800142 ioctl(fd.get(), TIOCSCTTY, 0);
143 dup2(fd.get(), 0);
144 dup2(fd.get(), 1);
145 dup2(fd.get(), 2);
Vic Yange01ca4d2019-05-29 15:58:32 -0700146}
147
Tom Cherry5241d102019-09-10 14:20:35 -0700148} // namespace
149
150void Descriptor::Publish() const {
151 auto published_name = name_;
152
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700153 for (auto& c : published_name) {
154 c = isalnum(c) ? c : '_';
155 }
156
Tom Cherry5241d102019-09-10 14:20:35 -0700157 int fd = fd_.get();
158 // For safety, the FD is created as CLOEXEC, so that must be removed before publishing.
159 auto fd_flags = fcntl(fd, F_GETFD);
160 fd_flags &= ~FD_CLOEXEC;
161 if (fcntl(fd, F_SETFD, fd_flags) != 0) {
162 PLOG(ERROR) << "Failed to remove CLOEXEC from '" << published_name << "'";
163 }
164
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700165 std::string val = std::to_string(fd);
166 setenv(published_name.c_str(), val.c_str(), 1);
167}
168
Tom Cherry5241d102019-09-10 14:20:35 -0700169Result<Descriptor> SocketDescriptor::Create(const std::string& global_context) const {
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700170 const auto& socket_context = context.empty() ? global_context : context;
Adam Langleyecc14a52022-05-11 22:32:47 +0000171 auto result = CreateSocket(name, type | SOCK_CLOEXEC, passcred, listen, perm, uid, gid,
172 socket_context);
Bernie Innocenticecebbb2020-02-06 03:49:33 +0900173 if (!result.ok()) {
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700174 return result.error();
175 }
176
Tom Cherry5241d102019-09-10 14:20:35 -0700177 return Descriptor(ANDROID_SOCKET_ENV_PREFIX + name, unique_fd(*result));
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700178}
179
Tom Cherry5241d102019-09-10 14:20:35 -0700180Result<Descriptor> FileDescriptor::Create() const {
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700181 int flags = (type == "r") ? O_RDONLY : (type == "w") ? O_WRONLY : O_RDWR;
182
183 // Make sure we do not block on open (eg: devices can chose to block on carrier detect). Our
184 // intention is never to delay launch of a service for such a condition. The service can
185 // perform its own blocking on carrier detect.
Tom Cherry5241d102019-09-10 14:20:35 -0700186 unique_fd fd(TEMP_FAILURE_RETRY(open(name.c_str(), flags | O_NONBLOCK | O_CLOEXEC)));
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700187
188 if (fd < 0) {
189 return ErrnoError() << "Failed to open file '" << name << "'";
190 }
191
192 // Fixup as we set O_NONBLOCK for open, the intent for fd is to block reads.
Bart Van Asscheaee2ec82022-12-02 18:48:15 -0800193 fcntl(fd.get(), F_SETFL, flags);
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700194
Tom Cherry5241d102019-09-10 14:20:35 -0700195 return Descriptor(ANDROID_FILE_ENV_PREFIX + name, std::move(fd));
Tom Cherry2e4c85f2019-07-09 13:33:36 -0700196}
197
Jooyung Han4f23d5a2020-06-09 13:44:17 +0900198Result<void> EnterNamespaces(const NamespaceInfo& info, const std::string& name,
199 std::optional<MountNamespace> override_mount_namespace) {
Vic Yange01ca4d2019-05-29 15:58:32 -0700200 for (const auto& [nstype, path] : info.namespaces_to_enter) {
Bernie Innocenticecebbb2020-02-06 03:49:33 +0900201 if (auto result = EnterNamespace(nstype, path.c_str()); !result.ok()) {
Vic Yange01ca4d2019-05-29 15:58:32 -0700202 return result;
203 }
204 }
205
206#if defined(__ANDROID__)
Jooyung Han4f23d5a2020-06-09 13:44:17 +0900207 if (override_mount_namespace.has_value()) {
208 if (auto result = SwitchToMountNamespaceIfNeeded(override_mount_namespace.value());
209 !result.ok()) {
210 return result;
Vic Yange01ca4d2019-05-29 15:58:32 -0700211 }
212 }
213#endif
214
215 if (info.flags & CLONE_NEWNS) {
216 bool remount_proc = info.flags & CLONE_NEWPID;
217 bool remount_sys =
218 std::any_of(info.namespaces_to_enter.begin(), info.namespaces_to_enter.end(),
219 [](const auto& entry) { return entry.first == CLONE_NEWNET; });
Bernie Innocenticecebbb2020-02-06 03:49:33 +0900220 if (auto result = SetUpMountNamespace(remount_proc, remount_sys); !result.ok()) {
Vic Yange01ca4d2019-05-29 15:58:32 -0700221 return result;
222 }
223 }
224
225 if (info.flags & CLONE_NEWPID) {
226 // This will fork again to run an init process inside the PID namespace.
Bernie Innocenticecebbb2020-02-06 03:49:33 +0900227 if (auto result = SetUpPidNamespace(name.c_str()); !result.ok()) {
Vic Yange01ca4d2019-05-29 15:58:32 -0700228 return result;
229 }
230 }
231
Tom Cherrybbcbc2f2019-06-10 11:08:01 -0700232 return {};
Vic Yange01ca4d2019-05-29 15:58:32 -0700233}
234
Bart Van Assche01e66692022-11-14 16:45:47 -0800235Result<void> SetProcessAttributes(const ProcessAttributes& attr, InterprocessFifo setsid_finished) {
Vic Yange01ca4d2019-05-29 15:58:32 -0700236 if (attr.ioprio_class != IoSchedClass_NONE) {
237 if (android_set_ioprio(getpid(), attr.ioprio_class, attr.ioprio_pri)) {
238 PLOG(ERROR) << "failed to set pid " << getpid() << " ioprio=" << attr.ioprio_class
239 << "," << attr.ioprio_pri;
240 }
241 }
242
Bart Van Assche98739162022-11-14 16:54:03 -0800243 if (RequiresConsole(attr)) {
Vic Yange01ca4d2019-05-29 15:58:32 -0700244 setsid();
Bart Van Assche01e66692022-11-14 16:45:47 -0800245 setsid_finished.Write(kSetSidFinished);
246 setsid_finished.Close();
Vic Yange01ca4d2019-05-29 15:58:32 -0700247 OpenConsole(attr.console);
248 } else {
Bart Van Assche01e66692022-11-14 16:45:47 -0800249 // Without PID namespaces, this call duplicates the setpgid() call from
250 // the parent process. With PID namespaces, this setpgid() call sets the
251 // process group ID for a child of the init process in the PID
252 // namespace.
Bart Van Assched394f7432022-11-18 09:45:33 -0800253 if (setpgid(0, 0) == -1) {
Vic Yange01ca4d2019-05-29 15:58:32 -0700254 return ErrnoError() << "setpgid failed";
255 }
Tom Cherryf74b7f52019-09-23 16:16:54 -0700256 SetupStdio(attr.stdio_to_kmsg);
Vic Yange01ca4d2019-05-29 15:58:32 -0700257 }
258
259 for (const auto& rlimit : attr.rlimits) {
260 if (setrlimit(rlimit.first, &rlimit.second) == -1) {
Tom Cherry57b94282020-02-04 15:20:27 -0800261 return ErrnoErrorf("setrlimit({}, {{rlim_cur={}, rlim_max={}}}) failed", rlimit.first,
262 rlimit.second.rlim_cur, rlimit.second.rlim_max);
Vic Yange01ca4d2019-05-29 15:58:32 -0700263 }
264 }
265
266 if (attr.gid) {
267 if (setgid(attr.gid) != 0) {
268 return ErrnoError() << "setgid failed";
269 }
270 }
271 if (setgroups(attr.supp_gids.size(), const_cast<gid_t*>(&attr.supp_gids[0])) != 0) {
272 return ErrnoError() << "setgroups failed";
273 }
274 if (attr.uid) {
275 if (setuid(attr.uid) != 0) {
276 return ErrnoError() << "setuid failed";
277 }
278 }
279
280 if (attr.priority != 0) {
281 if (setpriority(PRIO_PROCESS, 0, attr.priority) != 0) {
282 return ErrnoError() << "setpriority failed";
283 }
284 }
Tom Cherrybbcbc2f2019-06-10 11:08:01 -0700285 return {};
Vic Yange01ca4d2019-05-29 15:58:32 -0700286}
287
Tom Cherrybbcbc2f2019-06-10 11:08:01 -0700288Result<void> WritePidToFiles(std::vector<std::string>* files) {
Nikita Ioffec2b16542022-10-20 14:14:39 +0100289 if (files->empty()) {
290 // No files to write pid to, exit early.
291 return {};
292 }
293
294 if (!CgroupsAvailable()) {
295 return Error() << "cgroups are not available";
296 }
297
Vic Yange01ca4d2019-05-29 15:58:32 -0700298 // See if there were "writepid" instructions to write to files under cpuset path.
299 std::string cpuset_path;
300 if (CgroupGetControllerPath("cpuset", &cpuset_path)) {
301 auto cpuset_predicate = [&cpuset_path](const std::string& path) {
302 return StartsWith(path, cpuset_path + "/");
303 };
304 auto iter = std::find_if(files->begin(), files->end(), cpuset_predicate);
305 if (iter == files->end()) {
306 // There were no "writepid" instructions for cpusets, check if the system default
307 // cpuset is specified to be used for the process.
308 std::string default_cpuset = GetProperty("ro.cpuset.default", "");
309 if (!default_cpuset.empty()) {
310 // Make sure the cpuset name starts and ends with '/'.
311 // A single '/' means the 'root' cpuset.
312 if (default_cpuset.front() != '/') {
313 default_cpuset.insert(0, 1, '/');
314 }
315 if (default_cpuset.back() != '/') {
316 default_cpuset.push_back('/');
317 }
318 files->push_back(
319 StringPrintf("%s%stasks", cpuset_path.c_str(), default_cpuset.c_str()));
320 }
321 }
322 } else {
323 LOG(ERROR) << "cpuset cgroup controller is not mounted!";
324 }
Suren Baghdasaryan746ede92022-03-31 21:15:11 +0000325
326 // Issue a warning whenever writepid is being used with a cgroup. This can't be done during
327 // command parsing because cgroups might not be configured at the time or parsing.
328 for (const auto& file : *files) {
329 if (CgroupGetControllerFromPath(file, nullptr)) {
330 LOG(WARNING) << "writepid usage with cgroups path '" << file
331 << "' is obsolete, please use task_profiles!";
332 }
333 }
334
Vic Yange01ca4d2019-05-29 15:58:32 -0700335 std::string pid_str = std::to_string(getpid());
336 for (const auto& file : *files) {
337 if (!WriteStringToFile(pid_str, file)) {
338 return ErrnoError() << "couldn't write " << pid_str << " to " << file;
339 }
340 }
Tom Cherrybbcbc2f2019-06-10 11:08:01 -0700341 return {};
Vic Yange01ca4d2019-05-29 15:58:32 -0700342}
343
344} // namespace init
345} // namespace android