Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2010 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Luis Hector Chavez | 9f97f47 | 2017-09-06 13:43:57 -0700 | [diff] [blame] | 17 | #include "sigchld_handler.h" |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 18 | |
Elliott Hughes | 8d82ea0 | 2015-02-06 20:15:18 -0800 | [diff] [blame] | 19 | #include <signal.h> |
Tom Cherry | 3f5eaae5 | 2017-04-06 16:30:22 -0700 | [diff] [blame] | 20 | #include <string.h> |
Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame^] | 21 | #include <sys/signalfd.h> |
Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 22 | #include <sys/socket.h> |
Elliott Hughes | da40c00 | 2015-03-27 23:20:44 -0700 | [diff] [blame] | 23 | #include <sys/types.h> |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 24 | #include <sys/wait.h> |
Elliott Hughes | da40c00 | 2015-03-27 23:20:44 -0700 | [diff] [blame] | 25 | #include <unistd.h> |
| 26 | |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 27 | #include <android-base/chrono_utils.h> |
Bart Van Assche | ea595ba | 2022-10-21 12:44:02 -0700 | [diff] [blame] | 28 | #include <android-base/file.h> |
Tom Cherry | 3f5eaae5 | 2017-04-06 16:30:22 -0700 | [diff] [blame] | 29 | #include <android-base/logging.h> |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 30 | #include <android-base/scopeguard.h> |
| 31 | #include <android-base/stringprintf.h> |
Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 32 | |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 33 | #include <thread> |
| 34 | |
Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame^] | 35 | #include "epoll.h" |
Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 36 | #include "init.h" |
Tom Cherry | bac3299 | 2015-07-31 12:45:25 -0700 | [diff] [blame] | 37 | #include "service.h" |
Tom Cherry | 2aeb1ad | 2019-06-26 10:46:20 -0700 | [diff] [blame] | 38 | #include "service_list.h" |
Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 39 | |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 40 | using android::base::boot_clock; |
| 41 | using android::base::make_scope_guard; |
Bart Van Assche | ea595ba | 2022-10-21 12:44:02 -0700 | [diff] [blame] | 42 | using android::base::ReadFileToString; |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 43 | using android::base::StringPrintf; |
| 44 | using android::base::Timer; |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 45 | |
Tom Cherry | 81f5d3e | 2017-06-22 12:53:17 -0700 | [diff] [blame] | 46 | namespace android { |
| 47 | namespace init { |
| 48 | |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 49 | static pid_t ReapOneProcess() { |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 50 | siginfo_t siginfo = {}; |
| 51 | // This returns a zombie pid or informs us that there are no zombies left to be reaped. |
| 52 | // It does NOT reap the pid; that is done below. |
| 53 | if (TEMP_FAILURE_RETRY(waitid(P_ALL, 0, &siginfo, WEXITED | WNOHANG | WNOWAIT)) != 0) { |
| 54 | PLOG(ERROR) << "waitid failed"; |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 55 | return 0; |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 56 | } |
| 57 | |
Bart Van Assche | c7d7ed0 | 2022-10-19 10:52:23 -0700 | [diff] [blame] | 58 | const pid_t pid = siginfo.si_pid; |
| 59 | if (pid == 0) { |
| 60 | DCHECK_EQ(siginfo.si_signo, 0); |
| 61 | return 0; |
| 62 | } |
| 63 | |
| 64 | DCHECK_EQ(siginfo.si_signo, SIGCHLD); |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 65 | |
| 66 | // At this point we know we have a zombie pid, so we use this scopeguard to reap the pid |
| 67 | // whenever the function returns from this point forward. |
| 68 | // We do NOT want to reap the zombie earlier as in Service::Reap(), we kill(-pid, ...) and we |
| 69 | // want the pid to remain valid throughout that (and potentially future) usages. |
| 70 | auto reaper = make_scope_guard([pid] { TEMP_FAILURE_RETRY(waitpid(pid, nullptr, WNOHANG)); }); |
| 71 | |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 72 | std::string name; |
| 73 | std::string wait_string; |
Tom Cherry | cb0f9bb | 2017-09-12 15:58:47 -0700 | [diff] [blame] | 74 | Service* service = nullptr; |
| 75 | |
Tom Cherry | fe81541 | 2019-04-23 15:11:07 -0700 | [diff] [blame] | 76 | if (SubcontextChildReap(pid)) { |
Tom Cherry | cb0f9bb | 2017-09-12 15:58:47 -0700 | [diff] [blame] | 77 | name = "Subcontext"; |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 78 | } else { |
Tom Cherry | cb0f9bb | 2017-09-12 15:58:47 -0700 | [diff] [blame] | 79 | service = ServiceList::GetInstance().FindService(pid, &Service::pid); |
| 80 | |
| 81 | if (service) { |
| 82 | name = StringPrintf("Service '%s' (pid %d)", service->name().c_str(), pid); |
| 83 | if (service->flags() & SVC_EXEC) { |
| 84 | auto exec_duration = boot_clock::now() - service->time_started(); |
| 85 | auto exec_duration_ms = |
| 86 | std::chrono::duration_cast<std::chrono::milliseconds>(exec_duration).count(); |
| 87 | wait_string = StringPrintf(" waiting took %f seconds", exec_duration_ms / 1000.0f); |
Wei Wang | f7c2bfe | 2019-07-31 11:35:18 -0700 | [diff] [blame] | 88 | } else if (service->flags() & SVC_ONESHOT) { |
| 89 | auto exec_duration = boot_clock::now() - service->time_started(); |
| 90 | auto exec_duration_ms = |
| 91 | std::chrono::duration_cast<std::chrono::milliseconds>(exec_duration) |
| 92 | .count(); |
| 93 | wait_string = StringPrintf(" oneshot service took %f seconds in background", |
| 94 | exec_duration_ms / 1000.0f); |
Tom Cherry | cb0f9bb | 2017-09-12 15:58:47 -0700 | [diff] [blame] | 95 | } |
| 96 | } else { |
| 97 | name = StringPrintf("Untracked pid %d", pid); |
| 98 | } |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 99 | } |
| 100 | |
Paul Crowley | c73b215 | 2018-04-13 17:38:57 +0000 | [diff] [blame] | 101 | if (siginfo.si_code == CLD_EXITED) { |
| 102 | LOG(INFO) << name << " exited with status " << siginfo.si_status << wait_string; |
| 103 | } else { |
| 104 | LOG(INFO) << name << " received signal " << siginfo.si_status << wait_string; |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 105 | } |
| 106 | |
David Anderson | 0fa7c40 | 2022-03-07 19:10:57 -0800 | [diff] [blame] | 107 | if (!service) { |
| 108 | LOG(INFO) << name << " did not have an associated service entry and will not be reaped"; |
| 109 | return pid; |
| 110 | } |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 111 | |
Paul Crowley | c73b215 | 2018-04-13 17:38:57 +0000 | [diff] [blame] | 112 | service->Reap(siginfo); |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 113 | |
| 114 | if (service->flags() & SVC_TEMPORARY) { |
Tom Cherry | 911b9b1 | 2017-07-27 16:20:58 -0700 | [diff] [blame] | 115 | ServiceList::GetInstance().RemoveService(*service); |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 116 | } |
| 117 | |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 118 | return pid; |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 119 | } |
| 120 | |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 121 | void ReapAnyOutstandingChildren() { |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 122 | while (ReapOneProcess() != 0) { |
Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 123 | } |
| 124 | } |
| 125 | |
Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame^] | 126 | static void DiscardSiginfo(int signal_fd) { |
| 127 | signalfd_siginfo siginfo; |
| 128 | ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo))); |
| 129 | if (bytes_read != sizeof(siginfo)) { |
| 130 | LOG(WARNING) << "Unexpected: " << __func__ << " read " << bytes_read << " bytes instead of " |
| 131 | << sizeof(siginfo); |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | void WaitToBeReaped(int sigchld_fd, const std::vector<pid_t>& pids, |
| 136 | std::chrono::milliseconds timeout) { |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 137 | Timer t; |
Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame^] | 138 | Epoll epoll; |
| 139 | // The init process passes a valid sigchld_fd argument but unit tests do not. |
| 140 | if (sigchld_fd >= 0) { |
| 141 | epoll.RegisterHandler(sigchld_fd, [sigchld_fd]() { DiscardSiginfo(sigchld_fd); }); |
| 142 | } |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 143 | std::vector<pid_t> alive_pids(pids.begin(), pids.end()); |
| 144 | while (!alive_pids.empty() && t.duration() < timeout) { |
| 145 | pid_t pid; |
| 146 | while ((pid = ReapOneProcess()) != 0) { |
| 147 | auto it = std::find(alive_pids.begin(), alive_pids.end(), pid); |
| 148 | if (it != alive_pids.end()) { |
| 149 | alive_pids.erase(it); |
| 150 | } |
| 151 | } |
| 152 | if (alive_pids.empty()) { |
| 153 | break; |
| 154 | } |
Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame^] | 155 | if (sigchld_fd >= 0) { |
| 156 | epoll.Wait(std::max(timeout - t.duration(), 0ms)); |
| 157 | } else { |
| 158 | std::this_thread::sleep_for(50ms); |
| 159 | } |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 160 | } |
| 161 | LOG(INFO) << "Waiting for " << pids.size() << " pids to be reaped took " << t << " with " |
| 162 | << alive_pids.size() << " of them still running"; |
Bart Van Assche | 7ce6453 | 2023-11-01 14:29:07 -0700 | [diff] [blame] | 163 | for (pid_t pid : alive_pids) { |
Bart Van Assche | ea595ba | 2022-10-21 12:44:02 -0700 | [diff] [blame] | 164 | std::string status = "(no-such-pid)"; |
| 165 | ReadFileToString(StringPrintf("/proc/%d/status", pid), &status); |
Bart Van Assche | 7ce6453 | 2023-11-01 14:29:07 -0700 | [diff] [blame] | 166 | LOG(INFO) << "Still running: " << pid << '\n' << status; |
Bart Van Assche | ea595ba | 2022-10-21 12:44:02 -0700 | [diff] [blame] | 167 | } |
Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 168 | } |
| 169 | |
Tom Cherry | 81f5d3e | 2017-06-22 12:53:17 -0700 | [diff] [blame] | 170 | } // namespace init |
| 171 | } // namespace android |