| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2010 The Android Open Source Project | 
|  | 3 | * | 
|  | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | * you may not use this file except in compliance with the License. | 
|  | 6 | * You may obtain a copy of the License at | 
|  | 7 | * | 
|  | 8 | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | * | 
|  | 10 | * Unless required by applicable law or agreed to in writing, software | 
|  | 11 | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | * See the License for the specific language governing permissions and | 
|  | 14 | * limitations under the License. | 
|  | 15 | */ | 
|  | 16 |  | 
| Luis Hector Chavez | 9f97f47 | 2017-09-06 13:43:57 -0700 | [diff] [blame] | 17 | #include "sigchld_handler.h" | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 18 |  | 
| Elliott Hughes | 8d82ea0 | 2015-02-06 20:15:18 -0800 | [diff] [blame] | 19 | #include <signal.h> | 
| Tom Cherry | 3f5eaae5 | 2017-04-06 16:30:22 -0700 | [diff] [blame] | 20 | #include <string.h> | 
| Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame] | 21 | #include <sys/signalfd.h> | 
| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 22 | #include <sys/socket.h> | 
| Elliott Hughes | da40c00 | 2015-03-27 23:20:44 -0700 | [diff] [blame] | 23 | #include <sys/types.h> | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 24 | #include <sys/wait.h> | 
| Elliott Hughes | da40c00 | 2015-03-27 23:20:44 -0700 | [diff] [blame] | 25 | #include <unistd.h> | 
|  | 26 |  | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 27 | #include <android-base/chrono_utils.h> | 
| Bart Van Assche | ea595ba | 2022-10-21 12:44:02 -0700 | [diff] [blame] | 28 | #include <android-base/file.h> | 
| Tom Cherry | 3f5eaae5 | 2017-04-06 16:30:22 -0700 | [diff] [blame] | 29 | #include <android-base/logging.h> | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 30 | #include <android-base/scopeguard.h> | 
|  | 31 | #include <android-base/stringprintf.h> | 
| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 32 |  | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 33 | #include <thread> | 
|  | 34 |  | 
| Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame] | 35 | #include "epoll.h" | 
| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 36 | #include "init.h" | 
| Tom Cherry | bac3299 | 2015-07-31 12:45:25 -0700 | [diff] [blame] | 37 | #include "service.h" | 
| Tom Cherry | 2aeb1ad | 2019-06-26 10:46:20 -0700 | [diff] [blame] | 38 | #include "service_list.h" | 
| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 39 |  | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 40 | using android::base::boot_clock; | 
|  | 41 | using android::base::make_scope_guard; | 
| Bart Van Assche | ea595ba | 2022-10-21 12:44:02 -0700 | [diff] [blame] | 42 | using android::base::ReadFileToString; | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 43 | using android::base::StringPrintf; | 
|  | 44 | using android::base::Timer; | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 45 |  | 
| Tom Cherry | 81f5d3e | 2017-06-22 12:53:17 -0700 | [diff] [blame] | 46 | namespace android { | 
|  | 47 | namespace init { | 
|  | 48 |  | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 49 | static pid_t ReapOneProcess() { | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 50 | siginfo_t siginfo = {}; | 
|  | 51 | // This returns a zombie pid or informs us that there are no zombies left to be reaped. | 
|  | 52 | // It does NOT reap the pid; that is done below. | 
|  | 53 | if (TEMP_FAILURE_RETRY(waitid(P_ALL, 0, &siginfo, WEXITED | WNOHANG | WNOWAIT)) != 0) { | 
|  | 54 | PLOG(ERROR) << "waitid failed"; | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 55 | return 0; | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 56 | } | 
|  | 57 |  | 
| Bart Van Assche | c7d7ed0 | 2022-10-19 10:52:23 -0700 | [diff] [blame] | 58 | const pid_t pid = siginfo.si_pid; | 
|  | 59 | if (pid == 0) { | 
|  | 60 | DCHECK_EQ(siginfo.si_signo, 0); | 
|  | 61 | return 0; | 
|  | 62 | } | 
|  | 63 |  | 
|  | 64 | DCHECK_EQ(siginfo.si_signo, SIGCHLD); | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 65 |  | 
|  | 66 | // At this point we know we have a zombie pid, so we use this scopeguard to reap the pid | 
|  | 67 | // whenever the function returns from this point forward. | 
|  | 68 | // We do NOT want to reap the zombie earlier as in Service::Reap(), we kill(-pid, ...) and we | 
|  | 69 | // want the pid to remain valid throughout that (and potentially future) usages. | 
|  | 70 | auto reaper = make_scope_guard([pid] { TEMP_FAILURE_RETRY(waitpid(pid, nullptr, WNOHANG)); }); | 
|  | 71 |  | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 72 | std::string name; | 
|  | 73 | std::string wait_string; | 
| Tom Cherry | cb0f9bb | 2017-09-12 15:58:47 -0700 | [diff] [blame] | 74 | Service* service = nullptr; | 
|  | 75 |  | 
| Tom Cherry | fe81541 | 2019-04-23 15:11:07 -0700 | [diff] [blame] | 76 | if (SubcontextChildReap(pid)) { | 
| Tom Cherry | cb0f9bb | 2017-09-12 15:58:47 -0700 | [diff] [blame] | 77 | name = "Subcontext"; | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 78 | } else { | 
| Tom Cherry | cb0f9bb | 2017-09-12 15:58:47 -0700 | [diff] [blame] | 79 | service = ServiceList::GetInstance().FindService(pid, &Service::pid); | 
|  | 80 |  | 
|  | 81 | if (service) { | 
|  | 82 | name = StringPrintf("Service '%s' (pid %d)", service->name().c_str(), pid); | 
|  | 83 | if (service->flags() & SVC_EXEC) { | 
|  | 84 | auto exec_duration = boot_clock::now() - service->time_started(); | 
|  | 85 | auto exec_duration_ms = | 
|  | 86 | std::chrono::duration_cast<std::chrono::milliseconds>(exec_duration).count(); | 
|  | 87 | wait_string = StringPrintf(" waiting took %f seconds", exec_duration_ms / 1000.0f); | 
| Wei Wang | f7c2bfe | 2019-07-31 11:35:18 -0700 | [diff] [blame] | 88 | } else if (service->flags() & SVC_ONESHOT) { | 
|  | 89 | auto exec_duration = boot_clock::now() - service->time_started(); | 
|  | 90 | auto exec_duration_ms = | 
|  | 91 | std::chrono::duration_cast<std::chrono::milliseconds>(exec_duration) | 
|  | 92 | .count(); | 
|  | 93 | wait_string = StringPrintf(" oneshot service took %f seconds in background", | 
|  | 94 | exec_duration_ms / 1000.0f); | 
| Tom Cherry | cb0f9bb | 2017-09-12 15:58:47 -0700 | [diff] [blame] | 95 | } | 
|  | 96 | } else { | 
|  | 97 | name = StringPrintf("Untracked pid %d", pid); | 
|  | 98 | } | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 99 | } | 
|  | 100 |  | 
| Paul Crowley | c73b215 | 2018-04-13 17:38:57 +0000 | [diff] [blame] | 101 | if (siginfo.si_code == CLD_EXITED) { | 
|  | 102 | LOG(INFO) << name << " exited with status " << siginfo.si_status << wait_string; | 
|  | 103 | } else { | 
|  | 104 | LOG(INFO) << name << " received signal " << siginfo.si_status << wait_string; | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 105 | } | 
|  | 106 |  | 
| David Anderson | 0fa7c40 | 2022-03-07 19:10:57 -0800 | [diff] [blame] | 107 | if (!service) { | 
|  | 108 | LOG(INFO) << name << " did not have an associated service entry and will not be reaped"; | 
|  | 109 | return pid; | 
|  | 110 | } | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 111 |  | 
| Paul Crowley | c73b215 | 2018-04-13 17:38:57 +0000 | [diff] [blame] | 112 | service->Reap(siginfo); | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 113 |  | 
|  | 114 | if (service->flags() & SVC_TEMPORARY) { | 
| Tom Cherry | 911b9b1 | 2017-07-27 16:20:58 -0700 | [diff] [blame] | 115 | ServiceList::GetInstance().RemoveService(*service); | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 116 | } | 
|  | 117 |  | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 118 | return pid; | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 119 | } | 
|  | 120 |  | 
| Bart Van Assche | 9c6b723 | 2023-11-20 14:38:55 -0800 | [diff] [blame] | 121 | std::set<pid_t> ReapAnyOutstandingChildren() { | 
|  | 122 | std::set<pid_t> reaped_pids; | 
|  | 123 | for (;;) { | 
|  | 124 | const pid_t pid = ReapOneProcess(); | 
|  | 125 | if (pid <= 0) { | 
|  | 126 | return reaped_pids; | 
|  | 127 | } | 
|  | 128 | reaped_pids.emplace(pid); | 
|  | 129 | } | 
|  | 130 | } | 
|  | 131 |  | 
|  | 132 | static void ReapAndRemove(std::vector<pid_t>& alive_pids) { | 
|  | 133 | for (auto pid : ReapAnyOutstandingChildren()) { | 
|  | 134 | const auto it = std::find(alive_pids.begin(), alive_pids.end(), pid); | 
|  | 135 | if (it != alive_pids.end()) { | 
|  | 136 | alive_pids.erase(it); | 
|  | 137 | } | 
| Tom Cherry | eeee831 | 2017-07-28 15:22:23 -0700 | [diff] [blame] | 138 | } | 
|  | 139 | } | 
|  | 140 |  | 
| Bart Van Assche | 1daf88d | 2023-11-16 11:19:08 -0800 | [diff] [blame] | 141 | static void HandleSignal(int signal_fd) { | 
| Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame] | 142 | signalfd_siginfo siginfo; | 
|  | 143 | ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo))); | 
|  | 144 | if (bytes_read != sizeof(siginfo)) { | 
|  | 145 | LOG(WARNING) << "Unexpected: " << __func__ << " read " << bytes_read << " bytes instead of " | 
|  | 146 | << sizeof(siginfo); | 
|  | 147 | } | 
|  | 148 | } | 
|  | 149 |  | 
|  | 150 | void WaitToBeReaped(int sigchld_fd, const std::vector<pid_t>& pids, | 
|  | 151 | std::chrono::milliseconds timeout) { | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 152 | Timer t; | 
| Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame] | 153 | Epoll epoll; | 
| Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame] | 154 | if (sigchld_fd >= 0) { | 
| Bart Van Assche | 1daf88d | 2023-11-16 11:19:08 -0800 | [diff] [blame] | 155 | if (auto result = epoll.Open(); result.ok()) { | 
|  | 156 | result = | 
|  | 157 | epoll.RegisterHandler(sigchld_fd, [sigchld_fd]() { HandleSignal(sigchld_fd); }); | 
|  | 158 | if (!result.ok()) { | 
|  | 159 | LOG(WARNING) << __func__ | 
|  | 160 | << " RegisterHandler() failed. Falling back to sleep_for(): " | 
|  | 161 | << result.error(); | 
|  | 162 | sigchld_fd = -1; | 
|  | 163 | } | 
| Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame] | 164 | } else { | 
| Bart Van Assche | 1daf88d | 2023-11-16 11:19:08 -0800 | [diff] [blame] | 165 | LOG(WARNING) << __func__ << " Epoll::Open() failed. Falling back to sleep_for(): " | 
|  | 166 | << result.error(); | 
|  | 167 | sigchld_fd = -1; | 
| Bart Van Assche | a75f210 | 2023-11-03 10:33:17 -0700 | [diff] [blame] | 168 | } | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 169 | } | 
| Bart Van Assche | 1daf88d | 2023-11-16 11:19:08 -0800 | [diff] [blame] | 170 | std::vector<pid_t> alive_pids(pids); | 
|  | 171 | ReapAndRemove(alive_pids); | 
|  | 172 | while (!alive_pids.empty() && t.duration() < timeout) { | 
|  | 173 | if (sigchld_fd >= 0) { | 
|  | 174 | auto result = epoll.Wait(std::max(timeout - t.duration(), 0ms)); | 
|  | 175 | if (result.ok()) { | 
|  | 176 | ReapAndRemove(alive_pids); | 
|  | 177 | continue; | 
|  | 178 | } else { | 
|  | 179 | LOG(WARNING) << "Epoll::Wait() failed " << result.error(); | 
|  | 180 | } | 
|  | 181 | } | 
|  | 182 | std::this_thread::sleep_for(50ms); | 
|  | 183 | ReapAndRemove(alive_pids); | 
|  | 184 | } | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 185 | LOG(INFO) << "Waiting for " << pids.size() << " pids to be reaped took " << t << " with " | 
|  | 186 | << alive_pids.size() << " of them still running"; | 
| Bart Van Assche | 7ce6453 | 2023-11-01 14:29:07 -0700 | [diff] [blame] | 187 | for (pid_t pid : alive_pids) { | 
| Bart Van Assche | ea595ba | 2022-10-21 12:44:02 -0700 | [diff] [blame] | 188 | std::string status = "(no-such-pid)"; | 
|  | 189 | ReadFileToString(StringPrintf("/proc/%d/status", pid), &status); | 
| Bart Van Assche | 7ce6453 | 2023-11-01 14:29:07 -0700 | [diff] [blame] | 190 | LOG(INFO) << "Still running: " << pid << '\n' << status; | 
| Bart Van Assche | ea595ba | 2022-10-21 12:44:02 -0700 | [diff] [blame] | 191 | } | 
| Nikita Ioffe | 3f4b0d6 | 2019-10-09 15:23:02 +0100 | [diff] [blame] | 192 | } | 
|  | 193 |  | 
| Tom Cherry | 81f5d3e | 2017-06-22 12:53:17 -0700 | [diff] [blame] | 194 | }  // namespace init | 
|  | 195 | }  // namespace android |