Zhuoyao Zhang | 5335955 | 2024-09-16 23:58:11 +0000 | [diff] [blame] | 1 | # Copyright 2024, The Android Open Source Project |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | # you may not use this file except in compliance with the License. |
| 5 | # You may obtain a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | # See the License for the specific language governing permissions and |
| 13 | # limitations under the License. |
| 14 | |
| 15 | |
import hashlib
import logging
import multiprocessing
import os
import pathlib
import signal
import subprocess
import tempfile
import time
import typing
| 25 | |
| 26 | |
# Default number of seconds _terminate_process waits after sending SIGTERM
# before it falls back to SIGKILL.
DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1
| 28 | |
| 29 | |
def default_daemon_target():
    """Placeholder daemon target that only prints a fixed marker line."""
    marker = "default daemon target"
    print(marker)
| 33 | |
| 34 | |
class DaemonManager:
    """Manages and monitors a daemon that runs as a subprocess.

    A pidfile whose name is derived from a hash of ``binary_path`` records
    the pid of the process that created this manager. When a manager starts,
    it uses that pidfile to find and terminate a previously started instance
    before launching its own daemon subprocess.
    """

    # Seconds between liveness checks while waiting for a process to exit.
    # Kept well below the default termination timeout so that a process
    # exiting shortly after SIGTERM is noticed before the deadline.
    _POLL_INTERVAL_SECONDS = 0.1

    def __init__(
        self,
        binary_path: str,
        daemon_target: typing.Callable[..., typing.Any] = default_daemon_target,
        daemon_args: tuple = (),
    ):
        """Records the daemon configuration and computes the pidfile path.

        Args:
            binary_path: Path of the binary that starts the process; its hash
                names the pidfile.
            daemon_target: Callable executed in the daemon subprocess.
            daemon_args: Positional arguments passed to ``daemon_target``.
        """
        self.binary_path = binary_path
        self.daemon_target = daemon_target
        self.daemon_args = daemon_args

        self.pid = os.getpid()
        self.daemon_process = None

        pid_file_dir = pathlib.Path(tempfile.gettempdir()).joinpath(
            "edit_monitor"
        )
        pid_file_dir.mkdir(parents=True, exist_ok=True)
        self.pid_file_path = self._get_pid_file_path(pid_file_dir)

    def start(self):
        """Stops any existing instance, writes the pidfile, starts the daemon.

        Any failure is logged and swallowed; nothing is raised to the caller.
        """
        try:
            self._stop_any_existing_instance()
            self._write_pid_to_pidfile()
            self._start_daemon_process()
        except Exception as e:
            logging.exception("Failed to start daemon manager with error %s", e)

    def stop(self):
        """Stops the daemon process and removes the pidfile.

        Any failure is logged and swallowed; nothing is raised to the caller.
        """
        logging.debug("in daemon manager cleanup.")
        try:
            if self.daemon_process and self.daemon_process.is_alive():
                self._terminate_process(self.daemon_process.pid)
            self._remove_pidfile()
        except Exception as e:
            logging.exception("Failed to stop daemon manager with error %s", e)

    def _stop_any_existing_instance(self):
        """Terminates the instance recorded in the pidfile, then removes it.

        A pidfile that is unreadable as a positive pid, or that records this
        very process, is removed without signalling anything so that it
        cannot block the subsequent exclusive pidfile creation.
        """
        if not self.pid_file_path.exists():
            logging.debug("No existing instances.")
            return

        ex_pid = self._read_pid_from_pidfile()

        if ex_pid is None:
            logging.warning(
                "Removing unusable pidfile %s.", self.pid_file_path
            )
        elif ex_pid == self.pid:
            # Never signal ourselves: the pidfile was left behind by an
            # earlier start() of this same process.
            logging.info(
                "pidfile %s already records this process (pid %d).",
                self.pid_file_path,
                ex_pid,
            )
        else:
            logging.info("Found another instance with pid %d.", ex_pid)
            self._terminate_process(ex_pid)

        self._remove_pidfile()

    def _read_pid_from_pidfile(self) -> typing.Optional[int]:
        """Reads the pid stored in the pidfile.

        Returns:
            The recorded pid, or None when the pidfile is missing, does not
            contain an integer, or contains a non-positive value.
        """
        try:
            with open(self.pid_file_path, "r") as f:
                content = f.read().strip()
        except FileNotFoundError:
            # The file can disappear between an existence check and the read.
            logging.warning("pidfile %s does not exist.", self.pid_file_path)
            return None

        try:
            pid = int(content)
        except ValueError:
            logging.warning(
                "pidfile %s has invalid content %r.",
                self.pid_file_path,
                content,
            )
            return None

        if pid <= 0:
            # os.kill() treats pid <= 0 as "signal a group of processes";
            # never let such a value reach it.
            logging.warning(
                "pidfile %s has non-positive pid %d.", self.pid_file_path, pid
            )
            return None

        return pid

    def _write_pid_to_pidfile(self):
        """Creates a pidfile and writes the current pid to the file.

        Raises:
            FileExistsError: If the pidfile already exists.
        """
        try:
            # Use the 'x' mode to open the file for exclusive creation.
            with open(self.pid_file_path, "x") as f:
                f.write(f"{self.pid}")
        except FileExistsError:
            # This could be caused by a race condition where a user is trying
            # to start two edit monitors at the same time, or because there
            # is already an existing edit monitor running that could not be
            # killed for some reason.
            logging.exception("pidfile %s already exists.", self.pid_file_path)
            raise

    def _start_daemon_process(self):
        """Starts a subprocess to run the daemon."""
        p = multiprocessing.Process(
            target=self.daemon_target, args=self.daemon_args
        )
        p.start()

        logging.info("Start subprocess with PID %d", p.pid)
        self.daemon_process = p

    def _terminate_process(
        self, pid: int, timeout: int = DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS
    ):
        """Terminates a process with given pid.

        It first sends a SIGTERM to the process to allow it to terminate
        properly within a timeout. If the process is not terminated within
        the timeout, kills it forcefully with SIGKILL.

        Args:
            pid: Id of the process to terminate.
            timeout: Seconds to wait after SIGTERM before sending SIGKILL.
        """
        try:
            os.kill(pid, signal.SIGTERM)
            if not self._wait_for_process_terminate(pid, timeout):
                logging.warning(
                    "Process %d not terminated within timeout, try force kill",
                    pid,
                )
                os.kill(pid, signal.SIGKILL)
        except ProcessLookupError:
            logging.info(
                "Process with PID %d not found (already terminated)", pid
            )

    def _wait_for_process_terminate(self, pid: int, timeout: int) -> bool:
        """Waits until the process is no longer alive or the timeout expires.

        Args:
            pid: Id of the process to wait for.
            timeout: Maximum number of seconds to wait.

        Returns:
            True if the process was found not alive before the deadline
            (including one final check at the deadline), False otherwise.
        """
        # Monotonic clock: the deadline must not move with wall-clock changes.
        deadline = time.monotonic() + timeout

        while True:
            if not self._is_process_alive(pid):
                return True
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Never sleep past the deadline; the loop re-checks afterwards so
            # an exit during the last sleep is still detected.
            time.sleep(min(self._POLL_INTERVAL_SECONDS, remaining))

        logging.error(
            "Process %d not terminated within %d seconds.", pid, timeout
        )
        return False

    def _is_process_alive(self, pid: int) -> bool:
        """Checks via ``ps`` whether the process exists and is not a zombie.

        Returns:
            False when ``ps`` reports no such process or a zombie state; True
            otherwise, including when the status cannot be determined.
        """
        try:
            output = subprocess.check_output(
                ["ps", "-p", str(pid), "-o", "state="], text=True
            ).strip()
        except subprocess.CalledProcessError:
            # Process not found (already dead).
            return False
        except (FileNotFoundError, OSError, ValueError) as e:
            logging.warning(
                "Unable to check the status for process %d with error: %s.",
                pid,
                e,
            )
            return True

        if not output:
            # ps succeeded but printed no state; treat it like any other
            # undetermined status instead of failing on an empty split.
            logging.warning(
                "Unable to check the status for process %d with error: %s.",
                pid,
                "empty ps output",
            )
            return True

        # A zombie has exited already. Prefix match, in case ps appends
        # modifier characters to the state letter (e.g. "Z+").
        return not output.startswith("Z")

    def _remove_pidfile(self):
        """Deletes the pidfile; a missing file is only logged."""
        try:
            os.remove(self.pid_file_path)
        except FileNotFoundError:
            logging.info("pid file %s already removed.", self.pid_file_path)

    def _get_pid_file_path(self, pid_file_dir: pathlib.Path) -> pathlib.Path:
        """Generates the path to store the pidfile.

        The file path has the format "<pid_file_dir>/xxxx.lock" where xxxx is
        the SHA-256 hex digest of the binary path that starts the process.

        Args:
            pid_file_dir: Directory in which the pidfile lives.

        Returns:
            The full path of the pidfile.
        """
        digest = hashlib.sha256(self.binary_path.encode("utf-8")).hexdigest()
        pid_file_path = pid_file_dir.joinpath(digest + ".lock")
        logging.info("pid_file_path: %s", pid_file_path)

        return pid_file_path