The initial implementation of the edit monitor

Creates a class that will manager and monitor the actual edit watchdog
that run as a subprocess. As a first step, the class supports:
1) A start method that creates a pidfile and starts a subprocess with given target/args.
2) A stop method that terminates the created subprocess and removes the pidfile.

Detailed design in go/android-local-edit-monitor.

Test: atest daemon_manager_test
bug: 365617369
Change-Id: Ic6d7be67d284ade8033416235b9b0fb1e90e1b1a
diff --git a/tools/edit_monitor/Android.bp b/tools/edit_monitor/Android.bp
index 80437c0..b939633 100644
--- a/tools/edit_monitor/Android.bp
+++ b/tools/edit_monitor/Android.bp
@@ -19,3 +19,26 @@
     default_applicable_licenses: ["Android-Apache-2.0"],
     default_team: "trendy_team_adte",
 }
+
+python_library_host {
+    name: "edit_monitor_lib",
+    pkg_path: "edit_monitor",
+    srcs: [
+        "daemon_manager.py",
+    ],
+}
+
+python_test_host {
+    name: "daemon_manager_test",
+    main: "daemon_manager_test.py",
+    pkg_path: "edit_monitor",
+    srcs: [
+        "daemon_manager_test.py",
+    ],
+    libs: [
+        "edit_monitor_lib",
+    ],
+    test_options: {
+        unit_test: true,
+    },
+}
diff --git a/tools/edit_monitor/daemon_manager.py b/tools/edit_monitor/daemon_manager.py
new file mode 100644
index 0000000..c09c321
--- /dev/null
+++ b/tools/edit_monitor/daemon_manager.py
@@ -0,0 +1,165 @@
+# Copyright 2024, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import hashlib
+import logging
+import multiprocessing
+import os
+import pathlib
+import signal
+import subprocess
+import tempfile
+import time
+
+
+DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1
+
+
+def default_daemon_target():
+  """Place holder for the default daemon target."""
+  print("default daemon target")
+
+
+class DaemonManager:
+  """Class to manage and monitor the daemon run as a subprocess."""
+
+  def __init__(
+      self,
+      binary_path: str,
+      daemon_target: callable = default_daemon_target,
+      daemon_args: tuple = (),
+  ):
+    self.binary_path = binary_path
+    self.daemon_target = daemon_target
+    self.daemon_args = daemon_args
+
+    self.pid = os.getpid()
+    self.daemon_process = None
+
+    pid_file_dir = pathlib.Path(tempfile.gettempdir()).joinpath("edit_monitor")
+    pid_file_dir.mkdir(parents=True, exist_ok=True)
+    self.pid_file_path = self._get_pid_file_path(pid_file_dir)
+
+  def start(self):
+    """Writes the pidfile and starts the daemon proces."""
+    try:
+      self._write_pid_to_pidfile()
+      self._start_daemon_process()
+    except Exception as e:
+      logging.exception("Failed to start daemon manager with error %s", e)
+
+  def stop(self):
+    """Stops the daemon process and removes the pidfile."""
+
+    logging.debug("in daemon manager cleanup.")
+    try:
+      if self.daemon_process and self.daemon_process.is_alive():
+        self._terminate_process(self.daemon_process.pid)
+      self._remove_pidfile()
+    except Exception as e:
+      logging.exception("Failed to stop daemon manager with error %s", e)
+
+  def _write_pid_to_pidfile(self):
+    """Creates a pidfile and writes the current pid to the file.
+
+    Raise FileExistsError if the pidfile already exists.
+    """
+    try:
+      # Use the 'x' mode to open the file for exclusive creation
+      with open(self.pid_file_path, "x") as f:
+        f.write(f"{self.pid}")
+    except FileExistsError as e:
+      # This could be caused due to race condition that a user is trying
+      # to start two edit monitors at the same time. Or because there is
+      # already an existing edit monitor running and we can not kill it
+      # for some reason.
+      logging.exception("pidfile %s already exists.", self.pid_file_path)
+      raise e
+
+  def _start_daemon_process(self):
+    """Starts a subprocess to run the daemon."""
+    p = multiprocessing.Process(
+        target=self.daemon_target, args=self.daemon_args
+    )
+    p.start()
+
+    logging.info("Start subprocess with PID %d", p.pid)
+    self.daemon_process = p
+
+  def _terminate_process(
+      self, pid: int, timeout: int = DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS
+  ):
+    """Terminates a process with given pid.
+
+    It first sends a SIGTERM to the process to allow it for proper
+    termination with a timeout. If the process is not terminated within
+    the timeout, kills it forcefully.
+    """
+    try:
+      os.kill(pid, signal.SIGTERM)
+      if not self._wait_for_process_terminate(pid, timeout):
+        logging.warning(
+            "Process %d not terminated within timeout, try force kill", pid
+        )
+        os.kill(pid, signal.SIGKILL)
+    except ProcessLookupError:
+      logging.info("Process with PID %d not found (already terminated)", pid)
+
+  def _wait_for_process_terminate(self, pid: int, timeout: int) -> bool:
+    start_time = time.time()
+
+    while time.time() < start_time + timeout:
+      if not self._is_process_alive(pid):
+        return True
+      time.sleep(1)
+
+    logging.error("Process %d not terminated within %d seconds.", pid, timeout)
+    return False
+
+  def _is_process_alive(self, pid: int) -> bool:
+    try:
+      output = subprocess.check_output(
+          ["ps", "-p", str(pid), "-o", "state="], text=True
+      ).strip()
+      state = output.split()[0]
+      return state != "Z"  # Check if the state is not 'Z' (zombie)
+    except subprocess.CalledProcessError:
+      # Process not found (already dead).
+      return False
+    except (FileNotFoundError, OSError, ValueError) as e:
+      logging.warning(
+          "Unable to check the status for process %d with error: %s.", pid, e
+      )
+      return True
+
+  def _remove_pidfile(self):
+    try:
+      os.remove(self.pid_file_path)
+    except FileNotFoundError:
+      logging.info("pid file %s already removed.", self.pid_file_path)
+
+  def _get_pid_file_path(self, pid_file_dir: pathlib.Path) -> pathlib.Path:
+    """Generates the path to store the pidfile.
+
+    The file path should have the format of "/tmp/edit_monitor/xxxx.lock"
+    where xxxx is a hashed value based on the binary path that starts the
+    process.
+    """
+    hash_object = hashlib.sha256()
+    hash_object.update(self.binary_path.encode("utf-8"))
+    pid_file_path = pid_file_dir.joinpath(hash_object.hexdigest() + ".lock")
+    logging.info("pid_file_path: %s", pid_file_path)
+
+    return pid_file_path
diff --git a/tools/edit_monitor/daemon_manager_test.py b/tools/edit_monitor/daemon_manager_test.py
new file mode 100644
index 0000000..5be4bee
--- /dev/null
+++ b/tools/edit_monitor/daemon_manager_test.py
@@ -0,0 +1,197 @@
+# Copyright 2024, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unittests for DaemonManager."""
+
+import logging
+import multiprocessing
+import os
+import pathlib
+import signal
+import subprocess
+import sys
+import tempfile
+import time
+import unittest
+from unittest import mock
+from edit_monitor import daemon_manager
+
+TEST_BINARY_FILE = '/path/to/test_binary'
+TEST_PID_FILE_PATH = (
+    '587239c2d1050afdf54512e2d799f3b929f86b43575eb3c7b4bab105dd9bd25e.lock'
+)
+
+
+def example_daemon(output_file):
+  with open(output_file, 'w') as f:
+    f.write('running daemon target')
+
+
+def long_running_daemon():
+  while True:
+    time.sleep(1)
+
+
+class DaemonManagerTest(unittest.TestCase):
+
+  @classmethod
+  def setUpClass(cls):
+    super().setUpClass()
+    # Configure to print logging to stdout.
+    logging.basicConfig(filename=None, level=logging.DEBUG)
+    console = logging.StreamHandler(sys.stdout)
+    logging.getLogger('').addHandler(console)
+
+  def setUp(self):
+    super().setUp()
+    self.original_tempdir = tempfile.tempdir
+    self.working_dir = tempfile.TemporaryDirectory()
+    # Sets the tempdir under the working dir so any temp files created during
+    # tests will be cleaned.
+    tempfile.tempdir = self.working_dir.name
+
+  def tearDown(self):
+    # Cleans up any child processes left by the tests.
+    self._cleanup_child_processes()
+    self.working_dir.cleanup()
+    # Restores tempdir.
+    tempfile.tempdir = self.original_tempdir
+    super().tearDown()
+
+  def test_start_success(self):
+    damone_output_file = tempfile.NamedTemporaryFile(
+        dir=self.working_dir.name, delete=False
+    )
+    dm = daemon_manager.DaemonManager(
+        TEST_BINARY_FILE,
+        daemon_target=example_daemon,
+        daemon_args=(damone_output_file.name,),
+    )
+    dm.start()
+    dm.daemon_process.join()
+
+    # Verifies the expected pid file is created.
+    expected_pid_file_path = pathlib.Path(self.working_dir.name).joinpath(
+        'edit_monitor', TEST_PID_FILE_PATH
+    )
+    self.assertEqual(dm.pid_file_path, expected_pid_file_path)
+    self.assertTrue(expected_pid_file_path.exists())
+
+    # Verifies the daemon process is executed successfully.
+    with open(damone_output_file.name, 'r') as f:
+      contents = f.read()
+      self.assertEqual(contents, 'running daemon target')
+
+  def test_start_failed_to_write_pidfile(self):
+    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
+        'edit_monitor'
+    )
+    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
+    # Makes the directory read-only so write pidfile will fail.
+    os.chmod(pid_file_path_dir, 0o555)
+
+    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
+    dm.start()
+
+    # Verifies no daemon process is started.
+    self.assertIsNone(dm.daemon_process)
+
+  def test_start_failed_to_start_daemon_process(self):
+    dm = daemon_manager.DaemonManager(
+        TEST_BINARY_FILE, daemon_target='wrong_target', daemon_args=(1)
+    )
+    dm.start()
+
+    # Verifies no daemon process is started.
+    self.assertIsNone(dm.daemon_process)
+
+  def test_stop_success(self):
+    dm = daemon_manager.DaemonManager(
+        TEST_BINARY_FILE, daemon_target=long_running_daemon
+    )
+    dm.start()
+    dm.stop()
+
+    self.assert_no_subprocess_running()
+    self.assertFalse(dm.pid_file_path.exists())
+
+  @mock.patch('os.kill')
+  def test_stop_failed_to_kill_daemon_process(self, mock_kill):
+    mock_kill.side_effect = OSError('Unknown OSError')
+    dm = daemon_manager.DaemonManager(
+        TEST_BINARY_FILE, daemon_target=long_running_daemon
+    )
+    dm.start()
+    dm.stop()
+
+    self.assertTrue(dm.daemon_process.is_alive())
+    self.assertTrue(dm.pid_file_path.exists())
+
+  @mock.patch('os.remove')
+  def test_stop_failed_to_remove_pidfile(self, mock_remove):
+    mock_remove.side_effect = OSError('Unknown OSError')
+
+    dm = daemon_manager.DaemonManager(
+        TEST_BINARY_FILE, daemon_target=long_running_daemon
+    )
+    dm.start()
+    dm.stop()
+
+    self.assert_no_subprocess_running()
+    self.assertTrue(dm.pid_file_path.exists())
+
+  def assert_no_subprocess_running(self):
+    child_pids = self._get_child_processes(os.getpid())
+    for child_pid in child_pids:
+      self.assertFalse(
+          self._is_process_alive(child_pid), f'process {child_pid} still alive'
+      )
+
+  def _get_child_processes(self, parent_pid):
+    try:
+      output = subprocess.check_output(
+          ['ps', '-o', 'pid,ppid', '--no-headers'], text=True
+      )
+
+      child_processes = []
+      for line in output.splitlines():
+        pid, ppid = line.split()
+        if int(ppid) == parent_pid:
+          child_processes.append(int(pid))
+      return child_processes
+    except subprocess.CalledProcessError as e:
+      self.fail(f'failed to get child process, error: {e}')
+
+  def _is_process_alive(self, pid):
+    try:
+      output = subprocess.check_output(
+          ['ps', '-p', str(pid), '-o', 'state='], text=True
+      ).strip()
+      state = output.split()[0]
+      return state != 'Z'  # Check if the state is not 'Z' (zombie)
+    except subprocess.CalledProcessError:
+      return False
+
+  def _cleanup_child_processes(self):
+    child_pids = self._get_child_processes(os.getpid())
+    for child_pid in child_pids:
+      try:
+        os.kill(child_pid, signal.SIGKILL)
+      except ProcessLookupError:
+        # process already terminated
+        pass
+
+
+if __name__ == '__main__':
+  unittest.main()