Merge "Support restart in daemon manager" into main am: a891344335

Original change: https://android-review.googlesource.com/c/platform/build/+/3272726

Change-Id: I5329c7bc14890b7f12793f0bf7445bb9655a324a
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/tools/edit_monitor/daemon_manager.py b/tools/edit_monitor/daemon_manager.py
index 79831a7..1876451 100644
--- a/tools/edit_monitor/daemon_manager.py
+++ b/tools/edit_monitor/daemon_manager.py
@@ -20,6 +20,7 @@
 import pathlib
 import signal
 import subprocess
+import sys
 import tempfile
 import time
 
@@ -27,7 +28,8 @@
 DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1
 DEFAULT_MONITOR_INTERVAL_SECONDS = 5
 DEFAULT_MEMORY_USAGE_THRESHOLD = 2000
-DEFAULT_CPU_USAGE_THRESHOLD = 10
+DEFAULT_CPU_USAGE_THRESHOLD = 200
+DEFAULT_REBOOT_TIMEOUT_SECONDS = 60 * 60 * 24
 
 
 def default_daemon_target():
@@ -72,6 +74,7 @@
       interval: int = DEFAULT_MONITOR_INTERVAL_SECONDS,
       memory_threshold: float = DEFAULT_MEMORY_USAGE_THRESHOLD,
       cpu_threshold: float = DEFAULT_CPU_USAGE_THRESHOLD,
+      reboot_timeout: int = DEFAULT_REBOOT_TIMEOUT_SECONDS,
   ):
     """Monits the daemon process status.
 
@@ -80,8 +83,10 @@
     given thresholds.
     """
     logging.info("start monitoring daemon process %d.", self.daemon_process.pid)
-
+    reboot_time = time.time() + reboot_timeout
     while self.daemon_process.is_alive():
+      if time.time() > reboot_time:
+        self.reboot()
       try:
         memory_usage = self._get_process_memory_percent(self.daemon_process.pid)
         self.max_memory_usage = max(self.max_memory_usage, memory_usage)
@@ -119,9 +124,32 @@
       if self.daemon_process and self.daemon_process.is_alive():
         self._terminate_process(self.daemon_process.pid)
       self._remove_pidfile()
+      logging.debug("Successfully stopped daemon manager.")
     except Exception as e:
       logging.exception("Failed to stop daemon manager with error %s", e)
 
+  def reboot(self):
+    """Reboots the current process.
+
+    Stops the current daemon manager, then re-executes the binary file with
+    sys.argv. Exits with 0 if the binary no longer exists, 1 if the exec fails.
+    """
+    logging.debug("Rebooting process based on binary %s.", self.binary_path)
+
+    # Stop the current daemon manager before this process image is replaced.
+    self.stop()
+
+    # If the binary no longer exists, exit directly.
+    if not os.path.exists(self.binary_path):
+      logging.info("binary %s no longer exists, exiting.", self.binary_path)
+      sys.exit(0)  # Nothing to restart from; not treated as an error.
+
+    try:
+      os.execv(self.binary_path, sys.argv)  # Does not return on success.
+    except OSError as e:
+      logging.exception("Failed to reboot process with error: %s.", e)
+      sys.exit(1)  # Indicate an error occurred
+
   def _stop_any_existing_instance(self):
     if not self.pid_file_path.exists():
       logging.debug("No existing instances.")
diff --git a/tools/edit_monitor/daemon_manager_test.py b/tools/edit_monitor/daemon_manager_test.py
index 0c9e04b..bcfa850 100644
--- a/tools/edit_monitor/daemon_manager_test.py
+++ b/tools/edit_monitor/daemon_manager_test.py
@@ -197,6 +197,19 @@
     mock_output.side_effect = OSError('Unknown OSError')
     self.assert_run_simple_daemon_success()
 
+  @mock.patch('os.execv')
+  def test_monitor_daemon_reboot_triggered(self, mock_execv):
+    binary_file = tempfile.NamedTemporaryFile(
+        dir=self.working_dir.name, delete=False
+    )  # reboot() only calls execv when the binary path exists.
+
+    dm = daemon_manager.DaemonManager(
+        binary_file.name, daemon_target=long_running_daemon
+    )
+    dm.start()
+    dm.monitor_daemon(reboot_timeout=0.5)  # Short timeout to hit reboot fast.
+    mock_execv.assert_called_once()
+
   def test_stop_success(self):
     dm = daemon_manager.DaemonManager(
         TEST_BINARY_FILE, daemon_target=long_running_daemon
@@ -232,6 +245,52 @@
     self.assert_no_subprocess_running()
     self.assertTrue(dm.pid_file_path.exists())
 
+  @mock.patch('os.execv')
+  def test_reboot_success(self, mock_execv):
+    binary_file = tempfile.NamedTemporaryFile(
+        dir=self.working_dir.name, delete=False
+    )  # reboot() only calls execv when the binary path exists.
+
+    dm = daemon_manager.DaemonManager(
+        binary_file.name, daemon_target=long_running_daemon
+    )
+    dm.start()
+    dm.reboot()  # execv is mocked, so this returns instead of re-executing.
+
+    # Verifies the old process is stopped
+    self.assert_no_subprocess_running()
+    self.assertFalse(dm.pid_file_path.exists())
+
+    mock_execv.assert_called_once()
+
+  @mock.patch('os.execv')
+  def test_reboot_binary_no_longer_exists(self, mock_execv):
+    dm = daemon_manager.DaemonManager(
+        TEST_BINARY_FILE, daemon_target=long_running_daemon
+    )
+    dm.start()
+
+    with self.assertRaises(SystemExit) as cm:
+      dm.reboot()  # Raises, so nothing after it in this block would run.
+    mock_execv.assert_not_called()  # Checked after the block so it executes.
+    self.assertEqual(cm.exception.code, 0)
+
+  @mock.patch('os.execv')
+  def test_reboot_failed(self, mock_execv):
+    mock_execv.side_effect = OSError('Unknown OSError')
+    binary_file = tempfile.NamedTemporaryFile(
+        dir=self.working_dir.name, delete=False
+    )
+
+    dm = daemon_manager.DaemonManager(
+        binary_file.name, daemon_target=long_running_daemon
+    )
+    dm.start()
+
+    with self.assertRaises(SystemExit) as cm:
+      dm.reboot()  # Raises, so nothing after it in this block would run.
+    self.assertEqual(cm.exception.code, 1)  # Checked after the block exits.
+
   def assert_run_simple_daemon_success(self):
     damone_output_file = tempfile.NamedTemporaryFile(
         dir=self.working_dir.name, delete=False