Merge "Support restart in daemon manager" into main am: a891344335
Original change: https://android-review.googlesource.com/c/platform/build/+/3272726
Change-Id: I5329c7bc14890b7f12793f0bf7445bb9655a324a
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/tools/edit_monitor/daemon_manager.py b/tools/edit_monitor/daemon_manager.py
index 79831a7..1876451 100644
--- a/tools/edit_monitor/daemon_manager.py
+++ b/tools/edit_monitor/daemon_manager.py
@@ -20,6 +20,7 @@
import pathlib
import signal
import subprocess
+import sys
import tempfile
import time
@@ -27,7 +28,8 @@
DEFAULT_PROCESS_TERMINATION_TIMEOUT_SECONDS = 1
DEFAULT_MONITOR_INTERVAL_SECONDS = 5
DEFAULT_MEMORY_USAGE_THRESHOLD = 2000
-DEFAULT_CPU_USAGE_THRESHOLD = 10
+DEFAULT_CPU_USAGE_THRESHOLD = 200
+DEFAULT_REBOOT_TIMEOUT_SECONDS = 60 * 60 * 24
def default_daemon_target():
@@ -72,6 +74,7 @@
interval: int = DEFAULT_MONITOR_INTERVAL_SECONDS,
memory_threshold: float = DEFAULT_MEMORY_USAGE_THRESHOLD,
cpu_threshold: float = DEFAULT_CPU_USAGE_THRESHOLD,
+ reboot_timeout: int = DEFAULT_REBOOT_TIMEOUT_SECONDS,
):
"""Monits the daemon process status.
@@ -80,8 +83,10 @@
given thresholds.
"""
logging.info("start monitoring daemon process %d.", self.daemon_process.pid)
-
+ reboot_time = time.time() + reboot_timeout
while self.daemon_process.is_alive():
+ if time.time() > reboot_time:
+ self.reboot()
try:
memory_usage = self._get_process_memory_percent(self.daemon_process.pid)
self.max_memory_usage = max(self.max_memory_usage, memory_usage)
@@ -119,9 +124,32 @@
if self.daemon_process and self.daemon_process.is_alive():
self._terminate_process(self.daemon_process.pid)
self._remove_pidfile()
+ logging.debug("Successfully stopped daemon manager.")
except Exception as e:
logging.exception("Failed to stop daemon manager with error %s", e)
+ def reboot(self):
+ """Reboots the current process.
+
+ Stops the current daemon manager and reboots the entire process based on
+ the binary file. Exits directly if the binary file no longer exists.
+ """
+ logging.debug("Rebooting process based on binary %s.", self.binary_path)
+
+ # Stop the current daemon manager first.
+ self.stop()
+
+ # If the binary no longer exists, exit directly.
+ if not os.path.exists(self.binary_path):
+ logging.info("binary %s no longer exists, exiting.", self.binary_path)
+ sys.exit(0)
+
+ try:
+ os.execv(self.binary_path, sys.argv)
+ except OSError as e:
+ logging.exception("Failed to reboot process with error: %s.", e)
+ sys.exit(1) # Indicate an error occurred
+
def _stop_any_existing_instance(self):
if not self.pid_file_path.exists():
logging.debug("No existing instances.")
diff --git a/tools/edit_monitor/daemon_manager_test.py b/tools/edit_monitor/daemon_manager_test.py
index 0c9e04b..bcfa850 100644
--- a/tools/edit_monitor/daemon_manager_test.py
+++ b/tools/edit_monitor/daemon_manager_test.py
@@ -197,6 +197,19 @@
mock_output.side_effect = OSError('Unknown OSError')
self.assert_run_simple_daemon_success()
+ @mock.patch('os.execv')
+ def test_monitor_daemon_reboot_triggered(self, mock_execv):
+ binary_file = tempfile.NamedTemporaryFile(
+ dir=self.working_dir.name, delete=False
+ )
+
+ dm = daemon_manager.DaemonManager(
+ binary_file.name, daemon_target=long_running_daemon
+ )
+ dm.start()
+ dm.monitor_daemon(reboot_timeout=0.5)
+ mock_execv.assert_called_once()
+
def test_stop_success(self):
dm = daemon_manager.DaemonManager(
TEST_BINARY_FILE, daemon_target=long_running_daemon
@@ -232,6 +245,52 @@
self.assert_no_subprocess_running()
self.assertTrue(dm.pid_file_path.exists())
+ @mock.patch('os.execv')
+ def test_reboot_success(self, mock_execv):
+ binary_file = tempfile.NamedTemporaryFile(
+ dir=self.working_dir.name, delete=False
+ )
+
+ dm = daemon_manager.DaemonManager(
+ binary_file.name, daemon_target=long_running_daemon
+ )
+ dm.start()
+ dm.reboot()
+
+ # Verifies the old process is stopped
+ self.assert_no_subprocess_running()
+ self.assertFalse(dm.pid_file_path.exists())
+
+ mock_execv.assert_called_once()
+
+ @mock.patch('os.execv')
+ def test_reboot_binary_no_longer_exists(self, mock_execv):
+ dm = daemon_manager.DaemonManager(
+ TEST_BINARY_FILE, daemon_target=long_running_daemon
+ )
+ dm.start()
+
+ with self.assertRaises(SystemExit) as cm:
+ dm.reboot()
+ mock_execv.assert_not_called()
+ self.assertEqual(cm.exception.code, 0)
+
+ @mock.patch('os.execv')
+ def test_reboot_failed(self, mock_execv):
+ mock_execv.side_effect = OSError('Unknown OSError')
+ binary_file = tempfile.NamedTemporaryFile(
+ dir=self.working_dir.name, delete=False
+ )
+
+ dm = daemon_manager.DaemonManager(
+ binary_file.name, daemon_target=long_running_daemon
+ )
+ dm.start()
+
+ with self.assertRaises(SystemExit) as cm:
+ dm.reboot()
+ self.assertEqual(cm.exception.code, 1)
+
def assert_run_simple_daemon_success(self):
damone_output_file = tempfile.NamedTemporaryFile(
dir=self.working_dir.name, delete=False