Report edit monitor error to clearcut
Report unexpected errors to clearcut when the daemon manager detect
anything wrong when start/monitor/stop/reboot the edit monitor process.
Test: atest daemon_manager_test
Bug: 365617369
Change-Id: I2027229c387ce2525d1f730d06483032662ffc4b
diff --git a/tools/edit_monitor/daemon_manager_test.py b/tools/edit_monitor/daemon_manager_test.py
index 72442c6..e132000 100644
--- a/tools/edit_monitor/daemon_manager_test.py
+++ b/tools/edit_monitor/daemon_manager_test.py
@@ -26,6 +26,7 @@
import unittest
from unittest import mock
from edit_monitor import daemon_manager
+from proto import edit_event_pb2
TEST_BINARY_FILE = '/path/to/test_binary'
@@ -133,7 +134,8 @@
def test_start_return_directly_if_in_cog_env(self):
dm = daemon_manager.DaemonManager(
- '/google/cog/cloud/user/workspace/edit_monitor')
+ '/google/cog/cloud/user/workspace/edit_monitor'
+ )
dm.start()
# Verify no daemon process is started.
self.assertIsNone(dm.daemon_process)
@@ -148,9 +150,13 @@
with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
f.write('123456')
- with self.assertRaises(OSError) as error:
- dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
+ fake_cclient = FakeClearcutClient()
+ with self.assertRaises(OSError):
+ dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
dm.start()
+ self._assert_error_event_logged(
+ fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
+ )
def test_start_failed_to_write_pidfile(self):
pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
@@ -160,40 +166,63 @@
# Makes the directory read-only so write pidfile will fail.
os.chmod(pid_file_path_dir, 0o555)
- with self.assertRaises(PermissionError) as error:
- dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
+ fake_cclient = FakeClearcutClient()
+ with self.assertRaises(PermissionError):
+ dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
dm.start()
+ self._assert_error_event_logged(
+ fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
+ )
def test_start_failed_to_start_daemon_process(self):
- with self.assertRaises(TypeError) as error:
+ fake_cclient = FakeClearcutClient()
+ with self.assertRaises(TypeError):
dm = daemon_manager.DaemonManager(
- TEST_BINARY_FILE, daemon_target='wrong_target', daemon_args=(1)
+ TEST_BINARY_FILE,
+ daemon_target='wrong_target',
+ daemon_args=(1),
+ cclient=fake_cclient,
)
dm.start()
+ self._assert_error_event_logged(
+ fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
+ )
def test_monitor_daemon_subprocess_killed_high_memory_usage(self):
+ fake_cclient = FakeClearcutClient()
dm = daemon_manager.DaemonManager(
TEST_BINARY_FILE,
daemon_target=memory_consume_daemon_target,
daemon_args=(2,),
+ cclient=fake_cclient,
)
dm.start()
dm.monitor_daemon(interval=1, memory_threshold=2)
self.assertTrue(dm.max_memory_usage >= 2)
self.assert_no_subprocess_running()
+ self._assert_error_event_logged(
+ fake_cclient,
+ edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE,
+ )
def test_monitor_daemon_subprocess_killed_high_cpu_usage(self):
+ fake_cclient = FakeClearcutClient()
dm = daemon_manager.DaemonManager(
TEST_BINARY_FILE,
daemon_target=cpu_consume_daemon_target,
daemon_args=(20,),
+ cclient=fake_cclient,
)
dm.start()
dm.monitor_daemon(interval=1, cpu_threshold=20)
self.assertTrue(dm.max_cpu_usage >= 20)
self.assert_no_subprocess_running()
+ self._assert_error_event_logged(
+ fake_cclient,
+ edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE,
+ )
@mock.patch('subprocess.check_output')
def test_monitor_daemon_failed_does_not_matter(self, mock_output):
@@ -207,7 +236,8 @@
)
dm = daemon_manager.DaemonManager(
- binary_file.name, daemon_target=long_running_daemon
+ binary_file.name,
+ daemon_target=long_running_daemon,
)
dm.start()
dm.monitor_daemon(reboot_timeout=0.5)
@@ -226,27 +256,42 @@
@mock.patch('os.kill')
def test_stop_failed_to_kill_daemon_process(self, mock_kill):
mock_kill.side_effect = OSError('Unknown OSError')
+ fake_cclient = FakeClearcutClient()
dm = daemon_manager.DaemonManager(
- TEST_BINARY_FILE, daemon_target=long_running_daemon
+ TEST_BINARY_FILE,
+ daemon_target=long_running_daemon,
+ cclient=fake_cclient,
)
- dm.start()
- dm.stop()
- self.assertTrue(dm.daemon_process.is_alive())
- self.assertTrue(dm.pid_file_path.exists())
+ with self.assertRaises(SystemExit):
+ dm.start()
+ dm.stop()
+ self.assertTrue(dm.daemon_process.is_alive())
+ self.assertTrue(dm.pid_file_path.exists())
+ self._assert_error_event_logged(
+ fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
+ )
@mock.patch('os.remove')
def test_stop_failed_to_remove_pidfile(self, mock_remove):
mock_remove.side_effect = OSError('Unknown OSError')
+ fake_cclient = FakeClearcutClient()
dm = daemon_manager.DaemonManager(
- TEST_BINARY_FILE, daemon_target=long_running_daemon
+ TEST_BINARY_FILE,
+ daemon_target=long_running_daemon,
+ cclient=fake_cclient,
)
- dm.start()
- dm.stop()
- self.assert_no_subprocess_running()
- self.assertTrue(dm.pid_file_path.exists())
+ with self.assertRaises(SystemExit):
+ dm.start()
+ dm.stop()
+ self.assert_no_subprocess_running()
+ self.assertTrue(dm.pid_file_path.exists())
+
+ self._assert_error_event_logged(
+ fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
+ )
@mock.patch('os.execv')
def test_reboot_success(self, mock_execv):
@@ -273,7 +318,7 @@
)
dm.start()
- with self.assertRaises(SystemExit) as cm:
+ with self.assertRaises(SystemExit):
dm.reboot()
mock_execv.assert_not_called()
self.assertEqual(cm.exception.code, 0)
@@ -281,18 +326,24 @@
@mock.patch('os.execv')
def test_reboot_failed(self, mock_execv):
mock_execv.side_effect = OSError('Unknown OSError')
+ fake_cclient = FakeClearcutClient()
binary_file = tempfile.NamedTemporaryFile(
dir=self.working_dir.name, delete=False
)
dm = daemon_manager.DaemonManager(
- binary_file.name, daemon_target=long_running_daemon
+ binary_file.name,
+ daemon_target=long_running_daemon,
+ cclient=fake_cclient,
)
dm.start()
- with self.assertRaises(SystemExit) as cm:
+ with self.assertRaises(SystemExit):
dm.reboot()
self.assertEqual(cm.exception.code, 1)
+ self._assert_error_event_logged(
+ fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_REBOOT_EDIT_MONITOR
+ )
def assert_run_simple_daemon_success(self):
damone_output_file = tempfile.NamedTemporaryFile(
@@ -374,6 +425,33 @@
f.write(str(p.pid))
return p
+ def _assert_error_event_logged(self, fake_cclient, error_type):
+ error_events = fake_cclient.get_sent_events()
+ self.assertEquals(len(error_events), 1)
+ self.assertEquals(
+ edit_event_pb2.EditEvent.FromString(
+ error_events[0].source_extension
+ ).edit_monitor_error_event.error_type,
+ error_type,
+ )
+
+
+class FakeClearcutClient:
+
+ def __init__(self):
+ self.pending_log_events = []
+ self.sent_log_event = []
+
+ def log(self, log_event):
+ self.pending_log_events.append(log_event)
+
+ def flush_events(self):
+ self.sent_log_event.extend(self.pending_log_events)
+ self.pending_log_events.clear()
+
+ def get_sent_events(self):
+ return self.sent_log_event + self.pending_log_events
+
if __name__ == '__main__':
unittest.main()