blob: 407d94e9b57d3888b91d1fed67b04ec53fdb6806 [file] [log] [blame]
# Copyright 2024, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
15"""Unittests for DaemonManager."""
16
17import logging
18import multiprocessing
19import os
20import pathlib
21import signal
22import subprocess
23import sys
24import tempfile
25import time
26import unittest
27from unittest import mock
28from edit_monitor import daemon_manager
Zhuoyao Zhangba64f312024-10-14 20:32:53 +000029from proto import edit_event_pb2
Zhuoyao Zhang53359552024-09-16 23:58:11 +000030
Zhuoyao Zhangd28da5c2024-09-24 19:46:12 +000031
# Binary path handed to DaemonManager by most tests in this file.
TEST_BINARY_FILE = '/path/to/test_binary'

# File name the tests expect for the pid lock file created under
# <tempdir>/edit_monitor when a DaemonManager is started for TEST_BINARY_FILE.
# NOTE(review): presumably derived from the binary path by DaemonManager --
# confirm against daemon_manager before changing either constant.
TEST_PID_FILE_PATH = (
    '587239c2d1050afdf54512e2d799f3b929f86b43575eb3c7b4bab105dd9bd25e.lock'
)
36
37
def simple_daemon(output_file):
  """Daemon target that proves it ran by writing a marker to a file.

  Args:
    output_file: Path of the file to overwrite with the marker text.
  """
  marker = 'running daemon target'
  with open(output_file, 'w') as out:
    out.write(marker)
41
42
def long_running_daemon():
  """Daemon target that never returns; it idles in one-second sleeps."""
  nap_seconds = 1
  while True:
    time.sleep(nap_seconds)
46
47
def memory_consume_daemon_target(size_mb, hold_seconds=10):
  """Daemon target that allocates a buffer and holds it for a while.

  Args:
    size_mb: Size of the buffer to allocate, in MiB.
    hold_seconds: How long to keep the buffer alive before returning.
      Defaults to 10, the previously hard-coded duration.
  """
  try:
    size_bytes = size_mb * 1024 * 1024
    # The name is never read; binding it keeps the buffer referenced (and so
    # allocated) for the duration of the sleep below.
    dummy_data = bytearray(size_bytes)  # pylint: disable=unused-variable
    time.sleep(hold_seconds)
  except MemoryError:
    print(f'Process failed to allocate {size_mb} MB of memory.')
55
56
def cpu_consume_daemon_target(target_usage_percent):
  """Daemon target that burns roughly the given share of one CPU, forever.

  Every cycle busy-loops for `target_usage_percent / 100` seconds and then
  sleeps for the remainder of a one-second period. Never returns.

  Args:
    target_usage_percent: Desired CPU usage as a percentage (e.g. 20).
      Values above 100 busy-loop longer than a second per cycle with no
      sleep in between.
  """
  busy_seconds = target_usage_percent / 100
  # Clamp so a target above 100 does not pass a negative value to time.sleep,
  # which raises ValueError.
  idle_seconds = max(0.0, 1 - busy_seconds)
  while True:
    # The monotonic clock is immune to wall-clock adjustments, which could
    # otherwise stretch or cut short the busy phase.
    start_time = time.monotonic()
    while time.monotonic() - start_time < busy_seconds:
      pass  # Busy loop to consume CPU

    # Sleep to reduce CPU usage
    time.sleep(idle_seconds)
65
66
class DaemonManagerTest(unittest.TestCase):
  """Tests for daemon_manager.DaemonManager start/monitor/stop/reboot flows.

  Each test runs with tempfile.tempdir redirected to a fresh temporary
  directory and with ENABLE_EDIT_MONITOR=true in the environment. Child
  processes still parented to the test process are SIGKILLed in tearDown.
  """

  @classmethod
  def setUpClass(cls):
    super().setUpClass()
    # Configure to print logging to stdout.
    logging.basicConfig(filename=None, level=logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    logging.getLogger('').addHandler(console)

  def setUp(self):
    super().setUp()
    self.original_tempdir = tempfile.tempdir
    self.working_dir = tempfile.TemporaryDirectory()
    # Sets the tempdir under the working dir so any temp files created during
    # tests will be cleaned.
    tempfile.tempdir = self.working_dir.name
    self.patch = mock.patch.dict(os.environ, {'ENABLE_EDIT_MONITOR': 'true'})
    self.patch.start()

  def tearDown(self):
    # Cleans up any child processes left by the tests.
    self._cleanup_child_processes()
    self.working_dir.cleanup()
    # Restores tempdir.
    tempfile.tempdir = self.original_tempdir
    self.patch.stop()
    super().tearDown()

  def test_start_success_with_no_existing_instance(self):
    """Starting with no pidfile present runs the daemon target."""
    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_running(self):
    """Starting while another long-running child process exists succeeds."""
    # Create a running daemon subprocess.
    # NOTE(review): the helper writes its pid to 'pid.lock', not to
    # TEST_PID_FILE_PATH, so DaemonManager presumably never treats this
    # process as an existing instance -- verify the intended pidfile name.
    self._create_fake_daemon_process()

    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_already_dead(self):
    """A pidfile holding a pid that does not exist does not block start."""
    # Create a pidfile with pid that does not exist.
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_from_different_binary(self):
    """An instance started for another binary path does not block start."""
    # First start an instance based on "some_binary_path"
    existing_dm = daemon_manager.DaemonManager(
        'some_binary_path',
        daemon_target=long_running_daemon,
    )
    existing_dm.start()

    self.assert_run_simple_daemon_success()
    existing_dm.stop()

  def test_start_return_directly_if_block_sign_exists(self):
    """start() launches nothing when the block sign file is present."""
    # Creates the block sign.
    pathlib.Path(self.working_dir.name).joinpath(
        daemon_manager.BLOCK_SIGN_FILE
    ).touch()

    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  @mock.patch.dict(os.environ, {'ENABLE_EDIT_MONITOR': 'false'}, clear=True)
  def test_start_return_directly_if_disabled(self):
    """start() launches nothing when ENABLE_EDIT_MONITOR is 'false'."""
    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  def test_start_return_directly_if_in_cog_env(self):
    """start() launches nothing for a binary under /google/cog."""
    dm = daemon_manager.DaemonManager(
        '/google/cog/cloud/user/workspace/edit_monitor'
    )
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  @mock.patch('os.kill')
  def test_start_failed_to_kill_existing_instance(self, mock_kill):
    """An OSError from os.kill propagates from start() and is logged."""
    mock_kill.side_effect = OSError('Unknown OSError')
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    fake_cclient = FakeClearcutClient()
    with self.assertRaises(OSError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_write_pidfile(self):
    """A read-only pidfile directory makes start() raise PermissionError."""
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    # Makes the directory read-only so write pidfile will fail.
    os.chmod(pid_file_path_dir, 0o555)

    fake_cclient = FakeClearcutClient()
    with self.assertRaises(PermissionError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_start_daemon_process(self):
    """Invalid daemon target/args make start() raise TypeError."""
    fake_cclient = FakeClearcutClient()
    with self.assertRaises(TypeError):
      dm = daemon_manager.DaemonManager(
          TEST_BINARY_FILE,
          daemon_target='wrong_target',
          # NOTE(review): `(1)` is the int 1, not a tuple; presumably this is
          # the deliberate trigger for the expected TypeError -- confirm.
          daemon_args=(1),
          cclient=fake_cclient,
      )
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_monitor_daemon_subprocess_killed_high_memory_usage(self):
    """monitor_daemon stops the daemon once memory_threshold is reached."""
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=memory_consume_daemon_target,
        daemon_args=(2,),
        cclient=fake_cclient,
    )
    dm.start()
    dm.monitor_daemon(interval=1, memory_threshold=2)

    self.assertGreaterEqual(dm.max_memory_usage, 2)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE,
    )

  def test_monitor_daemon_subprocess_killed_high_cpu_usage(self):
    """monitor_daemon stops the daemon once cpu_threshold is reached."""
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=cpu_consume_daemon_target,
        daemon_args=(20,),
        cclient=fake_cclient,
    )
    dm.start()
    dm.monitor_daemon(interval=1, cpu_threshold=20)

    self.assertGreaterEqual(dm.max_cpu_usage, 20)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE,
    )

  @mock.patch('subprocess.check_output')
  def test_monitor_daemon_failed_does_not_matter(self, mock_output):
    """A failing subprocess.check_output does not break the daemon run."""
    mock_output.side_effect = OSError('Unknown OSError')
    self.assert_run_simple_daemon_success()

  @mock.patch('os.execv')
  def test_monitor_daemon_reboot_triggered(self, mock_execv):
    """monitor_daemon calls os.execv once after reboot_timeout elapses."""
    # Only the path is needed; close the handle right away so it is not
    # leaked. delete=False keeps the file on disk.
    with tempfile.NamedTemporaryFile(
        dir=self.working_dir.name, delete=False
    ) as binary_file:
      binary_path = binary_file.name

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=long_running_daemon,
    )
    dm.start()
    dm.monitor_daemon(reboot_timeout=0.5)
    mock_execv.assert_called_once()

  def test_stop_success(self):
    """stop() ends the daemon process and removes the pidfile."""
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
    )
    dm.start()
    dm.stop()

    self.assert_no_subprocess_running()
    self.assertFalse(dm.pid_file_path.exists())

  @mock.patch('os.kill')
  def test_stop_failed_to_kill_daemon_process(self, mock_kill):
    """stop() exits via SystemExit when os.kill raises OSError."""
    mock_kill.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()
    # These checks must sit outside the `with` block: statements placed after
    # the raising call inside it would never execute.
    self.assertTrue(dm.daemon_process.is_alive())
    self.assertTrue(dm.pid_file_path.exists())
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.remove')
  def test_stop_failed_to_remove_pidfile(self, mock_remove):
    """stop() exits via SystemExit when os.remove raises OSError."""
    mock_remove.side_effect = OSError('Unknown OSError')

    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()
    # Outside the `with` block so the checks actually run after the exit.
    self.assert_no_subprocess_running()
    self.assertTrue(dm.pid_file_path.exists())

    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.execv')
  def test_reboot_success(self, mock_execv):
    """reboot() stops the old daemon and calls os.execv once."""
    with tempfile.NamedTemporaryFile(
        dir=self.working_dir.name, delete=False
    ) as binary_file:
      binary_path = binary_file.name

    dm = daemon_manager.DaemonManager(
        binary_path, daemon_target=long_running_daemon
    )
    dm.start()
    dm.reboot()

    # Verifies the old process is stopped
    self.assert_no_subprocess_running()
    self.assertFalse(dm.pid_file_path.exists())

    mock_execv.assert_called_once()

  @mock.patch('os.execv')
  def test_reboot_binary_no_longer_exists(self, mock_execv):
    """reboot() exits with code 0 and skips execv if the binary is gone."""
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
    )
    dm.start()

    # Bind the context manager so the exit code can be inspected afterwards.
    with self.assertRaises(SystemExit) as cm:
      dm.reboot()
    mock_execv.assert_not_called()
    self.assertEqual(cm.exception.code, 0)

  @mock.patch('os.execv')
  def test_reboot_failed(self, mock_execv):
    """reboot() exits with code 1 and logs an error when os.execv fails."""
    mock_execv.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    with tempfile.NamedTemporaryFile(
        dir=self.working_dir.name, delete=False
    ) as binary_file:
      binary_path = binary_file.name

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )
    dm.start()

    with self.assertRaises(SystemExit) as cm:
      dm.reboot()
    self.assertEqual(cm.exception.code, 1)
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_REBOOT_EDIT_MONITOR
    )

  def assert_run_simple_daemon_success(self):
    """Runs simple_daemon under a DaemonManager and checks pidfile + output."""
    # Only the path is needed by the daemon; close our handle immediately.
    with tempfile.NamedTemporaryFile(
        dir=self.working_dir.name, delete=False
    ) as daemon_output_file:
      daemon_output_path = daemon_output_file.name

    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=simple_daemon,
        daemon_args=(daemon_output_path,),
    )
    dm.start()
    dm.monitor_daemon(interval=1)

    # Verifies the expected pid file is created.
    expected_pid_file_path = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor', TEST_PID_FILE_PATH
    )
    self.assertTrue(expected_pid_file_path.exists())

    # Verify the daemon process is executed successfully.
    with open(daemon_output_path, 'r') as f:
      contents = f.read()
    self.assertEqual(contents, 'running daemon target')

  def assert_no_subprocess_running(self):
    """Fails if any direct child of this process is alive (non-zombie)."""
    child_pids = self._get_child_processes(os.getpid())
    for child_pid in child_pids:
      self.assertFalse(
          self._is_process_alive(child_pid), f'process {child_pid} still alive'
      )

  def _get_child_processes(self, parent_pid: int) -> list[int]:
    """Returns pids listed by `ps` whose parent pid equals parent_pid.

    Fails the test if the `ps` invocation exits with an error.
    """
    try:
      output = subprocess.check_output(
          ['ps', '-o', 'pid,ppid', '--no-headers'], text=True
      )
    except subprocess.CalledProcessError as e:
      self.fail(f'failed to get child process, error: {e}')

    rows = (line.split() for line in output.splitlines())
    return [int(pid) for pid, ppid in rows if int(ppid) == parent_pid]

  def _is_process_alive(self, pid: int) -> bool:
    """Returns True if `ps` reports pid in any state other than zombie."""
    try:
      output = subprocess.check_output(
          ['ps', '-p', str(pid), '-o', 'state='], text=True
      ).strip()
    except subprocess.CalledProcessError:
      # A failing ps lookup is treated as "process not alive".
      return False
    if not output:
      # No state reported; avoid indexing into an empty split() result.
      return False
    state = output.split()[0]
    return state != 'Z'  # Check if the state is not 'Z' (zombie)

  def _cleanup_child_processes(self):
    """SIGKILLs every direct child of the current process."""
    child_pids = self._get_child_processes(os.getpid())
    for child_pid in child_pids:
      try:
        os.kill(child_pid, signal.SIGKILL)
      except ProcessLookupError:
        # process already terminated
        pass

  def _create_fake_daemon_process(
      self, name: str = ''
  ) -> multiprocessing.Process:
    """Starts a long-running child and records its pid in a lock file.

    Args:
      name: Prefix for the lock file name; the file is `<name>pid.lock` under
        the `edit_monitor` directory of the working dir.

    Returns:
      The started multiprocessing.Process.
    """
    # Create a long running subprocess
    p = multiprocessing.Process(target=long_running_daemon)
    p.start()

    # Create the pidfile with the subprocess pid
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(name + 'pid.lock'), 'w') as f:
      f.write(str(p.pid))
    return p

  def _assert_error_event_logged(self, fake_cclient, error_type):
    """Asserts exactly one event was logged and it carries error_type."""
    error_events = fake_cclient.get_sent_events()
    self.assertEqual(len(error_events), 1)
    self.assertEqual(
        edit_event_pb2.EditEvent.FromString(
            error_events[0].source_extension
        ).edit_monitor_error_event.error_type,
        error_type,
    )
450
451
class FakeClearcutClient:
  """In-memory stand-in for a clearcut client that records logged events.

  Events passed to log() are held as pending until flush_events() moves them
  to the sent list; get_sent_events() reports both lists combined.
  """

  def __init__(self):
    # Events received through log() but not yet flushed.
    self.pending_log_events = []
    # Events moved over by flush_events().
    self.sent_log_event = []

  def log(self, log_event):
    """Queues log_event as pending."""
    self.pending_log_events.append(log_event)

  def flush_events(self):
    """Moves every pending event to the sent list, preserving order."""
    self.sent_log_event += self.pending_log_events
    del self.pending_log_events[:]

  def get_sent_events(self):
    """Returns a new list: sent events followed by still-pending events."""
    return [*self.sent_log_event, *self.pending_log_events]
467
Zhuoyao Zhang53359552024-09-16 23:58:11 +0000468
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  unittest.main()