blob: e132000bcecc89fef1a538543e63f15a475e9a2d [file] [log] [blame]
# Copyright 2024, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
15"""Unittests for DaemonManager."""
16
17import logging
18import multiprocessing
19import os
20import pathlib
21import signal
22import subprocess
23import sys
24import tempfile
25import time
26import unittest
27from unittest import mock
28from edit_monitor import daemon_manager
Zhuoyao Zhangba64f312024-10-14 20:32:53 +000029from proto import edit_event_pb2
Zhuoyao Zhang53359552024-09-16 23:58:11 +000030
Zhuoyao Zhangd28da5c2024-09-24 19:46:12 +000031
# Binary path handed to DaemonManager in most tests. The tests never create a
# file at this location.
TEST_BINARY_FILE = '/path/to/test_binary'
# File name of the pid (lock) file the tests expect under the `edit_monitor`
# directory when the manager is created with TEST_BINARY_FILE.
TEST_PID_FILE_PATH = (
    '587239c2d1050afdf54512e2d799f3b9'
    '29f86b43575eb3c7b4bab105dd9bd25e'
    '.lock'
)
36
37
def simple_daemon(output_file):
  """Daemon target that records that it ran.

  Overwrites `output_file` with the fixed marker text that the tests read
  back to confirm the daemon target was executed.

  Args:
    output_file: Path of the file to (over)write.
  """
  marker_text = 'running daemon target'
  pathlib.Path(output_file).write_text(marker_text)
41
42
def long_running_daemon():
  """Daemon target that never returns: sleeps in one-second steps forever."""
  nap_seconds = 1
  while True:
    time.sleep(nap_seconds)
46
47
def memory_consume_daemon_target(size_mb):
  """Daemon target that holds on to `size_mb` MB of memory for ten seconds.

  If the allocation fails with MemoryError, a message is printed and the
  function returns without sleeping.

  Args:
    size_mb: Amount of memory to allocate, in units of 1024 * 1024 bytes.
  """
  try:
    # The buffer stays referenced by this local while the process sleeps, so
    # the memory remains allocated for the whole period.
    held_buffer = bytearray(size_mb * 1024 * 1024)
    time.sleep(10)
  except MemoryError:
    print(f'Process failed to allocate {size_mb} MB of memory.')
55
56
def cpu_consume_daemon_target(target_usage_percent):
  """Daemon target that alternates busy-waiting and sleeping, forever.

  Each cycle spins for `target_usage_percent / 100` seconds and then sleeps
  for `1 - target_usage_percent / 100` seconds. Never returns.

  Args:
    target_usage_percent: Share of each one-second cycle spent busy-waiting,
      expressed as a percentage.
  """
  while True:
    cycle_began = time.time()
    # Busy loop to consume CPU until this cycle's busy share has elapsed.
    while target_usage_percent / 100 > time.time() - cycle_began:
      continue

    # Sleep for the rest of the cycle to reduce CPU usage.
    time.sleep(1 - target_usage_percent / 100)
65
66
class DaemonManagerTest(unittest.TestCase):
  """Unit tests for daemon_manager.DaemonManager.

  Every test runs with `tempfile.tempdir` redirected to a fresh temporary
  working directory. tearDown kills any process that `ps` still reports as a
  child of the test process, removes the working directory and restores
  `tempfile.tempdir`.
  """

  @classmethod
  def setUpClass(cls):
    super().setUpClass()
    # Configure to print logging to stdout.
    logging.basicConfig(filename=None, level=logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    logging.getLogger('').addHandler(console)

  def setUp(self):
    super().setUp()
    self.original_tempdir = tempfile.tempdir
    self.working_dir = tempfile.TemporaryDirectory()
    # Sets the tempdir under the working dir so any temp files created during
    # tests will be cleaned.
    tempfile.tempdir = self.working_dir.name

  def tearDown(self):
    # The nested try/finally guarantees the process-wide `tempfile.tempdir` is
    # restored even when killing children or removing the directory fails;
    # otherwise one failing test would leak its tempdir into later tests.
    try:
      # Cleans up any child processes left by the tests.
      self._cleanup_child_processes()
    finally:
      try:
        self.working_dir.cleanup()
      finally:
        # Restores tempdir.
        tempfile.tempdir = self.original_tempdir
        super().tearDown()

  def test_start_success_with_no_existing_instance(self):
    """The simple daemon runs when no pidfile exists beforehand."""
    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_running(self):
    """The simple daemon runs while another fake daemon process is alive."""
    # Create a running daemon subprocess. The returned handle is not needed:
    # tearDown kills whatever children are still around.
    # NOTE(review): the helper writes its pidfile as 'pid.lock', which is not
    # TEST_PID_FILE_PATH -- confirm the manager really sees it as an existing
    # instance.
    self._create_fake_deamon_process()

    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_already_dead(self):
    """The simple daemon runs when the pidfile holds a stale pid."""
    # Create a pidfile with pid that does not exist.
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_from_different_binary(self):
    """The simple daemon runs next to a manager for another binary path."""
    # First start an instance based on "some_binary_path"
    existing_dm = daemon_manager.DaemonManager(
        'some_binary_path',
        daemon_target=long_running_daemon,
    )
    existing_dm.start()

    self.assert_run_simple_daemon_success()
    existing_dm.stop()

  def test_start_return_directly_if_block_sign_exists(self):
    """start() launches no daemon process when the block sign exists."""
    # Creates the block sign.
    pathlib.Path(self.working_dir.name).joinpath(
        daemon_manager.BLOCK_SIGN_FILE
    ).touch()

    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    dm.start()
    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  def test_start_return_directly_if_in_cog_env(self):
    """start() launches no daemon process for a binary under /google/cog."""
    dm = daemon_manager.DaemonManager(
        '/google/cog/cloud/user/workspace/edit_monitor'
    )
    dm.start()
    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  @mock.patch('os.kill')
  def test_start_failed_to_kill_existing_instance(self, mock_kill):
    """An OSError from os.kill surfaces from start() and is logged."""
    mock_kill.side_effect = OSError('Unknown OSError')
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    fake_cclient = FakeClearcutClient()
    with self.assertRaises(OSError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_write_pidfile(self):
    """A read-only pidfile directory makes start() raise PermissionError."""
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    # Makes the directory read-only so write pidfile will fail.
    os.chmod(pid_file_path_dir, 0o555)

    fake_cclient = FakeClearcutClient()
    with self.assertRaises(PermissionError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_start_daemon_process(self):
    """An unusable daemon target/args pair raises TypeError and is logged."""
    fake_cclient = FakeClearcutClient()
    with self.assertRaises(TypeError):
      # NOTE(review): `(1)` is the integer 1, not a one-element tuple. Left
      # untouched because the test expects a TypeError -- confirm which of the
      # two bad arguments is meant to trigger it.
      dm = daemon_manager.DaemonManager(
          TEST_BINARY_FILE,
          daemon_target='wrong_target',
          daemon_args=(1),
          cclient=fake_cclient,
      )
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_monitor_daemon_subprocess_killed_high_memory_usage(self):
    """Exceeding the memory threshold kills the daemon and logs an event."""
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=memory_consume_daemon_target,
        daemon_args=(2,),
        cclient=fake_cclient,
    )
    dm.start()
    dm.monitor_daemon(interval=1, memory_threshold=2)

    self.assertGreaterEqual(dm.max_memory_usage, 2)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE,
    )

  def test_monitor_daemon_subprocess_killed_high_cpu_usage(self):
    """Exceeding the CPU threshold kills the daemon and logs an event."""
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=cpu_consume_daemon_target,
        daemon_args=(20,),
        cclient=fake_cclient,
    )
    dm.start()
    dm.monitor_daemon(interval=1, cpu_threshold=20)

    self.assertGreaterEqual(dm.max_cpu_usage, 20)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_RESOURCE_USAGE,
    )

  @mock.patch('subprocess.check_output')
  def test_monitor_daemon_failed_does_not_matter(self, mock_output):
    """The daemon still succeeds when subprocess.check_output raises."""
    mock_output.side_effect = OSError('Unknown OSError')
    self.assert_run_simple_daemon_success()

  @mock.patch('os.execv')
  def test_monitor_daemon_reboot_triggered(self, mock_execv):
    """monitor_daemon() calls os.execv once after the reboot timeout."""
    binary_path = self._create_closed_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=long_running_daemon,
    )
    dm.start()
    dm.monitor_daemon(reboot_timeout=0.5)
    mock_execv.assert_called_once()

  def test_stop_success(self):
    """stop() leaves no child process and no pidfile behind."""
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
    )
    dm.start()
    dm.stop()

    self.assert_no_subprocess_running()
    self.assertFalse(dm.pid_file_path.exists())

  @mock.patch('os.kill')
  def test_stop_failed_to_kill_daemon_process(self, mock_kill):
    """A failing os.kill makes stop() exit, keeping daemon and pidfile."""
    mock_kill.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()
    # Checked after the block: statements following the raising call inside
    # an assertRaises block are never executed.
    self.assertTrue(dm.daemon_process.is_alive())
    self.assertTrue(dm.pid_file_path.exists())
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.remove')
  def test_stop_failed_to_remove_pidfile(self, mock_remove):
    """A failing os.remove makes stop() exit, keeping the pidfile."""
    mock_remove.side_effect = OSError('Unknown OSError')

    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()
    # Checked after the block: statements following the raising call inside
    # an assertRaises block are never executed.
    self.assert_no_subprocess_running()
    self.assertTrue(dm.pid_file_path.exists())

    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.execv')
  def test_reboot_success(self, mock_execv):
    """reboot() stops the old daemon and calls os.execv exactly once."""
    binary_path = self._create_closed_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path, daemon_target=long_running_daemon
    )
    dm.start()
    dm.reboot()

    # Verifies the old process is stopped
    self.assert_no_subprocess_running()
    self.assertFalse(dm.pid_file_path.exists())

    mock_execv.assert_called_once()

  @mock.patch('os.execv')
  def test_reboot_binary_no_longer_exists(self, mock_execv):
    """reboot() exits with code 0, without exec, if the binary is missing."""
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
    )
    dm.start()

    # `as cm` exposes the raised SystemExit so its exit code can be checked
    # once the block has completed.
    with self.assertRaises(SystemExit) as cm:
      dm.reboot()
    mock_execv.assert_not_called()
    self.assertEqual(cm.exception.code, 0)

  @mock.patch('os.execv')
  def test_reboot_failed(self, mock_execv):
    """A failing os.execv makes reboot() exit with code 1 and log an event."""
    mock_execv.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    binary_path = self._create_closed_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )
    dm.start()

    # `as cm` exposes the raised SystemExit so its exit code can be checked
    # once the block has completed.
    with self.assertRaises(SystemExit) as cm:
      dm.reboot()
    self.assertEqual(cm.exception.code, 1)
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_REBOOT_EDIT_MONITOR
    )

  def assert_run_simple_daemon_success(self):
    """Starts a manager around `simple_daemon` and checks that it ran.

    Verifies that the expected pidfile exists under `edit_monitor` in the
    working dir and that the daemon target wrote its marker text.
    """
    daemon_output_path = self._create_closed_temp_file()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=simple_daemon,
        daemon_args=(daemon_output_path,),
    )
    dm.start()
    dm.monitor_daemon(interval=1)

    # Verifies the expected pid file is created.
    expected_pid_file_path = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor', TEST_PID_FILE_PATH
    )
    self.assertTrue(expected_pid_file_path.exists())

    # Verify the daemon process is executed successfully.
    with open(daemon_output_path, 'r') as f:
      contents = f.read()
      self.assertEqual(contents, 'running daemon target')

  def assert_no_subprocess_running(self):
    """Fails if any child of the test process is alive (zombies excluded)."""
    child_pids = self._get_child_processes(os.getpid())
    for child_pid in child_pids:
      self.assertFalse(
          self._is_process_alive(child_pid), f'process {child_pid} still alive'
      )

  def _create_closed_temp_file(self) -> str:
    """Creates an empty file in the working dir and returns its path.

    The handle is closed before returning so no open file object leaks out of
    the test; `delete=False` keeps the file itself on disk.
    """
    with tempfile.NamedTemporaryFile(
        dir=self.working_dir.name, delete=False
    ) as f:
      return f.name

  def _get_child_processes(self, parent_pid: int) -> list[int]:
    """Returns pids whose parent pid is `parent_pid`, as reported by `ps`.

    Fails the current test if the `ps` command exits with an error.
    """
    try:
      output = subprocess.check_output(
          ['ps', '-o', 'pid,ppid', '--no-headers'], text=True
      )
    except subprocess.CalledProcessError as e:
      self.fail(f'failed to get child process, error: {e}')

    child_processes = []
    for line in output.splitlines():
      pid, ppid = line.split()
      if int(ppid) == parent_pid:
        child_processes.append(int(pid))
    return child_processes

  def _is_process_alive(self, pid: int) -> bool:
    """Returns True if `ps` knows `pid` and it is not in the zombie state."""
    try:
      output = subprocess.check_output(
          ['ps', '-p', str(pid), '-o', 'state='], text=True
      ).strip()
      state = output.split()[0]
      return state != 'Z'  # Check if the state is not 'Z' (zombie)
    except subprocess.CalledProcessError:
      # `ps` exited with an error for this pid; treat it as not alive.
      return False

  def _cleanup_child_processes(self):
    """Sends SIGKILL to every child process of the test process."""
    child_pids = self._get_child_processes(os.getpid())
    for child_pid in child_pids:
      try:
        os.kill(child_pid, signal.SIGKILL)
      except ProcessLookupError:
        # process already terminated
        pass

  def _create_fake_deamon_process(
      self, name: str = ''
  ) -> multiprocessing.Process:
    """Starts a long-running child process and writes its pid to a pidfile.

    Args:
      name: Prefix put in front of 'pid.lock' to form the pidfile name.

    Returns:
      The started multiprocessing.Process.
    """
    # Create a long running subprocess
    p = multiprocessing.Process(target=long_running_daemon)
    p.start()

    # Create the pidfile with the subprocess pid
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(name + 'pid.lock'), 'w') as f:
      f.write(str(p.pid))
    return p

  def _assert_error_event_logged(self, fake_cclient, error_type):
    """Asserts exactly one event was logged and it carries `error_type`.

    Args:
      fake_cclient: The FakeClearcutClient handed to the manager under test.
      error_type: Expected `edit_monitor_error_event.error_type` value.
    """
    error_events = fake_cclient.get_sent_events()
    # assertEqual, not the deprecated alias assertEquals (removed in 3.12).
    self.assertEqual(len(error_events), 1)
    self.assertEqual(
        edit_event_pb2.EditEvent.FromString(
            error_events[0].source_extension
        ).edit_monitor_error_event.error_type,
        error_type,
    )
437
438
class FakeClearcutClient:
  """In-memory stand-in for the clearcut client passed to DaemonManager.

  Events given to `log` are queued as pending; `flush_events` moves them to
  the sent list. Nothing is transmitted anywhere.
  """

  def __init__(self):
    # Events that have already been flushed, in flush order.
    self.sent_log_event = []
    # Events logged but not flushed yet.
    self.pending_log_events = []

  def log(self, log_event):
    """Queues `log_event` as pending."""
    self.pending_log_events += [log_event]

  def flush_events(self):
    """Moves every pending event, in order, onto the sent list."""
    self.sent_log_event += self.pending_log_events
    del self.pending_log_events[:]

  def get_sent_events(self):
    """Returns a new list: flushed events followed by pending ones."""
    return [*self.sent_log_event, *self.pending_log_events]
454
Zhuoyao Zhang53359552024-09-16 23:58:11 +0000455
# Run the unittest runner when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()