Synchronize the setup process of edit monitor

This cl ensures only one edit monitor instance is in the setup proecess
by requesting exclusive file lock on a setup lock file. If there is
already an edit monitor in setup process, other instances trying to
setup will fail. This prevents race conditions when multiple edit
monitor instance are starting.

Test: atest daemon_manager_test
bug: 382135550
Change-Id: Ia8bd2f35318ae81fc002ccce62e2fde3b8cae26b
diff --git a/tools/edit_monitor/daemon_manager.py b/tools/edit_monitor/daemon_manager.py
index 36a7593..7d666fe 100644
--- a/tools/edit_monitor/daemon_manager.py
+++ b/tools/edit_monitor/daemon_manager.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 
 
+import errno
+import fcntl
 import getpass
 import hashlib
 import logging
@@ -100,16 +102,32 @@
       logging.warning("Edit monitor for cog is not supported, exiting...")
       return
 
-    try:
-      self._stop_any_existing_instance()
-      self._write_pid_to_pidfile()
-      self._start_daemon_process()
-    except Exception as e:
-      logging.exception("Failed to start daemon manager with error %s", e)
-      self._send_error_event_to_clearcut(
-          edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
-      )
-      raise e
+    setup_lock_file = pathlib.Path(tempfile.gettempdir()).joinpath(
+        self.pid_file_path.name + ".setup"
+    )
+    logging.info("setup lock file: %s", setup_lock_file)
+    with open(setup_lock_file, "w") as f:
+      try:
+        # Acquire an exclusive lock
+        fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        self._stop_any_existing_instance()
+        self._write_pid_to_pidfile()
+        self._start_daemon_process()
+      except Exception as e:
+        if (
+            isinstance(e, IOError) and e.errno == errno.EAGAIN
+        ):  # Failed to acquire the file lock.
+          logging.warning("Another edit monitor is starting, exitinng...")
+          return
+        else:
+          logging.exception("Failed to start daemon manager with error %s", e)
+          self._send_error_event_to_clearcut(
+              edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
+          )
+          raise e
+      finally:
+        # Release the lock
+        fcntl.flock(f, fcntl.LOCK_UN)
 
   def monitor_daemon(
       self,
@@ -414,18 +432,18 @@
     pids = []
 
     try:
-      output = subprocess.check_output(
-          ["ps", "-ef", "--no-headers"], text=True)
+      output = subprocess.check_output(["ps", "-ef", "--no-headers"], text=True)
       for line in output.splitlines():
-          parts = line.split()
-          process_path = parts[7]
-          if pathlib.Path(process_path).name == 'edit_monitor':
-            pid = int(parts[1])
-            if pid != self.pid:  # exclude the current process
-              pids.append(pid)
+        parts = line.split()
+        process_path = parts[7]
+        if pathlib.Path(process_path).name == "edit_monitor":
+          pid = int(parts[1])
+          if pid != self.pid:  # exclude the current process
+            pids.append(pid)
     except Exception:
       logging.exception(
-          "Failed to get pids of existing edit monitors from ps command.")
+          "Failed to get pids of existing edit monitors from ps command."
+      )
 
     return pids
 
diff --git a/tools/edit_monitor/daemon_manager_test.py b/tools/edit_monitor/daemon_manager_test.py
index e412ab0..be28965 100644
--- a/tools/edit_monitor/daemon_manager_test.py
+++ b/tools/edit_monitor/daemon_manager_test.py
@@ -14,6 +14,7 @@
 
 """Unittests for DaemonManager."""
 
+import fcntl
 import logging
 import multiprocessing
 import os
@@ -82,7 +83,8 @@
     # tests will be cleaned.
     tempfile.tempdir = self.working_dir.name
     self.patch = mock.patch.dict(
-        os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'true'})
+        os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'true'}
+    )
     self.patch.start()
 
   def tearDown(self):
@@ -102,6 +104,7 @@
     p = self._create_fake_deamon_process()
 
     self.assert_run_simple_daemon_success()
+    self.assert_no_subprocess_running()
 
   def test_start_success_with_existing_instance_already_dead(self):
     # Create a pidfile with pid that does not exist.
@@ -137,7 +140,9 @@
     # Verify no daemon process is started.
     self.assertIsNone(dm.daemon_process)
 
-  @mock.patch.dict(os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'false'}, clear=True)
+  @mock.patch.dict(
+      os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'false'}, clear=True
+  )
   def test_start_return_directly_if_disabled(self):
     dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
     dm.start()
@@ -154,6 +159,25 @@
     # Verify no daemon process is started.
     self.assertIsNone(dm.daemon_process)
 
+  def test_start_failed_other_instance_is_starting(self):
+    f = open(
+        pathlib.Path(self.working_dir.name).joinpath(
+            TEST_PID_FILE_PATH + '.setup'
+        ),
+        'w',
+    )
+    # Acquire an exclusive lock
+    fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
+    dm.start()
+
+    # Release the lock
+    fcntl.flock(f, fcntl.LOCK_UN)
+    f.close()
+    # Verify no daemon process is started.
+    self.assertIsNone(dm.daemon_process)
+
   @mock.patch('os.kill')
   def test_start_failed_to_kill_existing_instance(self, mock_kill):
     mock_kill.side_effect = OSError('Unknown OSError')
@@ -177,6 +201,7 @@
         'edit_monitor'
     )
     pid_file_path_dir.mkdir(parents=True, exist_ok=True)
+
     # Makes the directory read-only so write pidfile will fail.
     os.chmod(pid_file_path_dir, 0o555)
 
@@ -216,7 +241,7 @@
         cclient=fake_cclient,
     )
     # set the fake total_memory_size
-    dm.total_memory_size = 100 * 1024 *1024
+    dm.total_memory_size = 100 * 1024 * 1024
     dm.start()
     dm.monitor_daemon(interval=1)
 
@@ -452,7 +477,7 @@
         pass
 
   def _create_fake_deamon_process(
-      self, name: str = ''
+      self, name: str = TEST_PID_FILE_PATH
   ) -> multiprocessing.Process:
     # Create a long running subprocess
     p = multiprocessing.Process(target=long_running_daemon)
@@ -463,7 +488,7 @@
         'edit_monitor'
     )
     pid_file_path_dir.mkdir(parents=True, exist_ok=True)
-    with open(pid_file_path_dir.joinpath(name + 'pid.lock'), 'w') as f:
+    with open(pid_file_path_dir.joinpath(name), 'w') as f:
       f.write(str(p.pid))
     return p