Ensure autoKi is not stored when keepalive stops or is not started.

In the cases below, the keepalive is already cleaned up internally in
KeepaliveTracker, but are not be cleaned up in
AutomaticOnOffKeepaliveTracker. This means the mAutomaticOnOffKeepalives
list is storing stopped keepalives and the metrics will still conisder
the keepalive as active.

1. In KeepaliveInfo.start, return whether the keepalive is successfully
   starting(i.e. mStartedState == STARTING). Check for this in
   handleStartKeepalive and return early on failure, without storing the
   autoKi. Also check in handleResumeKeepalive.
2. In handleEventSocketKeepalive, return whether the handleStopKeepalive
   is called, indicating the keepalive is forced to stop and was not
   already STOPPING. This should happen when the state is STARTING but
   the network agent returned an unsuccessful event.
3. In handleCheckKeepalivesStillValid, move the checking logic to
   AutomaticOnOffKeepaliveTracker so the handleStopKeepalive is called.

Bug: 281646074
Test: atest FrameworksNetTests
Change-Id: I7839464534baed43abbd40b321287132da25b978
diff --git a/service/src/com/android/server/connectivity/AutomaticOnOffKeepaliveTracker.java b/service/src/com/android/server/connectivity/AutomaticOnOffKeepaliveTracker.java
index f8285ed..9513178 100644
--- a/service/src/com/android/server/connectivity/AutomaticOnOffKeepaliveTracker.java
+++ b/service/src/com/android/server/connectivity/AutomaticOnOffKeepaliveTracker.java
@@ -18,6 +18,7 @@
 
 import static android.net.SocketKeepalive.ERROR_INVALID_SOCKET;
 import static android.net.SocketKeepalive.MIN_INTERVAL_SEC;
+import static android.net.SocketKeepalive.SUCCESS;
 import static android.net.SocketKeepalive.SUCCESS_PAUSED;
 import static android.provider.DeviceConfig.NAMESPACE_TETHERING;
 import static android.system.OsConstants.AF_INET;
@@ -52,6 +53,7 @@
 import android.system.StructTimeval;
 import android.util.LocalLog;
 import android.util.Log;
+import android.util.Pair;
 import android.util.SparseArray;
 
 import com.android.internal.annotations.VisibleForTesting;
@@ -381,7 +383,11 @@
             return;
         }
         autoKi.mAutomaticOnOffState = STATE_ENABLED;
-        handleResumeKeepalive(newKi);
+        final int error = handleResumeKeepalive(newKi);
+        if (error != SUCCESS) {
+            // Failed to start the keepalive
+            cleanupAutoOnOffKeepalive(autoKi);
+        }
     }
 
     /**
@@ -402,7 +408,20 @@
      * Forward to KeepaliveTracker.
      */
     public void handleEventSocketKeepalive(@NonNull NetworkAgentInfo nai, int slot, int reason) {
-        mKeepaliveTracker.handleEventSocketKeepalive(nai, slot, reason);
+        if (mKeepaliveTracker.handleEventSocketKeepalive(nai, slot, reason)) return;
+
+        // The keepalive was stopped and so the autoKi should be cleaned up.
+        final AutomaticOnOffKeepalive autoKi =
+                CollectionUtils.findFirst(
+                        mAutomaticOnOffKeepalives, it -> it.match(nai.network(), slot));
+        if (autoKi == null) {
+            // This may occur when the autoKi gets cleaned up elsewhere (i.e
+            // handleCheckKeepalivesStillValid) while waiting for the network agent to
+            // start the keepalive and the network agent returns an error event.
+            Log.e(TAG, "Attempt cleanup on unknown network, slot");
+            return;
+        }
+        cleanupAutoOnOffKeepalive(autoKi);
     }
 
     /**
@@ -425,9 +444,14 @@
      */
     public void handleStartKeepalive(Message message) {
         final AutomaticOnOffKeepalive autoKi = (AutomaticOnOffKeepalive) message.obj;
+        final int error = mKeepaliveTracker.handleStartKeepalive(autoKi.mKi);
+        if (error != SUCCESS) {
+            mEventLog.log("Failed to start keepalive " + autoKi.mCallback + " on "
+                    + autoKi.getNetwork() + " with error " + error);
+            return;
+        }
         mEventLog.log("Start keepalive " + autoKi.mCallback + " on " + autoKi.getNetwork());
         mKeepaliveStatsTracker.onStartKeepalive();
-        mKeepaliveTracker.handleStartKeepalive(autoKi.mKi);
 
         // Add automatic on/off request into list to track its life cycle.
         try {
@@ -443,10 +467,22 @@
         }
     }
 
-    private void handleResumeKeepalive(@NonNull final KeepaliveTracker.KeepaliveInfo ki) {
+    /**
+     * Handle resume keepalive with the given KeepaliveInfo
+     *
+     * @return SUCCESS if the keepalive is successfully starting and the error reason otherwise.
+     */
+    private int handleResumeKeepalive(@NonNull final KeepaliveTracker.KeepaliveInfo ki) {
+        final int error = mKeepaliveTracker.handleStartKeepalive(ki);
+        if (error != SUCCESS) {
+            mEventLog.log("Failed to resume keepalive " + ki.mCallback + " on " + ki.mNai
+                    + " with error " + error);
+            return error;
+        }
         mKeepaliveStatsTracker.onResumeKeepalive();
-        mKeepaliveTracker.handleStartKeepalive(ki);
         mEventLog.log("Resumed successfully keepalive " + ki.mCallback + " on " + ki.mNai);
+
+        return SUCCESS;
     }
 
     private void handlePauseKeepalive(@NonNull final KeepaliveTracker.KeepaliveInfo ki) {
@@ -612,7 +648,22 @@
      * Forward to KeepaliveTracker.
      */
     public void handleCheckKeepalivesStillValid(NetworkAgentInfo nai) {
-        mKeepaliveTracker.handleCheckKeepalivesStillValid(nai);
+        ArrayList<Pair<AutomaticOnOffKeepalive, Integer>> invalidKeepalives = null;
+
+        for (final AutomaticOnOffKeepalive autoKi : mAutomaticOnOffKeepalives) {
+            if (!nai.equals(autoKi.mKi.mNai)) continue;
+            final int error = autoKi.mKi.isValid();
+            if (error != SUCCESS) {
+                if (invalidKeepalives == null) {
+                    invalidKeepalives = new ArrayList<>();
+                }
+                invalidKeepalives.add(Pair.create(autoKi, error));
+            }
+        }
+        if (invalidKeepalives == null) return;
+        for (final Pair<AutomaticOnOffKeepalive, Integer> keepaliveAndError : invalidKeepalives) {
+            handleStopKeepalive(keepaliveAndError.first, keepaliveAndError.second);
+        }
     }
 
     @VisibleForTesting
diff --git a/service/src/com/android/server/connectivity/KeepaliveTracker.java b/service/src/com/android/server/connectivity/KeepaliveTracker.java
index cc226ce..409b188 100644
--- a/service/src/com/android/server/connectivity/KeepaliveTracker.java
+++ b/service/src/com/android/server/connectivity/KeepaliveTracker.java
@@ -54,7 +54,6 @@
 import android.system.ErrnoException;
 import android.system.Os;
 import android.util.Log;
-import android.util.Pair;
 
 import com.android.connectivity.resources.R;
 import com.android.internal.annotations.VisibleForTesting;
@@ -337,7 +336,12 @@
             return SUCCESS;
         }
 
-        private int isValid() {
+        /**
+         * Checks if the keepalive info is valid to start.
+         *
+         * @return SUCCESS if the keepalive is valid and the error reason otherwise.
+         */
+        public int isValid() {
             synchronized (mNai) {
                 int error = checkInterval();
                 if (error == SUCCESS) error = checkLimit();
@@ -348,11 +352,17 @@
             }
         }
 
-        void start(int slot) {
+        /**
+         * Attempt to start the keepalive on the given slot.
+         *
+         * @param slot the slot to start the keepalive on.
+         * @return SUCCESS if the keepalive is successfully starting and the error reason otherwise.
+         */
+        int start(int slot) {
             // BINDER_DIED can happen if the binder died before the KeepaliveInfo was created and
             // the constructor set the state to BINDER_DIED. If that's the case, the KI is already
             // cleaned up.
-            if (BINDER_DIED == mStartedState) return;
+            if (BINDER_DIED == mStartedState) return BINDER_DIED;
             mSlot = slot;
             int error = isValid();
             if (error == SUCCESS) {
@@ -368,7 +378,7 @@
                             mTcpController.startSocketMonitor(mFd, this, mSlot);
                         } catch (InvalidSocketException e) {
                             handleStopKeepalive(mNai, mSlot, ERROR_INVALID_SOCKET);
-                            return;
+                            return ERROR_INVALID_SOCKET;
                         }
                         final TcpKeepalivePacketData tcpData = (TcpKeepalivePacketData) mPacket;
                         mNai.onAddTcpKeepalivePacketFilter(slot, tcpData);
@@ -377,13 +387,14 @@
                         break;
                     default:
                         Log.wtf(TAG, "Starting keepalive with unknown type: " + mType);
-                        handleStopKeepalive(mNai, mSlot, error);
-                        return;
+                        handleStopKeepalive(mNai, mSlot, ERROR_UNSUPPORTED);
+                        return ERROR_UNSUPPORTED;
                 }
                 mStartedState = STARTING;
+                return SUCCESS;
             } else {
                 handleStopKeepalive(mNai, mSlot, error);
-                return;
+                return error;
             }
         }
 
@@ -444,6 +455,8 @@
             }
         }
 
+        // TODO: This does not clean up the autoKi in AutomaticOnOffKeepaliveTracker and it is not
+        // possible without a big refactor.
         void onFileDescriptorInitiatedStop(final int socketKeepaliveReason) {
             handleStopKeepalive(mNai, mSlot, socketKeepaliveReason);
         }
@@ -486,12 +499,15 @@
 
     /**
      * Handle start keepalives with the message.
+     *
+     * @param ki the keepalive to start.
+     * @return SUCCESS if the keepalive is successfully starting and the error reason otherwise.
      */
-    public void handleStartKeepalive(KeepaliveInfo ki) {
+    public int handleStartKeepalive(KeepaliveInfo ki) {
         NetworkAgentInfo nai = ki.getNai();
         int slot = findFirstFreeSlot(nai);
         mKeepalives.get(nai).put(slot, ki);
-        ki.start(slot);
+        return ki.start(slot);
     }
 
     public void handleStopAllKeepalives(NetworkAgentInfo nai, int reason) {
@@ -609,24 +625,13 @@
         }
     }
 
-    public void handleCheckKeepalivesStillValid(NetworkAgentInfo nai) {
-        HashMap <Integer, KeepaliveInfo> networkKeepalives = mKeepalives.get(nai);
-        if (networkKeepalives != null) {
-            ArrayList<Pair<Integer, Integer>> invalidKeepalives = new ArrayList<>();
-            for (int slot : networkKeepalives.keySet()) {
-                int error = networkKeepalives.get(slot).isValid();
-                if (error != SUCCESS) {
-                    invalidKeepalives.add(Pair.create(slot, error));
-                }
-            }
-            for (Pair<Integer, Integer> slotAndError: invalidKeepalives) {
-                handleStopKeepalive(nai, slotAndError.first, slotAndError.second);
-            }
-        }
-    }
-
-    /** Handle keepalive events from lower layer. */
-    public void handleEventSocketKeepalive(@NonNull NetworkAgentInfo nai, int slot, int reason) {
+    /**
+     * Handle keepalive events from lower layer.
+     *
+     * @return false if the event caused handleStopKeepalive to be called, i.e. the keepalive is
+     *     forced to stop. Otherwise, return true.
+     */
+    public boolean handleEventSocketKeepalive(@NonNull NetworkAgentInfo nai, int slot, int reason) {
         KeepaliveInfo ki = null;
         try {
             ki = mKeepalives.get(nai).get(slot);
@@ -634,7 +639,7 @@
         if (ki == null) {
             Log.e(TAG, "Event " + NetworkAgent.EVENT_SOCKET_KEEPALIVE + "," + slot + "," + reason
                     + " for unknown keepalive " + slot + " on " + nai.toShortString());
-            return;
+            return true;
         }
 
         // This can be called in a number of situations :
@@ -667,11 +672,13 @@
                     Log.w(TAG, "Discarded " + (ki.mResumed ? "onResumed" : "onStarted")
                             + " callback for slot " + slot);
                 }
+                return true;
             } else {
                 Log.d(TAG, "Failed to start keepalive " + slot + " on " + nai.toShortString()
                         + ": " + reason);
                 // The message indicated some error trying to start: do call handleStopKeepalive.
                 handleStopKeepalive(nai, slot, reason);
+                return false;
             }
         } else if (KeepaliveInfo.STOPPING == ki.mStartedState) {
             // The message indicated result of stopping : clean up keepalive slots.
@@ -679,9 +686,12 @@
                     + " stopped: " + reason);
             ki.mStartedState = KeepaliveInfo.NOT_STARTED;
             cleanupStoppedKeepalive(nai, slot);
+            return true;
         } else {
             Log.wtf(TAG, "Event " + NetworkAgent.EVENT_SOCKET_KEEPALIVE + "," + slot + "," + reason
                     + " for keepalive in wrong state: " + ki.toString());
+            // Although this is an unexpected event, the keepalive is not stopped here.
+            return true;
         }
     }
 
diff --git a/tests/unit/java/com/android/server/connectivity/AutomaticOnOffKeepaliveTrackerTest.java b/tests/unit/java/com/android/server/connectivity/AutomaticOnOffKeepaliveTrackerTest.java
index d0b0ac4..946df07 100644
--- a/tests/unit/java/com/android/server/connectivity/AutomaticOnOffKeepaliveTrackerTest.java
+++ b/tests/unit/java/com/android/server/connectivity/AutomaticOnOffKeepaliveTrackerTest.java
@@ -25,6 +25,7 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.ArgumentMatchers.any;
@@ -491,8 +492,7 @@
         final TestKeepaliveInfo testInfo =
                 doStartNattKeepalive(TEST_KEEPALIVE_INVALID_INTERVAL_SEC);
 
-        // TODO: Should be null, no keepalive should be stored.
-        assertNotNull(getAutoKiForBinder(testInfo.binder));
+        assertNull(getAutoKiForBinder(testInfo.binder));
 
         verify(testInfo.socketKeepaliveCallback).onError(SocketKeepalive.ERROR_INVALID_INTERVAL);
         verifyNoMoreInteractions(ignoreStubs(testInfo.socketKeepaliveCallback));
@@ -510,8 +510,7 @@
 
         checkAndProcessKeepaliveStop();
 
-        // TODO: Should be null, should cleanup on starting failure.
-        assertNotNull(getAutoKiForBinder(testInfo.binder));
+        assertNull(getAutoKiForBinder(testInfo.binder));
 
         verify(testInfo.socketKeepaliveCallback).onError(SocketKeepalive.ERROR_HARDWARE_ERROR);
         verifyNoMoreInteractions(ignoreStubs(testInfo.socketKeepaliveCallback));
@@ -534,8 +533,7 @@
 
         checkAndProcessKeepaliveStop();
 
-        // TODO: Should be null, keepalives that are no longer valid should be cleaned up.
-        assertNotNull(getAutoKiForBinder(testInfo.binder));
+        assertNull(getAutoKiForBinder(testInfo.binder));
 
         verify(testInfo.socketKeepaliveCallback).onError(SocketKeepalive.ERROR_INVALID_IP_ADDRESS);
         verifyNoMoreInteractions(ignoreStubs(testInfo.socketKeepaliveCallback));