Fix a race condition in upstream selection. am: 491999292b

Original change: https://android-review.googlesource.com/c/platform/packages/modules/Connectivity/+/1662400

Change-Id: I6e1e1834489b80eb332176a69f4f6740e9575bdd
diff --git a/Tethering/src/com/android/networkstack/tethering/Tethering.java b/Tethering/src/com/android/networkstack/tethering/Tethering.java
index c6e8fd6..f795747 100644
--- a/Tethering/src/com/android/networkstack/tethering/Tethering.java
+++ b/Tethering/src/com/android/networkstack/tethering/Tethering.java
@@ -1719,6 +1719,12 @@
                     break;
             }
 
+            if (mConfig.chooseUpstreamAutomatically
+                    && arg1 == UpstreamNetworkMonitor.EVENT_DEFAULT_SWITCHED) {
+                chooseUpstreamType(true);
+                return;
+            }
+
             if (ns == null || !pertainsToCurrentUpstream(ns)) {
                 // TODO: In future, this is where upstream evaluation and selection
                 // could be handled for notifications which include sufficient data.
diff --git a/Tethering/src/com/android/networkstack/tethering/UpstreamNetworkMonitor.java b/Tethering/src/com/android/networkstack/tethering/UpstreamNetworkMonitor.java
index 513f07c..e39145b 100644
--- a/Tethering/src/com/android/networkstack/tethering/UpstreamNetworkMonitor.java
+++ b/Tethering/src/com/android/networkstack/tethering/UpstreamNetworkMonitor.java
@@ -42,11 +42,14 @@
 import android.util.Log;
 import android.util.SparseIntArray;
 
+import androidx.annotation.NonNull;
+
 import com.android.internal.annotations.VisibleForTesting;
 import com.android.internal.util.StateMachine;
 
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Objects;
 import java.util.Set;
 
 
@@ -82,6 +85,7 @@
     public static final int EVENT_ON_CAPABILITIES   = 1;
     public static final int EVENT_ON_LINKPROPERTIES = 2;
     public static final int EVENT_ON_LOST           = 3;
+    public static final int EVENT_DEFAULT_SWITCHED  = 4;
     public static final int NOTIFY_LOCAL_PREFIXES   = 10;
     // This value is used by deprecated preferredUpstreamIfaceTypes selection which is default
     // disabled.
@@ -398,7 +402,7 @@
         notifyTarget(EVENT_ON_CAPABILITIES, network);
     }
 
-    private void handleLinkProp(Network network, LinkProperties newLp) {
+    private void updateLinkProperties(Network network, LinkProperties newLp) {
         final UpstreamNetworkState prev = mNetworkMap.get(network);
         if (prev == null || newLp.equals(prev.linkProperties)) {
             // Ignore notifications about networks for which we have not yet
@@ -414,8 +418,10 @@
 
         mNetworkMap.put(network, new UpstreamNetworkState(
                 newLp, prev.networkCapabilities, network));
-        // TODO: If sufficient information is available to select a more
-        // preferable upstream, do so now and notify the target.
+    }
+
+    private void handleLinkProp(Network network, LinkProperties newLp) {
+        updateLinkProperties(network, newLp);
         notifyTarget(EVENT_ON_LINKPROPERTIES, network);
     }
 
@@ -445,6 +451,24 @@
         notifyTarget(EVENT_ON_LOST, mNetworkMap.remove(network));
     }
 
+    private void maybeHandleNetworkSwitch(@NonNull Network network) {
+        if (Objects.equals(mDefaultInternetNetwork, network)) return;
+
+        final UpstreamNetworkState ns = mNetworkMap.get(network);
+        if (ns == null) {
+            // Can never happen unless there is a bug in ConnectivityService. Entries are only
+            // removed from mNetworkMap when receiving onLost, and onLost for a given network can
+            // never be followed by any other callback on that network.
+            Log.wtf(TAG, "maybeHandleNetworkSwitch: no UpstreamNetworkState for " + network);
+            return;
+        }
+
+        // Default network changed. Update local data and notify tethering.
+        Log.d(TAG, "New default Internet network: " + network);
+        mDefaultInternetNetwork = network;
+        notifyTarget(EVENT_DEFAULT_SWITCHED, ns);
+    }
+
     private void recomputeLocalPrefixes() {
         final HashSet<IpPrefix> localPrefixes = allLocalPrefixes(mNetworkMap.values());
         if (!mLocalPrefixes.equals(localPrefixes)) {
@@ -482,7 +506,22 @@
         @Override
         public void onCapabilitiesChanged(Network network, NetworkCapabilities newNc) {
             if (mCallbackType == CALLBACK_DEFAULT_INTERNET) {
-                mDefaultInternetNetwork = network;
+                // mDefaultInternetNetwork is not updated here because upstream selection must only
+                // run when the LinkProperties have been updated as well as the capabilities. If
+                // this callback is due to a default network switch, then the system will invoke
+                // onLinkPropertiesChanged right after this method and mDefaultInternetNetwork will
+                // be updated then.
+                //
+                // Technically, not updating here isn't necessary, because the notifications to
+                // Tethering sent by notifyTarget are messages sent to a state machine running on
+                // the same thread as this method, and so cannot arrive until after this method has
+                // returned. However, it is not a good idea to rely on that because fact that
+                // Tethering uses multiple state machines running on the same thread is a major
+                // source of race conditions and something that should be fixed.
+                //
+                // TODO: is it correct that this code always updates EntitlementManager?
+                // This code runs when the default network connects or changes capabilities, but the
+                // default network might not be the tethering upstream.
                 final boolean newIsCellular = isCellular(newNc);
                 if (mIsDefaultCellularUpstream != newIsCellular) {
                     mIsDefaultCellularUpstream = newIsCellular;
@@ -496,7 +535,15 @@
 
         @Override
         public void onLinkPropertiesChanged(Network network, LinkProperties newLp) {
-            if (mCallbackType == CALLBACK_DEFAULT_INTERNET) return;
+            if (mCallbackType == CALLBACK_DEFAULT_INTERNET) {
+                updateLinkProperties(network, newLp);
+                // When the default network callback calls onLinkPropertiesChanged, it means that
+                // all the network information for the default network is known (because
+                // onLinkPropertiesChanged is called after onAvailable and onCapabilitiesChanged).
+                // Inform tethering that the default network might have changed.
+                maybeHandleNetworkSwitch(network);
+                return;
+            }
 
             handleLinkProp(network, newLp);
             // Any non-LISTEN_ALL callback will necessarily concern a network that will
@@ -513,6 +560,8 @@
                 mDefaultInternetNetwork = null;
                 mIsDefaultCellularUpstream = false;
                 mEntitlementMgr.notifyUpstream(false);
+                Log.d(TAG, "Lost default Internet network: " + network);
+                notifyTarget(EVENT_DEFAULT_SWITCHED, null);
                 return;
             }
 
diff --git a/Tethering/tests/unit/src/com/android/networkstack/tethering/TetheringTest.java b/Tethering/tests/unit/src/com/android/networkstack/tethering/TetheringTest.java
index 3f847ff..d18d990 100644
--- a/Tethering/tests/unit/src/com/android/networkstack/tethering/TetheringTest.java
+++ b/Tethering/tests/unit/src/com/android/networkstack/tethering/TetheringTest.java
@@ -1120,37 +1120,26 @@
         mLooper.dispatchAll();
         inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(wifi.networkId);
 
+        // This code has historically been racy, so test different orderings of CONNECTIVITY_ACTION
+        // broadcasts and callbacks, and add mLooper.dispatchAll() calls between the two.
         final Runnable doDispatchAll = () -> mLooper.dispatchAll();
 
-        // There is a race where if Tethering and UpstreamNetworkMonitor process the
-        // CONNECTIVITY_ACTION before UpstreamNetworkMonitor gets onCapabilitiesChanged on
-        // CALLBACK_DEFAULT_INTERNET, the upstream does not change.
-        // Simulate this by telling TestConnectivityManager to call mLooper.dispatchAll() between
-        // sending the CONNECTIVITY_ACTION and sending the callbacks.
-        // The switch to wifi just above shows that if the CONNECTIVITY_ACTION and the callbacks
-        // happen close enough together that the tethering code sees both, the code behaves as
-        // expected.
         mCm.makeDefaultNetwork(mobile, BROADCAST_FIRST, doDispatchAll);
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);
 
         mCm.makeDefaultNetwork(wifi, BROADCAST_FIRST, doDispatchAll);
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(wifi.networkId);
 
-        // If the broadcast immediately follows the callbacks, the code behaves as expected.
-        // In this case nothing happens because the upstream is already set to cellular and
-        // remaining on cellular is correct.
         mCm.makeDefaultNetwork(mobile, CALLBACKS_FIRST);
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);
 
         mCm.makeDefaultNetwork(wifi, CALLBACKS_FIRST);
         mLooper.dispatchAll();
         inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(wifi.networkId);
 
-        // If the broadcast happens after the callbacks have been processed, the code also behaves
-        // correctly.
         mCm.makeDefaultNetwork(mobile, CALLBACKS_FIRST, doDispatchAll);
         mLooper.dispatchAll();
         inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);
@@ -1161,31 +1150,30 @@
         inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());
 
         // Lose and regain upstream.
-        // As above, if broadcasts are processed before the callbacks, upstream is not updated.
         assertTrue(mUpstreamNetworkMonitor.getCurrentPreferredUpstream().linkProperties
                 .hasIPv4Address());
         mCm.makeDefaultNetwork(null, BROADCAST_FIRST, doDispatchAll);
         mLooper.dispatchAll();
         mobile.fakeDisconnect();
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(null);
 
         mobile = new TestNetworkAgent(mCm, buildMobile464xlatUpstreamState());
         mobile.fakeConnect();
         mCm.makeDefaultNetwork(mobile, BROADCAST_FIRST, doDispatchAll);
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(null);  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);
 
         // Check the IP addresses to ensure that the upstream is indeed not the same as the previous
         // mobile upstream, even though the netId is (unrealistically) the same.
         assertFalse(mUpstreamNetworkMonitor.getCurrentPreferredUpstream().linkProperties
                 .hasIPv4Address());
 
-        // Lose and regain upstream again, but this time send events in an order that works.
+        // Lose and regain upstream again.
         mCm.makeDefaultNetwork(null, CALLBACKS_FIRST, doDispatchAll);
         mobile.fakeDisconnect();
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(null);
 
         mobile = new TestNetworkAgent(mCm, buildMobileDualStackUpstreamState());
         mobile.fakeConnect();