Introduce client-side rate limiting for TrafficStats APIs

This change introduces client-side rate limiting for TrafficStats
APIs to address a performance issue caused by excessive calls,
resulting in high CPU usage and binder contention. By caching
traffic statistics on the client side, we reduce the frequency
of calls to NetworkStatsService, improving overall system
performance and mitigating potential privacy concerns related
to traffic analysis fingerprints.

This client-side caching mechanism will be disabled and gradually
enabled through a feature flag rollout.

Test: atest CtsNetTestCases:android.net.cts.TrafficStatsTest \
      CtsNetTestCases:android.net.cts.NetworkStatsBinderTest \
      FrameworksNetIntegrationTests:com.android.server.net.integrationtests.NetworkStatsIntegrationTest \
      ConnectivityCoverageTests:android.net.connectivity.com.android.server.net.NetworkStatsServiceTest \
      ConnectivityCoverageTests:android.net.connectivity.android.net.TrafficStatsTest \
      CtsNetTestCases:android.net.cts.IpSecManagerTest#testIkeOverUdpEncapSocket \
      CtsNetTestCasesUpdateStatsPermission:android.net.cts.networkpermission.updatestatspermission.UpdateStatsPermissionTest#testUpdateDeviceStatsPermission
Test: adb shell device_config put tethering trafficstats_client_rate_limit_cache_enabled_flag [-1|1]
Bug: 343260158
Change-Id: If8a252d8d01c1545b23fdc7a00010f249ca09dc2
diff --git a/framework-t/src/android/net/TrafficStats.java b/framework-t/src/android/net/TrafficStats.java
index 81f2cf9..868033a 100644
--- a/framework-t/src/android/net/TrafficStats.java
+++ b/framework-t/src/android/net/TrafficStats.java
@@ -17,6 +17,7 @@
 package android.net;
 
 import static android.annotation.SystemApi.Client.MODULE_LIBRARIES;
+import static android.net.NetworkStats.UID_ALL;
 
 import static com.android.internal.annotations.VisibleForTesting.Visibility.PRIVATE;
 
@@ -33,21 +34,25 @@
 import android.content.Context;
 import android.media.MediaPlayer;
 import android.net.netstats.StatsResult;
+import android.net.netstats.TrafficStatsRateLimitCacheConfig;
 import android.os.Binder;
 import android.os.Build;
 import android.os.RemoteException;
 import android.os.StrictMode;
+import android.os.SystemClock;
 import android.util.Log;
 
 import com.android.internal.annotations.GuardedBy;
 import com.android.internal.annotations.VisibleForTesting;
+import com.android.net.module.util.BinderUtils;
+import com.android.net.module.util.LruCacheWithExpiry;
 
 import java.io.FileDescriptor;
 import java.io.IOException;
 import java.net.DatagramSocket;
 import java.net.Socket;
 import java.net.SocketException;
-
+import java.util.function.LongSupplier;
 
 /**
  * Class that provides network traffic statistics. These statistics include
@@ -182,13 +187,48 @@
     /** @hide */
     public static final int TAG_SYSTEM_PROBE = 0xFFFFFF42;
 
+    private static final StatsResult EMPTY_STATS = new StatsResult(0L, 0L, 0L, 0L);
+
+    private static final Object sRateLimitCacheLock = new Object();
+
     @GuardedBy("TrafficStats.class")
+    @Nullable
     private static INetworkStatsService sStatsService;
 
     // The variable will only be accessed in the test, which is effectively
     // single-threaded.
+    @Nullable
     private static INetworkStatsService sStatsServiceForTest = null;
 
+    // This holds the configuration for the TrafficStats rate limit caches.
+    // It will be filled with the result of a query to the service the first time
+    // the caller invokes get*Stats APIs.
+    // This variable can be accessed from any thread with the lock held.
+    @GuardedBy("sRateLimitCacheLock")
+    @Nullable
+    private static TrafficStatsRateLimitCacheConfig sRateLimitCacheConfig;
+
+    // Cache for getIfaceStats and getTotalStats binder interfaces.
+    // This variable can be accessed from any thread with the lock held,
+    // while the cache itself is thread-safe and can be accessed outside
+    // the lock.
+    @GuardedBy("sRateLimitCacheLock")
+    @Nullable
+    private static LruCacheWithExpiry<String, StatsResult> sRateLimitIfaceCache;
+
+    // Cache for getUidStats binder interface.
+    // This variable can be accessed from any thread with the lock held,
+    // while the cache itself is thread-safe and can be accessed outside
+    // the lock.
+    @GuardedBy("sRateLimitCacheLock")
+    @Nullable
+    private static LruCacheWithExpiry<Integer, StatsResult> sRateLimitUidCache;
+
+    // The variable will only be accessed in the test, which is effectively
+    // single-threaded.
+    @Nullable
+    private static LongSupplier sTimeSupplierForTest = null;
+
     @UnsupportedAppUsage(maxTargetSdk = Build.VERSION_CODES.P, trackingBug = 130143562)
     private synchronized static INetworkStatsService getStatsService() {
         if (sStatsServiceForTest != null) return sStatsServiceForTest;
@@ -215,6 +255,28 @@
     }
 
     /**
+     * Set time supplier for test, or null to reset.
+     *
+     * @hide
+     */
+    @VisibleForTesting(visibility = PRIVATE)
+    public static void setTimeSupplierForTest(LongSupplier timeSupplier) {
+        sTimeSupplierForTest = timeSupplier;
+    }
+
+    /**
+     * Trigger query rate-limit cache config and initializing the caches.
+     *
+     * This is for test purpose.
+     *
+     * @hide
+     */
+    @VisibleForTesting(visibility = PRIVATE)
+    public static void reinitRateLimitCacheForTest() {
+        maybeGetConfigAndInitRateLimitCache(true /* forceReinit */);
+    }
+
+    /**
      * Snapshot of {@link NetworkStats} when the currently active profiling
      * session started, or {@code null} if no session active.
      *
@@ -254,6 +316,92 @@
         sStatsService = statsManager.getBinder();
     }
 
+    @Nullable
+    private static LruCacheWithExpiry<String, StatsResult> maybeGetRateLimitIfaceCache() {
+        if (!maybeGetConfigAndInitRateLimitCache(false /* forceReinit */)) return null;
+        synchronized (sRateLimitCacheLock) {
+            return sRateLimitIfaceCache;
+        }
+    }
+
+    @Nullable
+    private static LruCacheWithExpiry<Integer, StatsResult> maybeGetRateLimitUidCache() {
+        if (!maybeGetConfigAndInitRateLimitCache(false /* forceReinit */)) return null;
+        synchronized (sRateLimitCacheLock) {
+            return sRateLimitUidCache;
+        }
+    }
+
+    /**
+     * Gets the rate limit cache configuration and init caches if null.
+     *
+     * Gets the configuration from the service as the configuration
+     * is not expected to change dynamically. And use it to initialize
+     * rate-limit cache if not yet initialized.
+     *
+     * @return whether the rate-limit cache is enabled.
+     *
+     * @hide
+     */
+    private static boolean maybeGetConfigAndInitRateLimitCache(boolean forceReinit) {
+        // Access the service outside the lock to avoid potential deadlocks. This is
+        // especially important when the caller is a system component (e.g.,
+        // NetworkPolicyManagerService) that might hold other locks that the service
+        // also needs.
+        // Although this introduces a race condition where multiple threads might
+        // query the service concurrently, it's acceptable in this case because the
+        // configuration doesn't change dynamically. The configuration only needs to
+        // be fetched once before initializing the cache.
+        synchronized (sRateLimitCacheLock) {
+            if (sRateLimitCacheConfig != null && !forceReinit) {
+                return sRateLimitCacheConfig.isCacheEnabled;
+            }
+        }
+
+        final TrafficStatsRateLimitCacheConfig config;
+        try {
+            config = getStatsService().getRateLimitCacheConfig();
+        } catch (RemoteException e) {
+            throw e.rethrowFromSystemServer();
+        }
+
+        synchronized (sRateLimitCacheLock) {
+            if (sRateLimitCacheConfig == null || forceReinit) {
+                sRateLimitCacheConfig = config;
+                initRateLimitCacheLocked();
+            }
+        }
+        return config.isCacheEnabled;
+    }
+
+    @GuardedBy("sRateLimitCacheLock")
+    private static void initRateLimitCacheLocked() {
+        // Set up rate limiting caches.
+        // Use uid cache with UID_ALL to cache total stats.
+        if (sRateLimitCacheConfig.isCacheEnabled) {
+            // A time supplier which is monotonic until device reboots, and counts
+            // time spent in sleep. This is needed to ensure the get*Stats caller
+            // won't get stale value after system time adjustment or waking up from sleep.
+            final LongSupplier realtimeSupplier = (sTimeSupplierForTest != null
+                    ? sTimeSupplierForTest : () -> SystemClock.elapsedRealtime());
+            sRateLimitIfaceCache = new LruCacheWithExpiry<String, StatsResult>(
+                    realtimeSupplier,
+                    sRateLimitCacheConfig.expiryDurationMs,
+                    sRateLimitCacheConfig.maxEntries,
+                    (statsResult) -> !isEmpty(statsResult)
+            );
+            sRateLimitUidCache = new LruCacheWithExpiry<Integer, StatsResult>(
+                    realtimeSupplier,
+                    sRateLimitCacheConfig.expiryDurationMs,
+                    sRateLimitCacheConfig.maxEntries,
+                    (statsResult) -> !isEmpty(statsResult)
+            );
+        } else {
+            sRateLimitIfaceCache = null;
+            sRateLimitUidCache = null;
+        }
+    }
+
     /**
      * Attach the socket tagger implementation to the current process, to
      * get notified when a socket's {@link FileDescriptor} is assigned to
@@ -736,6 +884,14 @@
             android.Manifest.permission.NETWORK_STACK,
             android.Manifest.permission.NETWORK_SETTINGS})
     public static void clearRateLimitCaches() {
+        final LruCacheWithExpiry<String, StatsResult> ifaceCache = maybeGetRateLimitIfaceCache();
+        if (ifaceCache != null) {
+            ifaceCache.clear();
+        }
+        final LruCacheWithExpiry<Integer, StatsResult> uidCache = maybeGetRateLimitUidCache();
+        if (uidCache != null) {
+            uidCache.clear();
+        }
         try {
             getStatsService().clearTrafficStatsRateLimitCaches();
         } catch (RemoteException e) {
@@ -985,35 +1141,76 @@
 
     /** @hide */
     public static long getUidStats(int uid, int type) {
-        final StatsResult stats;
+        return fetchStats(maybeGetRateLimitUidCache(), uid,
+                () -> getStatsService().getUidStats(uid), type);
+    }
+
+    // Note: This method calls to the service, do not invoke this method with lock held.
+    private static <K> long fetchStats(
+            @Nullable LruCacheWithExpiry<K, StatsResult> cache, K key,
+            BinderUtils.ThrowingSupplier<StatsResult, RemoteException> statsFetcher, int type) {
         try {
-            stats = getStatsService().getUidStats(uid);
+            final StatsResult stats;
+            if (cache != null) {
+                stats = fetchStatsWithCache(cache, key, statsFetcher);
+            } else {
+                // Cache is not enabled, fetch directly from service.
+                stats = statsFetcher.get();
+            }
+            return getEntryValueForType(stats, type);
         } catch (RemoteException e) {
             throw e.rethrowFromSystemServer();
         }
-        return getEntryValueForType(stats, type);
+    }
+
+    // Note: This method calls to the service, do not invoke this method with lock held.
+    @Nullable
+    private static <K> StatsResult fetchStatsWithCache(LruCacheWithExpiry<K, StatsResult> cache,
+            K key, BinderUtils.ThrowingSupplier<StatsResult, RemoteException> statsFetcher)
+            throws RemoteException {
+        // Attempt to retrieve from the cache first.
+        StatsResult stats = cache.get(key);
+
+        // Although the cache instance is thread-safe, this can still introduce a
+        // race condition between threads of the same process, potentially
+        // returning non-monotonic results. This is because there is no lock
+        // between get, fetch, and put operations. This is considered acceptable
+        // because varying thread execution speeds can also cause non-monotonic
+        // results, even with locking.
+        if (stats == null) {
+            // Cache miss, fetch from the service.
+            stats = statsFetcher.get();
+
+            // Update the cache with the fetched result if valid.
+            if (stats != null && !isEmpty(stats)) {
+                final StatsResult cachedValue = cache.putIfAbsent(key, stats);
+                if (cachedValue != null) {
+                    // Some other thread cached a value after this thread
+                    // originally got a cache miss. Return the cached value
+                    // to ensure all returned values after caching are consistent.
+                    return cachedValue;
+                }
+            }
+        }
+        return stats;
+    }
+
+    private static boolean isEmpty(StatsResult stats) {
+        return stats.equals(EMPTY_STATS);
     }
 
     /** @hide */
     public static long getTotalStats(int type) {
-        final StatsResult stats;
-        try {
-            stats = getStatsService().getTotalStats();
-        } catch (RemoteException e) {
-            throw e.rethrowFromSystemServer();
-        }
-        return getEntryValueForType(stats, type);
+        // In practice, Bpf doesn't use UID_ALL for storing per-UID stats.
+        // Use uid cache with UID_ALL to cache total stats.
+        return fetchStats(maybeGetRateLimitUidCache(), UID_ALL,
+                () -> getStatsService().getTotalStats(), type);
     }
 
     /** @hide */
     public static long getIfaceStats(String iface, int type) {
-        final StatsResult stats;
-        try {
-            stats = getStatsService().getIfaceStats(iface);
-        } catch (RemoteException e) {
-            throw e.rethrowFromSystemServer();
-        }
-        return getEntryValueForType(stats, type);
+        return fetchStats(maybeGetRateLimitIfaceCache(), iface,
+                () -> getStatsService().getIfaceStats(iface), type);
     }
 
     /**
diff --git a/service-t/src/com/android/server/net/NetworkStatsService.java b/service-t/src/com/android/server/net/NetworkStatsService.java
index fb712a1..a8e3203 100644
--- a/service-t/src/com/android/server/net/NetworkStatsService.java
+++ b/service-t/src/com/android/server/net/NetworkStatsService.java
@@ -493,7 +493,8 @@
     @Nullable
     private final TrafficStatsRateLimitCache mTrafficStatsUidCache;
     // A feature flag to control whether the client-side rate limit cache should be enabled.
-    static final String TRAFFICSTATS_CLIENT_RATE_LIMIT_CACHE_ENABLED_FLAG =
+    @VisibleForTesting
+    public static final String TRAFFICSTATS_CLIENT_RATE_LIMIT_CACHE_ENABLED_FLAG =
             "trafficstats_client_rate_limit_cache_enabled_flag";
     static final String TRAFFICSTATS_SERVICE_RATE_LIMIT_CACHE_ENABLED_FLAG =
             "trafficstats_rate_limit_cache_enabled_flag";