Add pullTimeoutDurations to statsdstats

Bug: 156790145
Test: atest statsd_test
Change-Id: Iaf6ceb94bb8729cb73e5e067b15f369dca1f52d8
diff --git a/cmds/statsd/src/external/StatsPuller.cpp b/cmds/statsd/src/external/StatsPuller.cpp
index 9df4d1f..bb5d0a6 100644
--- a/cmds/statsd/src/external/StatsPuller.cpp
+++ b/cmds/statsd/src/external/StatsPuller.cpp
@@ -44,7 +44,8 @@
 
 bool StatsPuller::Pull(const int64_t eventTimeNs, std::vector<std::shared_ptr<LogEvent>>* data) {
     lock_guard<std::mutex> lock(mLock);
-    int64_t elapsedTimeNs = getElapsedRealtimeNs();
+    const int64_t elapsedTimeNs = getElapsedRealtimeNs();
+    const int64_t systemUptimeMillis = getSystemUptimeMillis();
     StatsdStats::getInstance().notePull(mTagId);
     const bool shouldUseCache =
             (mLastEventTimeNs == eventTimeNs) || (elapsedTimeNs - mLastPullTimeNs < mCoolDownNs);
@@ -67,16 +68,18 @@
     if (!mHasGoodData) {
         return mHasGoodData;
     }
-    const int64_t pullDurationNs = getElapsedRealtimeNs() - elapsedTimeNs;
-    StatsdStats::getInstance().notePullTime(mTagId, pullDurationNs);
-    const bool pullTimeOut = pullDurationNs > mPullTimeoutNs;
+    const int64_t pullElapsedDurationNs = getElapsedRealtimeNs() - elapsedTimeNs;
+    const int64_t pullSystemUptimeDurationMillis = getSystemUptimeMillis() - systemUptimeMillis;
+    StatsdStats::getInstance().notePullTime(mTagId, pullElapsedDurationNs);
+    const bool pullTimeOut = pullElapsedDurationNs > mPullTimeoutNs;
     if (pullTimeOut) {
         // Something went wrong. Discard the data.
         mCachedData.clear();
         mHasGoodData = false;
-        StatsdStats::getInstance().notePullTimeout(mTagId);
+        StatsdStats::getInstance().notePullTimeout(
+                mTagId, pullSystemUptimeDurationMillis, NanoToMillis(pullElapsedDurationNs));
         ALOGW("Pull for atom %d exceeds timeout %lld nano seconds.", mTagId,
-              (long long)pullDurationNs);
+              (long long)pullElapsedDurationNs);
         return mHasGoodData;
     }
 
diff --git a/cmds/statsd/src/guardrail/StatsdStats.cpp b/cmds/statsd/src/guardrail/StatsdStats.cpp
index c027fff..6e89038 100644
--- a/cmds/statsd/src/guardrail/StatsdStats.cpp
+++ b/cmds/statsd/src/guardrail/StatsdStats.cpp
@@ -38,6 +38,7 @@
 using std::lock_guard;
 using std::shared_ptr;
 using std::string;
+using std::to_string;
 using std::vector;
 
 const int FIELD_ID_BEGIN_TIME = 1;
@@ -436,9 +437,18 @@
     mPulledAtomStats[pullAtomId].dataError++;
 }
 
-void StatsdStats::notePullTimeout(int pullAtomId) {
+void StatsdStats::notePullTimeout(int pullAtomId,
+                                  int64_t pullUptimeMillis,
+                                  int64_t pullElapsedMillis) {
     lock_guard<std::mutex> lock(mLock);
-    mPulledAtomStats[pullAtomId].pullTimeout++;
+    PulledAtomStats& pulledAtomStats = mPulledAtomStats[pullAtomId];
+    pulledAtomStats.pullTimeout++;
+
+    if (pulledAtomStats.pullTimeoutMetadata.size() == kMaxTimestampCount) {
+        pulledAtomStats.pullTimeoutMetadata.pop_front();
+    }
+
+    pulledAtomStats.pullTimeoutMetadata.emplace_back(pullUptimeMillis, pullElapsedMillis);
 }
 
 void StatsdStats::notePullExceedMaxDelay(int pullAtomId) {
@@ -630,6 +640,7 @@
         pullStats.second.unregisteredCount = 0;
         pullStats.second.atomErrorCount = 0;
         pullStats.second.binderCallFailCount = 0;
+        pullStats.second.pullTimeoutMetadata.clear();
     }
     mAtomMetricStats.clear();
     mActivationBroadcastGuardrailStats.clear();
@@ -786,6 +797,20 @@
                 pair.second.pullUidProviderNotFound, pair.second.pullerNotFound,
                 pair.second.registeredCount, pair.second.unregisteredCount,
                 pair.second.atomErrorCount);
+        if (pair.second.pullTimeoutMetadata.size() > 0) {
+            string uptimeMillis = "(pull timeout system uptime millis) ";
+            string pullTimeoutMillis = "(pull timeout elapsed time millis) ";
+            for (const auto& stats : pair.second.pullTimeoutMetadata) {
+                uptimeMillis.append(to_string(stats.pullTimeoutUptimeMillis)).append(",");;
+                pullTimeoutMillis.append(to_string(stats.pullTimeoutElapsedMillis)).append(",");
+            }
+            uptimeMillis.pop_back();
+            uptimeMillis.push_back('\n');
+            pullTimeoutMillis.pop_back();
+            pullTimeoutMillis.push_back('\n');
+            dprintf(out, "%s", uptimeMillis.c_str());
+            dprintf(out, "%s", pullTimeoutMillis.c_str());
+        }
     }
 
     if (mAnomalyAlarmRegisteredStats > 0) {
diff --git a/cmds/statsd/src/guardrail/StatsdStats.h b/cmds/statsd/src/guardrail/StatsdStats.h
index 8587e145..8e8912a 100644
--- a/cmds/statsd/src/guardrail/StatsdStats.h
+++ b/cmds/statsd/src/guardrail/StatsdStats.h
@@ -352,7 +352,7 @@
     /*
      * Records pull exceeds timeout for the puller.
      */
-    void notePullTimeout(int pullAtomId);
+    void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis);
 
     /*
      * Records pull exceeds max delay for a metric.
@@ -498,6 +498,14 @@
      */
     void dumpStats(int outFd) const;
 
+    typedef struct PullTimeoutMetadata {
+        int64_t pullTimeoutUptimeMillis;
+        int64_t pullTimeoutElapsedMillis;
+        PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis) :
+            pullTimeoutUptimeMillis(uptimeMillis),
+            pullTimeoutElapsedMillis(elapsedMillis) {/* do nothing */}
+    } PullTimeoutMetadata;
+
     typedef struct {
         long totalPull = 0;
         long totalPullFromCache = 0;
@@ -519,6 +527,7 @@
         long unregisteredCount = 0;
         int32_t atomErrorCount = 0;
         long binderCallFailCount = 0;
+        std::list<PullTimeoutMetadata> pullTimeoutMetadata;
     } PulledAtomStats;
 
     typedef struct {
diff --git a/cmds/statsd/src/stats_log.proto b/cmds/statsd/src/stats_log.proto
index 6bfa267..ddd2725 100644
--- a/cmds/statsd/src/stats_log.proto
+++ b/cmds/statsd/src/stats_log.proto
@@ -467,6 +467,11 @@
         optional int64 binder_call_failed = 19;
         optional int64 failed_uid_provider_not_found = 20;
         optional int64 puller_not_found = 21;
+        message PullTimeoutMetadata {
+          optional int64 pull_timeout_uptime_millis = 1;
+          optional int64 pull_timeout_elapsed_millis = 2;
+        }
+        repeated PullTimeoutMetadata pull_atom_metadata = 22;
     }
     repeated PulledAtomStats pulled_atom_stats = 10;
 
diff --git a/cmds/statsd/src/stats_log_util.cpp b/cmds/statsd/src/stats_log_util.cpp
index bafdfcb..423bae8 100644
--- a/cmds/statsd/src/stats_log_util.cpp
+++ b/cmds/statsd/src/stats_log_util.cpp
@@ -81,6 +81,9 @@
 const int FIELD_ID_BINDER_CALL_FAIL_COUNT = 19;
 const int FIELD_ID_PULL_UID_PROVIDER_NOT_FOUND = 20;
 const int FIELD_ID_PULLER_NOT_FOUND = 21;
+const int FIELD_ID_PULL_TIMEOUT_METADATA = 22;
+const int FIELD_ID_PULL_TIMEOUT_METADATA_UPTIME_MILLIS = 1;
+const int FIELD_ID_PULL_TIMEOUT_METADATA_ELAPSED_MILLIS = 2;
 
 // for AtomMetricStats proto
 const int FIELD_ID_ATOM_METRIC_STATS = 17;
@@ -497,6 +500,16 @@
                        (long long)pair.second.pullUidProviderNotFound);
     protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULLER_NOT_FOUND,
                        (long long)pair.second.pullerNotFound);
+    for (const auto& pullTimeoutMetadata : pair.second.pullTimeoutMetadata) {
+        uint64_t timeoutMetadataToken = protoOutput->start(FIELD_TYPE_MESSAGE |
+                                                           FIELD_ID_PULL_TIMEOUT_METADATA |
+                                                           FIELD_COUNT_REPEATED);
+        protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULL_TIMEOUT_METADATA_UPTIME_MILLIS,
+                           pullTimeoutMetadata.pullTimeoutUptimeMillis);
+        protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULL_TIMEOUT_METADATA_ELAPSED_MILLIS,
+                           pullTimeoutMetadata.pullTimeoutElapsedMillis);
+        protoOutput->end(timeoutMetadataToken);
+    }
     protoOutput->end(token);
 }
 
@@ -542,6 +555,10 @@
     return ::android::elapsedRealtime();
 }
 
+int64_t getSystemUptimeMillis() {
+    return ::android::uptimeMillis();
+}
+
 int64_t getWallClockNs() {
     return time(nullptr) * NS_PER_SEC;
 }
diff --git a/cmds/statsd/src/stats_log_util.h b/cmds/statsd/src/stats_log_util.h
index 20d93b5..eb65dc6 100644
--- a/cmds/statsd/src/stats_log_util.h
+++ b/cmds/statsd/src/stats_log_util.h
@@ -61,6 +61,9 @@
 // Gets the elapsed timestamp in seconds.
 int64_t getElapsedRealtimeSec();
 
+// Gets the system uptime in millis.
+int64_t getSystemUptimeMillis();
+
 // Gets the wall clock timestamp in ns.
 int64_t getWallClockNs();