Add pullTimeoutDurations to statsdstats
Bug: 156790145
Test: atest statsd_test
Change-Id: Iaf6ceb94bb8729cb73e5e067b15f369dca1f52d8
diff --git a/cmds/statsd/src/external/StatsPuller.cpp b/cmds/statsd/src/external/StatsPuller.cpp
index 9df4d1f..bb5d0a6 100644
--- a/cmds/statsd/src/external/StatsPuller.cpp
+++ b/cmds/statsd/src/external/StatsPuller.cpp
@@ -44,7 +44,8 @@
bool StatsPuller::Pull(const int64_t eventTimeNs, std::vector<std::shared_ptr<LogEvent>>* data) {
lock_guard<std::mutex> lock(mLock);
- int64_t elapsedTimeNs = getElapsedRealtimeNs();
+ const int64_t elapsedTimeNs = getElapsedRealtimeNs();
+ const int64_t systemUptimeMillis = getSystemUptimeMillis();
StatsdStats::getInstance().notePull(mTagId);
const bool shouldUseCache =
(mLastEventTimeNs == eventTimeNs) || (elapsedTimeNs - mLastPullTimeNs < mCoolDownNs);
@@ -67,16 +68,18 @@
if (!mHasGoodData) {
return mHasGoodData;
}
- const int64_t pullDurationNs = getElapsedRealtimeNs() - elapsedTimeNs;
- StatsdStats::getInstance().notePullTime(mTagId, pullDurationNs);
- const bool pullTimeOut = pullDurationNs > mPullTimeoutNs;
+ const int64_t pullElapsedDurationNs = getElapsedRealtimeNs() - elapsedTimeNs;
+ const int64_t pullSystemUptimeDurationMillis = getSystemUptimeMillis() - systemUptimeMillis;
+ StatsdStats::getInstance().notePullTime(mTagId, pullElapsedDurationNs);
+ const bool pullTimeOut = pullElapsedDurationNs > mPullTimeoutNs;
if (pullTimeOut) {
// Something went wrong. Discard the data.
mCachedData.clear();
mHasGoodData = false;
- StatsdStats::getInstance().notePullTimeout(mTagId);
+ StatsdStats::getInstance().notePullTimeout(
+ mTagId, pullSystemUptimeDurationMillis, NanoToMillis(pullElapsedDurationNs));
ALOGW("Pull for atom %d exceeds timeout %lld nano seconds.", mTagId,
- (long long)pullDurationNs);
+ (long long)pullElapsedDurationNs);
return mHasGoodData;
}
diff --git a/cmds/statsd/src/guardrail/StatsdStats.cpp b/cmds/statsd/src/guardrail/StatsdStats.cpp
index c027fff..6e89038 100644
--- a/cmds/statsd/src/guardrail/StatsdStats.cpp
+++ b/cmds/statsd/src/guardrail/StatsdStats.cpp
@@ -38,6 +38,7 @@
using std::lock_guard;
using std::shared_ptr;
using std::string;
+using std::to_string;
using std::vector;
const int FIELD_ID_BEGIN_TIME = 1;
@@ -436,9 +437,18 @@
mPulledAtomStats[pullAtomId].dataError++;
}
-void StatsdStats::notePullTimeout(int pullAtomId) {
+void StatsdStats::notePullTimeout(int pullAtomId,
+ int64_t pullUptimeMillis,
+ int64_t pullElapsedMillis) {
lock_guard<std::mutex> lock(mLock);
- mPulledAtomStats[pullAtomId].pullTimeout++;
+ PulledAtomStats& pulledAtomStats = mPulledAtomStats[pullAtomId];
+ pulledAtomStats.pullTimeout++;
+
+ if (pulledAtomStats.pullTimeoutMetadata.size() == kMaxTimestampCount) {
+ pulledAtomStats.pullTimeoutMetadata.pop_front();
+ }
+
+ pulledAtomStats.pullTimeoutMetadata.emplace_back(pullUptimeMillis, pullElapsedMillis);
}
void StatsdStats::notePullExceedMaxDelay(int pullAtomId) {
@@ -630,6 +640,7 @@
pullStats.second.unregisteredCount = 0;
pullStats.second.atomErrorCount = 0;
pullStats.second.binderCallFailCount = 0;
+ pullStats.second.pullTimeoutMetadata.clear();
}
mAtomMetricStats.clear();
mActivationBroadcastGuardrailStats.clear();
@@ -786,6 +797,20 @@
pair.second.pullUidProviderNotFound, pair.second.pullerNotFound,
pair.second.registeredCount, pair.second.unregisteredCount,
pair.second.atomErrorCount);
+ if (pair.second.pullTimeoutMetadata.size() > 0) {
+ string uptimeMillis = "(pull timeout system uptime millis) ";
+ string pullTimeoutMillis = "(pull timeout elapsed time millis) ";
+ for (const auto& stats : pair.second.pullTimeoutMetadata) {
+ uptimeMillis.append(to_string(stats.pullTimeoutUptimeMillis)).append(",");;
+ pullTimeoutMillis.append(to_string(stats.pullTimeoutElapsedMillis)).append(",");
+ }
+ uptimeMillis.pop_back();
+ uptimeMillis.push_back('\n');
+ pullTimeoutMillis.pop_back();
+ pullTimeoutMillis.push_back('\n');
+ dprintf(out, "%s", uptimeMillis.c_str());
+ dprintf(out, "%s", pullTimeoutMillis.c_str());
+ }
}
if (mAnomalyAlarmRegisteredStats > 0) {
diff --git a/cmds/statsd/src/guardrail/StatsdStats.h b/cmds/statsd/src/guardrail/StatsdStats.h
index 8587e145..8e8912a 100644
--- a/cmds/statsd/src/guardrail/StatsdStats.h
+++ b/cmds/statsd/src/guardrail/StatsdStats.h
@@ -352,7 +352,7 @@
/*
* Records pull exceeds timeout for the puller.
*/
- void notePullTimeout(int pullAtomId);
+ void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis);
/*
* Records pull exceeds max delay for a metric.
@@ -498,6 +498,14 @@
*/
void dumpStats(int outFd) const;
+ typedef struct PullTimeoutMetadata {
+ int64_t pullTimeoutUptimeMillis;
+ int64_t pullTimeoutElapsedMillis;
+ PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis) :
+ pullTimeoutUptimeMillis(uptimeMillis),
+ pullTimeoutElapsedMillis(elapsedMillis) {/* do nothing */}
+ } PullTimeoutMetadata;
+
typedef struct {
long totalPull = 0;
long totalPullFromCache = 0;
@@ -519,6 +527,7 @@
long unregisteredCount = 0;
int32_t atomErrorCount = 0;
long binderCallFailCount = 0;
+ std::list<PullTimeoutMetadata> pullTimeoutMetadata;
} PulledAtomStats;
typedef struct {
diff --git a/cmds/statsd/src/stats_log.proto b/cmds/statsd/src/stats_log.proto
index 6bfa267..ddd2725 100644
--- a/cmds/statsd/src/stats_log.proto
+++ b/cmds/statsd/src/stats_log.proto
@@ -467,6 +467,11 @@
optional int64 binder_call_failed = 19;
optional int64 failed_uid_provider_not_found = 20;
optional int64 puller_not_found = 21;
+ message PullTimeoutMetadata {
+ optional int64 pull_timeout_uptime_millis = 1;
+ optional int64 pull_timeout_elapsed_millis = 2;
+ }
+ repeated PullTimeoutMetadata pull_atom_metadata = 22;
}
repeated PulledAtomStats pulled_atom_stats = 10;
diff --git a/cmds/statsd/src/stats_log_util.cpp b/cmds/statsd/src/stats_log_util.cpp
index bafdfcb..423bae8 100644
--- a/cmds/statsd/src/stats_log_util.cpp
+++ b/cmds/statsd/src/stats_log_util.cpp
@@ -81,6 +81,9 @@
const int FIELD_ID_BINDER_CALL_FAIL_COUNT = 19;
const int FIELD_ID_PULL_UID_PROVIDER_NOT_FOUND = 20;
const int FIELD_ID_PULLER_NOT_FOUND = 21;
+const int FIELD_ID_PULL_TIMEOUT_METADATA = 22;
+const int FIELD_ID_PULL_TIMEOUT_METADATA_UPTIME_MILLIS = 1;
+const int FIELD_ID_PULL_TIMEOUT_METADATA_ELAPSED_MILLIS = 2;
// for AtomMetricStats proto
const int FIELD_ID_ATOM_METRIC_STATS = 17;
@@ -497,6 +500,16 @@
(long long)pair.second.pullUidProviderNotFound);
protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULLER_NOT_FOUND,
(long long)pair.second.pullerNotFound);
+ for (const auto& pullTimeoutMetadata : pair.second.pullTimeoutMetadata) {
+ uint64_t timeoutMetadataToken = protoOutput->start(FIELD_TYPE_MESSAGE |
+ FIELD_ID_PULL_TIMEOUT_METADATA |
+ FIELD_COUNT_REPEATED);
+ protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULL_TIMEOUT_METADATA_UPTIME_MILLIS,
+ pullTimeoutMetadata.pullTimeoutUptimeMillis);
+ protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULL_TIMEOUT_METADATA_ELAPSED_MILLIS,
+ pullTimeoutMetadata.pullTimeoutElapsedMillis);
+ protoOutput->end(timeoutMetadataToken);
+ }
protoOutput->end(token);
}
@@ -542,6 +555,10 @@
return ::android::elapsedRealtime();
}
+int64_t getSystemUptimeMillis() {
+ return ::android::uptimeMillis();
+}
+
int64_t getWallClockNs() {
return time(nullptr) * NS_PER_SEC;
}
diff --git a/cmds/statsd/src/stats_log_util.h b/cmds/statsd/src/stats_log_util.h
index 20d93b5..eb65dc6 100644
--- a/cmds/statsd/src/stats_log_util.h
+++ b/cmds/statsd/src/stats_log_util.h
@@ -61,6 +61,9 @@
// Gets the elapsed timestamp in seconds.
int64_t getElapsedRealtimeSec();
+// Gets the system uptime in millis.
+int64_t getSystemUptimeMillis();
+
// Gets the wall clock timestamp in ns.
int64_t getWallClockNs();