Retry logs write when it fails.
Report skipped event in statsd.
Test: manual test
BUG: b/77222120
Change-Id: I257f5e76d557893c4eb4a8e8a13396d8b5d1afc0
diff --git a/cmds/statsd/src/StatsLogProcessor.cpp b/cmds/statsd/src/StatsLogProcessor.cpp
index de8dfe4..de4ab63 100644
--- a/cmds/statsd/src/StatsLogProcessor.cpp
+++ b/cmds/statsd/src/StatsLogProcessor.cpp
@@ -158,7 +158,10 @@
void StatsLogProcessor::OnLogEvent(LogEvent* event) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
const int64_t currentTimestampNs = event->GetElapsedTimestampNs();
+
if (currentTimestampNs < mLastLogTimestamp) {
+ StatsdStats::getInstance().noteLogEventSkipped(
+ event->GetTagId(), event->GetElapsedTimestampNs());
return;
}
diff --git a/cmds/statsd/src/guardrail/StatsdStats.cpp b/cmds/statsd/src/guardrail/StatsdStats.cpp
index 0c076e9..2315eaa 100644
--- a/cmds/statsd/src/guardrail/StatsdStats.cpp
+++ b/cmds/statsd/src/guardrail/StatsdStats.cpp
@@ -49,17 +49,21 @@
const int FIELD_ID_ANOMALY_ALARM_STATS = 9;
// const int FIELD_ID_PULLED_ATOM_STATS = 10; // The proto is written in stats_log_util.cpp
const int FIELD_ID_LOGGER_ERROR_STATS = 11;
-const int FIELD_ID_SUBSCRIBER_ALARM_STATS = 12;
+const int FIELD_ID_PERIODIC_ALARM_STATS = 12;
+const int FIELD_ID_SKIPPED_LOG_EVENT_STATS = 13;
const int FIELD_ID_ATOM_STATS_TAG = 1;
const int FIELD_ID_ATOM_STATS_COUNT = 2;
const int FIELD_ID_ANOMALY_ALARMS_REGISTERED = 1;
-const int FIELD_ID_SUBSCRIBER_ALARMS_REGISTERED = 1;
+const int FIELD_ID_PERIODIC_ALARMS_REGISTERED = 1;
const int FIELD_ID_LOGGER_STATS_TIME = 1;
const int FIELD_ID_LOGGER_STATS_ERROR_CODE = 2;
+const int FIELD_ID_SKIPPED_LOG_EVENT_STATS_TAG = 1;
+const int FIELD_ID_SKIPPED_LOG_EVENT_STATS_TIMESTAMP = 2;
+
const int FIELD_ID_CONFIG_STATS_UID = 1;
const int FIELD_ID_CONFIG_STATS_ID = 2;
const int FIELD_ID_CONFIG_STATS_CREATION = 3;
@@ -346,6 +350,15 @@
mPushedAtomStats[atomId]++;
}
+void StatsdStats::noteLogEventSkipped(int tag, int64_t timestamp) {
+ lock_guard<std::mutex> lock(mLock);
+ // grows strictly one at a time. so it won't > kMaxSkippedLogEvents
+ if (mSkippedLogEvents.size() == kMaxSkippedLogEvents) {
+ mSkippedLogEvents.pop_front();
+ }
+ mSkippedLogEvents.push_back(std::make_pair(tag, timestamp));
+}
+
void StatsdStats::noteLoggerError(int error) {
lock_guard<std::mutex> lock(mLock);
// grows strictly one at a time. so it won't > kMaxLoggerErrors
@@ -368,6 +381,7 @@
mAnomalyAlarmRegisteredStats = 0;
mPeriodicAlarmRegisteredStats = 0;
mLoggerErrors.clear();
+ mSkippedLogEvents.clear();
for (auto& config : mConfigStats) {
config.second->broadcast_sent_time_sec.clear();
config.second->data_drop_time_sec.clear();
@@ -491,6 +505,9 @@
strftime(buffer, sizeof(buffer), "%Y-%m-%d %I:%M%p\n", error_tm);
fprintf(out, "Logger error %d at %s\n", error.second, buffer);
}
+ for (const auto& skipped : mSkippedLogEvents) {
+ fprintf(out, "Log event (%d) skipped at %lld\n", skipped.first, (long long)skipped.second);
+ }
}
void addConfigStatsToProto(const ConfigStats& configStats, ProtoOutputStream* proto) {
@@ -617,8 +634,8 @@
}
if (mPeriodicAlarmRegisteredStats > 0) {
- uint64_t token = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_SUBSCRIBER_ALARM_STATS);
- proto.write(FIELD_TYPE_INT32 | FIELD_ID_SUBSCRIBER_ALARMS_REGISTERED,
+ uint64_t token = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_PERIODIC_ALARM_STATS);
+ proto.write(FIELD_TYPE_INT32 | FIELD_ID_PERIODIC_ALARMS_REGISTERED,
mPeriodicAlarmRegisteredStats);
proto.end(token);
}
@@ -640,6 +657,15 @@
proto.end(token);
}
+ for (const auto& skipped : mSkippedLogEvents) {
+ uint64_t token = proto.start(FIELD_TYPE_MESSAGE | FIELD_ID_SKIPPED_LOG_EVENT_STATS |
+ FIELD_COUNT_REPEATED);
+ proto.write(FIELD_TYPE_INT32 | FIELD_ID_SKIPPED_LOG_EVENT_STATS_TAG, skipped.first);
+ proto.write(FIELD_TYPE_INT64 | FIELD_ID_SKIPPED_LOG_EVENT_STATS_TIMESTAMP,
+ (long long)skipped.second);
+ proto.end(token);
+ }
+
output->clear();
size_t bufferSize = proto.size();
output->resize(bufferSize);
diff --git a/cmds/statsd/src/guardrail/StatsdStats.h b/cmds/statsd/src/guardrail/StatsdStats.h
index 00bef75..93f780c 100644
--- a/cmds/statsd/src/guardrail/StatsdStats.h
+++ b/cmds/statsd/src/guardrail/StatsdStats.h
@@ -105,6 +105,8 @@
const static int kMaxLoggerErrors = 10;
+ const static int kMaxSkippedLogEvents = 200;
+
const static int kMaxTimestampCount = 20;
const static int kMaxLogSourceCount = 50;
@@ -270,6 +272,11 @@
void noteLoggerError(int error);
/**
+ * Records statsd skipped an event.
+ */
+ void noteLogEventSkipped(int tag, int64_t timestamp);
+
+ /**
* Reset the historical stats. Including all stats in icebox, and the tracked stats about
* metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue
* to collect stats after reset() has been called.
@@ -324,6 +331,9 @@
// Logd errors. Size capped by kMaxLoggerErrors.
std::list<const std::pair<int, int>> mLoggerErrors;
+ // Skipped log events.
+ std::list<const std::pair<int, int64_t>> mSkippedLogEvents;
+
// Stores the number of times statsd modified the anomaly alarm registered with
// StatsCompanionService.
int mAnomalyAlarmRegisteredStats = 0;
diff --git a/cmds/statsd/src/stats_log.proto b/cmds/statsd/src/stats_log.proto
index 4aa3c973..8213252 100644
--- a/cmds/statsd/src/stats_log.proto
+++ b/cmds/statsd/src/stats_log.proto
@@ -292,4 +292,15 @@
optional int32 error_code = 2;
}
repeated LoggerErrorStats logger_error_stats = 11;
+
+ message PeriodicAlarmStats {
+ optional int32 alarms_registered = 1;
+ }
+ optional PeriodicAlarmStats periodic_alarm_stats = 12;
+
+ message SkippedLogEventStats {
+ optional int32 tag = 1;
+ optional int64 elapsed_timestamp_nanos = 2;
+ }
+ repeated SkippedLogEventStats skipped_log_event_stats = 13;
}