Statsd AnomalyDetection stopAlarm also checks old
Every time stopAlarm() is called, it should also,
right then, check to see if the alarm should actually
have already fired (but didn't due to AlarmManager lag).
Right now, the client needs to do this check separately,
but they always go together. Indeed, MaxDurationTracker
forgot to do the check, which is a bug. It would make
much more sense if the stopAlarm takes care of it for
them, to prevent such mistakes.
Bug: 75273733
Test: make statsd_test && adb sync data && adb shell data/nativetest64/statsd_test/statsd_test
Test: cts-tradefed run cts-dev -m CtsStatsdHostTestCases -t android.cts.statsd.alert
Change-Id: I689df13690df822090ac34b1171e948be1ad0d9f
diff --git a/cmds/statsd/src/anomaly/AnomalyTracker.h b/cmds/statsd/src/anomaly/AnomalyTracker.h
index f4d5d57..d3da7dc 100644
--- a/cmds/statsd/src/anomaly/AnomalyTracker.h
+++ b/cmds/statsd/src/anomaly/AnomalyTracker.h
@@ -62,7 +62,7 @@
// Returns true if, based on past buckets plus the new currentBucketValue (which generally
// represents the partially-filled current bucket), an anomaly has happened.
- // Also advances to currBucketNum -1.
+ // Also advances to currBucketNum-1.
bool detectAnomaly(const int64_t& currBucketNum, const MetricDimensionKey& key,
const int64_t& currentBucketValue);
@@ -72,7 +72,7 @@
// Detects if, based on past buckets plus the new currentBucketValue (which generally
// represents the partially-filled current bucket), an anomaly has happened, and if so,
// declares an anomaly and informs relevant subscribers.
- // Also advances to currBucketNum -1.
+ // Also advances to currBucketNum-1.
void detectAndDeclareAnomaly(const uint64_t& timestampNs, const int64_t& currBucketNum,
const MetricDimensionKey& key, const int64_t& currentBucketValue);
diff --git a/cmds/statsd/src/anomaly/DurationAnomalyTracker.cpp b/cmds/statsd/src/anomaly/DurationAnomalyTracker.cpp
index d6704018..397844b 100644
--- a/cmds/statsd/src/anomaly/DurationAnomalyTracker.cpp
+++ b/cmds/statsd/src/anomaly/DurationAnomalyTracker.cpp
@@ -30,7 +30,7 @@
}
DurationAnomalyTracker::~DurationAnomalyTracker() {
- stopAllAlarms();
+ cancelAllAlarms();
}
void DurationAnomalyTracker::resetStorage() {
@@ -38,20 +38,6 @@
if (!mAlarms.empty()) ALOGW("AnomalyTracker.resetStorage() called but mAlarms is NOT empty!");
}
-void DurationAnomalyTracker::declareAnomalyIfAlarmExpired(const MetricDimensionKey& dimensionKey,
- const uint64_t& timestampNs) {
- auto itr = mAlarms.find(dimensionKey);
- if (itr == mAlarms.end()) {
- return;
- }
-
- if (itr->second != nullptr &&
- static_cast<uint32_t>(timestampNs / NS_PER_SEC) >= itr->second->timestampSec) {
- declareAnomaly(timestampNs, dimensionKey);
- stopAlarm(dimensionKey);
- }
-}
-
void DurationAnomalyTracker::startAlarm(const MetricDimensionKey& dimensionKey,
const uint64_t& timestampNs) {
// Alarms are stored in secs. Must round up, since if it fires early, it is ignored completely.
@@ -74,24 +60,30 @@
}
}
-void DurationAnomalyTracker::stopAlarm(const MetricDimensionKey& dimensionKey) {
- auto itr = mAlarms.find(dimensionKey);
- if (itr != mAlarms.end()) {
- mAlarms.erase(dimensionKey);
- if (mAlarmMonitor != nullptr) {
- mAlarmMonitor->remove(itr->second);
- }
+void DurationAnomalyTracker::stopAlarm(const MetricDimensionKey& dimensionKey,
+ const uint64_t& timestampNs) {
+ const auto itr = mAlarms.find(dimensionKey);
+ if (itr == mAlarms.end()) {
+ return;
+ }
+
+ // If the alarm is set in the past but hasn't fired yet (due to lag), catch it now.
+ if (itr->second != nullptr && timestampNs >= NS_PER_SEC * itr->second->timestampSec) {
+ declareAnomaly(timestampNs, dimensionKey);
+ }
+ mAlarms.erase(dimensionKey);
+ if (mAlarmMonitor != nullptr) {
+ mAlarmMonitor->remove(itr->second);
}
}
-void DurationAnomalyTracker::stopAllAlarms() {
- std::set<MetricDimensionKey> keys;
- for (auto itr = mAlarms.begin(); itr != mAlarms.end(); ++itr) {
- keys.insert(itr->first);
+void DurationAnomalyTracker::cancelAllAlarms() {
+ if (mAlarmMonitor != nullptr) {
+ for (const auto& itr : mAlarms) {
+ mAlarmMonitor->remove(itr.second);
+ }
}
- for (auto key : keys) {
- stopAlarm(key);
- }
+ mAlarms.clear();
}
void DurationAnomalyTracker::informAlarmsFired(const uint64_t& timestampNs,
diff --git a/cmds/statsd/src/anomaly/DurationAnomalyTracker.h b/cmds/statsd/src/anomaly/DurationAnomalyTracker.h
index ef9d0d8..7c6d3e2 100644
--- a/cmds/statsd/src/anomaly/DurationAnomalyTracker.h
+++ b/cmds/statsd/src/anomaly/DurationAnomalyTracker.h
@@ -37,14 +37,12 @@
void startAlarm(const MetricDimensionKey& dimensionKey, const uint64_t& eventTime);
// Stops the alarm.
- void stopAlarm(const MetricDimensionKey& dimensionKey);
+ // If it should have already fired, but hasn't yet (e.g. because the AlarmManager is delayed),
+ // declare the anomaly now.
+ void stopAlarm(const MetricDimensionKey& dimensionKey, const uint64_t& timestampNs);
- // Stop all the alarms owned by this tracker.
- void stopAllAlarms();
-
- // Declares the anomaly when the alarm expired given the current timestamp.
- void declareAnomalyIfAlarmExpired(const MetricDimensionKey& dimensionKey,
- const uint64_t& timestampNs);
+ // Stop all the alarms owned by this tracker. Does not declare any anomalies.
+ void cancelAllAlarms();
// Declares an anomaly for each alarm in firedAlarms that belongs to this DurationAnomalyTracker
// and removes it from firedAlarms. The AlarmMonitor is not informed.
diff --git a/cmds/statsd/src/metrics/duration_helper/DurationTracker.h b/cmds/statsd/src/metrics/duration_helper/DurationTracker.h
index bfb1ec7..991a76a 100644
--- a/cmds/statsd/src/metrics/duration_helper/DurationTracker.h
+++ b/cmds/statsd/src/metrics/duration_helper/DurationTracker.h
@@ -128,11 +128,11 @@
}
}
- // Stops the anomaly alarm.
- void stopAnomalyAlarm() {
+ // Stops the anomaly alarm. If it should have already fired, declare the anomaly now.
+ void stopAnomalyAlarm(const uint64_t timestamp) {
for (auto& anomalyTracker : mAnomalyTrackers) {
if (anomalyTracker != nullptr) {
- anomalyTracker->stopAlarm(mEventKey);
+ anomalyTracker->stopAlarm(mEventKey, timestamp);
}
}
}
@@ -155,14 +155,6 @@
}
}
- void declareAnomalyIfAlarmExpired(const uint64_t& timestamp) {
- for (auto& anomalyTracker : mAnomalyTrackers) {
- if (anomalyTracker != nullptr) {
- anomalyTracker->declareAnomalyIfAlarmExpired(mEventKey, timestamp);
- }
- }
- }
-
// Convenience to compute the current bucket's end time, which is always aligned with the
// start time of the metric.
uint64_t getCurrentBucketEndTimeNs() {
diff --git a/cmds/statsd/src/metrics/duration_helper/MaxDurationTracker.cpp b/cmds/statsd/src/metrics/duration_helper/MaxDurationTracker.cpp
index 058940d..335ec4c 100644
--- a/cmds/statsd/src/metrics/duration_helper/MaxDurationTracker.cpp
+++ b/cmds/statsd/src/metrics/duration_helper/MaxDurationTracker.cpp
@@ -130,7 +130,7 @@
case DurationState::kStarted: {
duration.startCount--;
if (forceStop || !mNested || duration.startCount <= 0) {
- stopAnomalyAlarm();
+ stopAnomalyAlarm(eventTime);
duration.state = DurationState::kStopped;
int64_t durationTime = eventTime - duration.lastStartTime;
VLOG("Max, key %s, Stop %lld %lld %lld", key.toString().c_str(),
@@ -284,7 +284,7 @@
// If condition becomes false, kStarted -> kPaused. Record the current duration and
// stop anomaly alarm.
if (!conditionMet) {
- stopAnomalyAlarm();
+ stopAnomalyAlarm(timestamp);
it->second.state = DurationState::kPaused;
it->second.lastDuration += (timestamp - it->second.lastStartTime);
if (anyStarted()) {
diff --git a/cmds/statsd/src/metrics/duration_helper/OringDurationTracker.cpp b/cmds/statsd/src/metrics/duration_helper/OringDurationTracker.cpp
index f84f2cb..b418a85 100644
--- a/cmds/statsd/src/metrics/duration_helper/OringDurationTracker.cpp
+++ b/cmds/statsd/src/metrics/duration_helper/OringDurationTracker.cpp
@@ -92,7 +92,6 @@
void OringDurationTracker::noteStop(const HashableDimensionKey& key, const uint64_t timestamp,
const bool stopAll) {
- declareAnomalyIfAlarmExpired(timestamp);
VLOG("Oring: %s stop", key.toString().c_str());
auto it = mStarted.find(key);
if (it != mStarted.end()) {
@@ -118,12 +117,11 @@
}
}
if (mStarted.empty()) {
- stopAnomalyAlarm();
+ stopAnomalyAlarm(timestamp);
}
}
void OringDurationTracker::noteStopAll(const uint64_t timestamp) {
- declareAnomalyIfAlarmExpired(timestamp);
if (!mStarted.empty()) {
mDuration += (timestamp - mLastStartTime);
VLOG("Oring Stop all: record duration %lld %lld ", (long long)timestamp - mLastStartTime,
@@ -131,7 +129,7 @@
detectAndDeclareAnomaly(timestamp, mCurrentBucketNum, mDuration + mDurationFullBucket);
}
- stopAnomalyAlarm();
+ stopAnomalyAlarm(timestamp);
mStarted.clear();
mPaused.clear();
mConditionKeyMap.clear();
@@ -213,7 +211,6 @@
}
void OringDurationTracker::onSlicedConditionMayChange(const uint64_t timestamp) {
- declareAnomalyIfAlarmExpired(timestamp);
vector<pair<HashableDimensionKey, int>> startedToPaused;
vector<pair<HashableDimensionKey, int>> pausedToStarted;
if (!mStarted.empty()) {
@@ -291,12 +288,11 @@
mPaused.insert(startedToPaused.begin(), startedToPaused.end());
if (mStarted.empty()) {
- stopAnomalyAlarm();
+ stopAnomalyAlarm(timestamp);
}
}
void OringDurationTracker::onConditionChanged(bool condition, const uint64_t timestamp) {
- declareAnomalyIfAlarmExpired(timestamp);
if (condition) {
if (!mPaused.empty()) {
VLOG("Condition true, all started");
@@ -319,7 +315,7 @@
}
}
if (mStarted.empty()) {
- stopAnomalyAlarm();
+ stopAnomalyAlarm(timestamp);
}
}