metrics: add per-version cumulative counters

This adds counters that accumulate some measurement across an OS version;
they are reset at version updates, but reported more frequently (for instance,
daily).

Such counts could be obtained by pseudonymous dremel queries, but this is
more convenient.

The code replaces the "tag" datum in the counters with two tags: a
"report tag" and a "reset tag".  When the report
tag changes, the count is reported but not reset.  When the reset tag
changes, the count is both reported and reset.

This also adds one usage of the new counter which tracks the total
number of kernel crashes since the most recent OS version update.

The state machine in counter.cc changes a bit because it's no longer
true that a counter is reset after reporting it.  That logic is
still rather confusing, and could use a rewrite.

BUG=chromium:339588
TEST=ran on target under various situations
BRANCH=none

Change-Id: I5f83731e1a3d6e055b6d0f89111c9ffc60ccfcb9
Reviewed-on: https://chromium-review.googlesource.com/185081
Reviewed-by: Daniel Erat <derat@chromium.org>
Commit-Queue: Luigi Semenzato <semenzato@chromium.org>
Tested-by: Luigi Semenzato <semenzato@chromium.org>
diff --git a/metrics/counter.cc b/metrics/counter.cc
index 9d94597..f71b4fb 100644
--- a/metrics/counter.cc
+++ b/metrics/counter.cc
@@ -14,8 +14,11 @@
 namespace chromeos_metrics {
 
 // TaggedCounter::Record implementation.
-void TaggedCounter::Record::Init(int32 tag, int32 count) {
-  tag_ = tag;
+void TaggedCounter::Record::Init(uint32 report_tag,
+                                 uint32 reset_tag,
+                                 int32 count) {
+  report_tag_ = report_tag;
+  reset_tag_ = reset_tag;
   count_ = (count > 0) ? count : 0;
 }
 
@@ -48,19 +51,24 @@
   record_state_ = kRecordInvalid;
 }
 
-void TaggedCounter::Update(int32 tag, int32 count) {
-  UpdateInternal(tag,
+void TaggedCounter::Update(uint32 report_tag, uint32 reset_tag, int32 count) {
+  UpdateInternal(report_tag,
+                 reset_tag,
                  count,
                  false);  // No flush.
 }
 
 void TaggedCounter::Flush() {
-  UpdateInternal(0,  // tag
+  UpdateInternal(0,  // report_tag
+                 0,  // reset_tag
                  0,  // count
                  true);  // Do flush.
 }
 
-void TaggedCounter::UpdateInternal(int32 tag, int32 count, bool flush) {
+void TaggedCounter::UpdateInternal(uint32 report_tag,
+                                   uint32 reset_tag,
+                                   int32 count,
+                                   bool flush) {
   if (flush) {
     // Flushing but record is null, so nothing to do.
     if (record_state_ == kRecordNull)
@@ -68,11 +76,15 @@
   } else {
     // If there's no new data and the last record in the aggregation
     // file is with the same tag, there's nothing to do.
-    if (count <= 0 && record_state_ == kRecordValid && record_.tag() == tag)
+    if (count <= 0 &&
+        record_state_ == kRecordValid &&
+        record_.report_tag() == report_tag &&
+        record_.reset_tag() == reset_tag)
       return;
   }
 
-  DLOG(INFO) << "tag: " << tag << " count: " << count << " flush: " << flush;
+  DLOG(INFO) << "report_tag: " << report_tag << " reset_tag: " << reset_tag
+             << " count: " << count << " flush: " << flush;
   DCHECK(!filename_.empty());
 
   // NOTE: The assumption is that this TaggedCounter object is the
@@ -84,10 +96,9 @@
     PLOG(WARNING) << "Unable to open the persistent counter file";
     return;
   }
-
   ReadRecord(fd);
-  ReportRecord(tag, flush);
-  UpdateRecord(tag, count, flush);
+  ReportRecord(report_tag, reset_tag, flush);
+  UpdateRecord(report_tag, reset_tag, count, flush);
   WriteRecord(fd);
 
   HANDLE_EINTR(close(fd));
@@ -97,6 +108,9 @@
   if (record_state_ != kRecordInvalid)
     return;
 
+  // Three cases: 1. empty file (first time), 2. size of file == size of record
+  // (normal case), 3. size of file != size of record (new version).  We treat
+  // cases 1 and 3 identically.
   if (HANDLE_EINTR(read(fd, &record_, sizeof(record_))) == sizeof(record_)) {
     if (record_.count() >= 0) {
       record_state_ = kRecordValid;
@@ -111,42 +125,59 @@
   record_state_ = kRecordNull;
 }
 
-void TaggedCounter::ReportRecord(int32 tag, bool flush) {
+void TaggedCounter::ReportRecord(uint32 report_tag,
+                                 uint32 reset_tag,
+                                 bool flush) {
   // If no valid record, there's nothing to report.
   if (record_state_ != kRecordValid) {
     DCHECK_EQ(record_state_, kRecordNull);
     return;
   }
 
-  // If the current record has the same tag as the new tag, it's not
-  // ready to be reported yet.
-  if (!flush && record_.tag() == tag)
+  // If the current record has the same tags as the new ones, it's
+  // not ready to be reported yet.
+  if (!flush &&
+      record_.report_tag() == report_tag &&
+      record_.reset_tag() == reset_tag)
     return;
 
   if (reporter_) {
-    reporter_(reporter_handle_, record_.tag(), record_.count());
+    reporter_(reporter_handle_, record_.count());
   }
-  record_state_ = kRecordNullDirty;
+  // If the report tag has changed, update it to the current one.
+  if (record_.report_tag() != report_tag) {
+    record_.set_report_tag(report_tag);
+    record_state_ = kRecordValidDirty;
+  }
+  // If the reset tag has changed, the new state is NullDirty, no
+  // matter whether the report tag has changed or not.
+  if (record_.reset_tag() != reset_tag) {
+    record_state_ = kRecordNullDirty;
+  }
 }
 
-void TaggedCounter::UpdateRecord(int32 tag, int32 count, bool flush) {
-  if (flush) {
-    DCHECK(record_state_ == kRecordNull || record_state_ == kRecordNullDirty);
+void TaggedCounter::UpdateRecord(uint32 report_tag,
+                                 uint32 reset_tag,
+                                 int32 count,
+                                 bool flush) {
+  if (flush &&
+      (record_state_ == kRecordNull || record_state_ == kRecordNullDirty))
     return;
-  }
 
   switch (record_state_) {
     case kRecordNull:
     case kRecordNullDirty:
       // Current record is null, starting a new record.
-      record_.Init(tag, count);
+      record_.Init(report_tag, reset_tag, count);
       record_state_ = kRecordValidDirty;
       break;
 
     case kRecordValid:
+    case kRecordValidDirty:
       // If there's an existing record for the current tag,
       // accumulates the counts.
-      DCHECK_EQ(record_.tag(), tag);
+      DCHECK_EQ(record_.report_tag(), report_tag);
+      DCHECK_EQ(record_.reset_tag(), reset_tag);
       if (count > 0) {
         record_.Add(count);
         record_state_ = kRecordValidDirty;
@@ -212,7 +243,7 @@
   CHECK(buckets_ > 0);
 }
 
-void TaggedCounterReporter::Report(void* handle, int32 tag, int32 count) {
+void TaggedCounterReporter::Report(void* handle, int32 count) {
   TaggedCounterReporter* this_reporter =
       reinterpret_cast<TaggedCounterReporter*>(handle);
   DLOG(INFO) << "received metric: " << this_reporter->histogram_name_
@@ -236,17 +267,41 @@
 void FrequencyCounter::Init(TaggedCounterInterface* tagged_counter,
                             time_t cycle_duration) {
   tagged_counter_.reset(tagged_counter);
-  DCHECK(cycle_duration > 0);
+  DCHECK_GT(cycle_duration, 0);
   cycle_duration_ = cycle_duration;
 }
 
 void FrequencyCounter::UpdateInternal(int32 count, time_t now) {
-  DCHECK(tagged_counter_.get() != NULL);
-  tagged_counter_->Update(GetCycleNumber(now), count);
+  DCHECK(tagged_counter_.get());
+  tagged_counter_->Update(0, GetCycleNumber(now), count);
 }
 
 int32 FrequencyCounter::GetCycleNumber(time_t now) {
   return now / cycle_duration_;
 }
 
+VersionCounter::VersionCounter() : cycle_duration_(1) {
+}
+
+VersionCounter::~VersionCounter() {
+}
+
+void VersionCounter::Init(TaggedCounterInterface* tagged_counter,
+                          time_t cycle_duration) {
+  tagged_counter_.reset(tagged_counter);
+  DCHECK_GT(cycle_duration, 0);
+  cycle_duration_ = cycle_duration;
+}
+
+void VersionCounter::UpdateInternal(int32 count,
+                                    time_t now,
+                                    uint32 version_hash) {
+  DCHECK(tagged_counter_.get());
+  tagged_counter_->Update(GetCycleNumber(now), version_hash, count);
+}
+
+int32 VersionCounter::GetCycleNumber(time_t now) {
+  return now / cycle_duration_;
+}
+
 }  // namespace chromeos_metrics