Statsd CPU optimization.
The key change is a revamp of how we parse, store, and match a log event, especially how we match
repeated fields and attribution nodes, and how we construct and compare dimensions.
+ We use an integer to encode the field of a log element, and we encode the FieldMatcher into an
integer and a bit mask. Log matching then becomes 2 integer operations.
+ A dimension is stored as an encoded field and value pair. Checking whether 2 dimensions are equal
then becomes checking whether the underlying integers are equal. The integers are stored contiguously
in memory, so this is much faster than the previous tree structure.
Start review from FieldValue.h
Test: statsd_test + new unit tests
Bug: 72659059
Change-Id: Iec8daeacdd3f39ab297c10ab9cd7b710a9c42e86
diff --git a/cmds/statsd/src/logd/LogEvent.cpp b/cmds/statsd/src/logd/LogEvent.cpp
index e1ab5d5..909b74f 100644
--- a/cmds/statsd/src/logd/LogEvent.cpp
+++ b/cmds/statsd/src/logd/LogEvent.cpp
@@ -17,13 +17,8 @@
#define DEBUG false // STOPSHIP if true
#include "logd/LogEvent.h"
-#include "frameworks/base/cmds/statsd/src/statsd_config.pb.h"
-
-#include <set>
#include <sstream>
-#include "field_util.h"
-#include "dimension.h"
#include "stats_log_util.h"
namespace android {
@@ -152,9 +147,6 @@
if (android_log_write_string8(mContext, node.tag().c_str()) < 0) {
return false;
}
- if (android_log_write_int32(mContext, node.uid()) < 0) {
- return false;
- }
if (android_log_write_list_end(mContext) < 0) {
return false;
}
@@ -163,47 +155,23 @@
return false;
}
-namespace {
-
-void increaseField(Field *field, bool is_child) {
- if (is_child) {
- if (field->child_size() <= 0) {
- field->add_child();
- }
- } else {
- field->clear_child();
- }
- Field* curr = is_child ? field->mutable_child(0) : field;
- if (!curr->has_field()) {
- curr->set_field(1);
- } else {
- curr->set_field(curr->field() + 1);
- }
-}
-
-} // namespace
-
/**
* The elements of each log event are stored as a vector of android_log_list_elements.
* The goal is to do as little preprocessing as possible, because we read a tiny fraction
* of the elements that are written to the log.
+ *
+ * The idea here is to read through the log items once and collect as much of the information
+ * needed for matching as possible, because this log will be matched against lots of matchers.
*/
void LogEvent::init(android_log_context context) {
- if (!context) {
- return;
- }
android_log_list_element elem;
- // TODO: The log is actually structured inside one list. This is convenient
- // because we'll be able to use it to put the attribution (WorkSource) block first
- // without doing our own tagging scheme. Until that change is in, just drop the
- // list-related log elements and the order we get there is our index-keyed data
- // structure.
int i = 0;
int seenListStart = 0;
- Field fieldTree;
- Field* atomField = fieldTree.add_child();
+ int32_t field = 0;
+ int depth = -1;
+ int pos[] = {1, 1, 1};
do {
elem = android_log_read_next(context);
switch ((int)elem.type) {
@@ -211,55 +179,81 @@
// elem at [0] is EVENT_TYPE_LIST, [1] is the tag id.
if (i == 1) {
mTagId = elem.data.int32;
- fieldTree.set_field(mTagId);
} else {
- increaseField(atomField, seenListStart > 0/* is_child */);
- mFieldValueMap[fieldTree].set_value_int(elem.data.int32);
- }
- break;
- case EVENT_TYPE_FLOAT:
- {
- increaseField(atomField, seenListStart > 0/* is_child */);
- mFieldValueMap[fieldTree].set_value_float(elem.data.float32);
- }
- break;
- case EVENT_TYPE_STRING:
- {
- increaseField(atomField, seenListStart > 0/* is_child */);
- mFieldValueMap[fieldTree].set_value_str(
- string(elem.data.string, elem.len).c_str());
- }
- break;
- case EVENT_TYPE_LONG:
- {
- increaseField(atomField, seenListStart > 0 /* is_child */);
- mFieldValueMap[fieldTree].set_value_long(elem.data.int64);
- }
- break;
- case EVENT_TYPE_LIST:
- if (i >= 1) {
- if (seenListStart > 0) {
- increasePosition(atomField);
- } else {
- increaseField(atomField, false /* is_child */);
- }
- seenListStart++;
- if (seenListStart >= 3) {
- ALOGE("Depth > 2. Not supported!");
+ if (depth < 0 || depth > 2) {
return;
}
+
+ mValues.push_back(
+ FieldValue(Field(mTagId, pos, depth), Value((int32_t)elem.data.int32)));
+
+ pos[depth]++;
}
break;
- case EVENT_TYPE_LIST_STOP:
- seenListStart--;
- if (seenListStart == 0) {
- atomField->clear_position_index();
- } else {
- if (atomField->child_size() > 0) {
- atomField->mutable_child(0)->clear_field();
+ case EVENT_TYPE_FLOAT: {
+ if (depth < 0 || depth > 2) {
+ ALOGE("Depth > 2. Not supported!");
+ return;
+ }
+
+ mValues.push_back(FieldValue(Field(mTagId, pos, depth), Value(elem.data.float32)));
+
+ pos[depth]++;
+
+ } break;
+ case EVENT_TYPE_STRING: {
+ if (depth < 0 || depth > 2) {
+ ALOGE("Depth > 2. Not supported!");
+ return;
+ }
+
+ mValues.push_back(FieldValue(Field(mTagId, pos, depth),
+ Value(string(elem.data.string, elem.len))));
+
+ pos[depth]++;
+
+ } break;
+ case EVENT_TYPE_LONG: {
+ if (depth < 0 || depth > 2) {
+ ALOGE("Depth > 2. Not supported!");
+ return;
+ }
+ mValues.push_back(
+ FieldValue(Field(mTagId, pos, depth), Value((int64_t)elem.data.int64)));
+
+ pos[depth]++;
+
+ } break;
+ case EVENT_TYPE_LIST:
+ depth++;
+ if (depth > 2) {
+ ALOGE("Depth > 2. Not supported!");
+ return;
+ }
+ pos[depth] = 1;
+
+ break;
+ case EVENT_TYPE_LIST_STOP: {
+ int prevDepth = depth;  // depth of the list that is being closed
+ depth--;
+ if (depth >= 0 && depth < 2) {
+ // Now go back to decorate the previous items that are last at prevDepth.
+ // So that we can later easily match them with Position=Last matchers.
+ pos[prevDepth]--;  // pos[] is one past the last element written; step back onto it
+ int path = getEncodedField(pos, prevDepth, false);
+ for (int j = static_cast<int>(mValues.size()) - 1; j >= 0; j--) {  // signed index: a size_t counter never goes below 0 and wraps on an empty vector or after index 0
+ if (mValues[j].mField.getDepth() >= prevDepth &&
+ mValues[j].mField.getPath(prevDepth) == path) {
+ mValues[j].mField.decorateLastPos(prevDepth);
+ } else {
+ // Safe to break, because the items are in DFS order.
+ break;
+ }
+ }
+ pos[depth]++;  // the closed list took one position in its parent list
 }
break;
+ }
case EVENT_TYPE_UNKNOWN:
break;
default:
@@ -270,162 +264,115 @@
}
int64_t LogEvent::GetLong(size_t key, status_t* err) const {
- DimensionsValue value;
- if (!GetSimpleAtomDimensionsValueProto(key, &value)) {
- *err = BAD_INDEX;
- return 0;
- }
- const DimensionsValue* leafValue = getSingleLeafValue(&value);
- switch (leafValue->value_case()) {
- case DimensionsValue::ValueCase::kValueInt:
- return (int64_t)leafValue->value_int();
- case DimensionsValue::ValueCase::kValueLong:
- return leafValue->value_long();
- case DimensionsValue::ValueCase::kValueBool:
- return leafValue->value_bool() ? 1 : 0;
- case DimensionsValue::ValueCase::kValueFloat:
- return (int64_t)leafValue->value_float();
- case DimensionsValue::ValueCase::kValueTuple:
- case DimensionsValue::ValueCase::kValueStr:
- case DimensionsValue::ValueCase::VALUE_NOT_SET: {
- *err = BAD_TYPE;
- return 0;
+ // TODO: encapsulate the magical operations all in Field struct as a static function.
+ int field = getSimpleField(key);  // encoded field to search for; presumably the top-level field at position `key` (confirm in getSimpleField)
+ for (const auto& value : mValues) {
+ if (value.mField.getField() == field) {
+ const auto type = value.mValue.getType();
+ if (type == LONG) return value.mValue.long_value;  // 64-bit values are the primary case for GetLong
+ if (type == INT) return value.mValue.int_value;  // 32-bit values widen to int64_t; *err is left untouched on success
+ // Any other stored type cannot be returned as an integer.
+ *err = BAD_TYPE;
+ return 0;
+ }
+ if ((size_t)value.mField.getPosAtDepth(0) > key) {
+ break;  // past position `key` at depth 0; relies on mValues being ordered by position
 }
}
+
+ *err = BAD_INDEX;
+ return 0;
}
int LogEvent::GetInt(size_t key, status_t* err) const {
- DimensionsValue value;
- if (!GetSimpleAtomDimensionsValueProto(key, &value)) {
+ int field = getSimpleField(key);  // encoded field to search for; presumably the top-level field at position `key` (confirm in getSimpleField)
+ for (const auto& value : mValues) {  // linear scan over the parsed values
+ if (value.mField.getField() == field) {
+ if (value.mValue.getType() == INT) {
+ return value.mValue.int_value;  // *err is left untouched on success
+ } else {
+ *err = BAD_TYPE;  // the field exists but does not hold an INT
+ return 0;
+ }
+ }
+ if ((size_t)value.mField.getPosAtDepth(0) > key) {
+ break;  // past position `key` at depth 0; relies on mValues being ordered by position
+ }
+ }
+
*err = BAD_INDEX;
return 0;
- }
- const DimensionsValue* leafValue = getSingleLeafValue(&value);
- switch (leafValue->value_case()) {
- case DimensionsValue::ValueCase::kValueInt:
- return leafValue->value_int();
- case DimensionsValue::ValueCase::kValueLong:
- case DimensionsValue::ValueCase::kValueBool:
- case DimensionsValue::ValueCase::kValueFloat:
- case DimensionsValue::ValueCase::kValueTuple:
- case DimensionsValue::ValueCase::kValueStr:
- case DimensionsValue::ValueCase::VALUE_NOT_SET: {
- *err = BAD_TYPE;
- return 0;
- }
- }
}
const char* LogEvent::GetString(size_t key, status_t* err) const {
- DimensionsValue value;
- if (!GetSimpleAtomDimensionsValueProto(key, &value)) {
- *err = BAD_INDEX;
- return 0;
- }
- const DimensionsValue* leafValue = getSingleLeafValue(&value);
- switch (leafValue->value_case()) {
- case DimensionsValue::ValueCase::kValueStr:
- return leafValue->value_str().c_str();
- case DimensionsValue::ValueCase::kValueInt:
- case DimensionsValue::ValueCase::kValueLong:
- case DimensionsValue::ValueCase::kValueBool:
- case DimensionsValue::ValueCase::kValueFloat:
- case DimensionsValue::ValueCase::kValueTuple:
- case DimensionsValue::ValueCase::VALUE_NOT_SET: {
- *err = BAD_TYPE;
- return 0;
+ int field = getSimpleField(key);  // encoded field to search for; presumably the top-level field at position `key` (confirm in getSimpleField)
+ for (const auto& value : mValues) {  // linear scan over the parsed values
+ if (value.mField.getField() == field) {
+ if (value.mValue.getType() == STRING) {
+ return value.mValue.str_value.c_str();  // points into the stored value, not a copy; *err is left untouched on success
+ } else {
+ *err = BAD_TYPE;  // the field exists but does not hold a STRING
+ return 0;  // null pointer
+ }
+ }
+ if ((size_t)value.mField.getPosAtDepth(0) > key) {
+ break;  // past position `key` at depth 0; relies on mValues being ordered by position
 }
}
+
+ *err = BAD_INDEX;  // no value carries the requested field
+ return NULL;
}
bool LogEvent::GetBool(size_t key, status_t* err) const {
- DimensionsValue value;
- if (!GetSimpleAtomDimensionsValueProto(key, &value)) {
- *err = BAD_INDEX;
- return 0;
- }
- const DimensionsValue* leafValue = getSingleLeafValue(&value);
- switch (leafValue->value_case()) {
- case DimensionsValue::ValueCase::kValueInt:
- return leafValue->value_int() != 0;
- case DimensionsValue::ValueCase::kValueLong:
- return leafValue->value_long() != 0;
- case DimensionsValue::ValueCase::kValueBool:
- return leafValue->value_bool();
- case DimensionsValue::ValueCase::kValueFloat:
- return leafValue->value_float() != 0;
- case DimensionsValue::ValueCase::kValueTuple:
- case DimensionsValue::ValueCase::kValueStr:
- case DimensionsValue::ValueCase::VALUE_NOT_SET: {
- *err = BAD_TYPE;
- return 0;
+ int field = getSimpleField(key);  // encoded field to search for; presumably the top-level field at position `key` (confirm in getSimpleField)
+ for (const auto& value : mValues) {  // linear scan over the parsed values
+ if (value.mField.getField() == field) {
+ if (value.mValue.getType() == INT) {
+ return value.mValue.int_value != 0;  // any non-zero integer reads as true; *err is left untouched on success
+ } else if (value.mValue.getType() == LONG) {
+ return value.mValue.long_value != 0;  // same rule for 64-bit values
+ } else {
+ *err = BAD_TYPE;  // the field exists but holds neither INT nor LONG
+ return false;
+ }
+ }
+ if ((size_t)value.mField.getPosAtDepth(0) > key) {
+ break;  // past position `key` at depth 0; relies on mValues being ordered by position
 }
}
+
+ *err = BAD_INDEX;  // no value carries the requested field
+ return false;
}
float LogEvent::GetFloat(size_t key, status_t* err) const {
- DimensionsValue value;
- if (!GetSimpleAtomDimensionsValueProto(key, &value)) {
- *err = BAD_INDEX;
- return 0;
- }
- const DimensionsValue* leafValue = getSingleLeafValue(&value);
- switch (leafValue->value_case()) {
- case DimensionsValue::ValueCase::kValueInt:
- return (float)leafValue->value_int();
- case DimensionsValue::ValueCase::kValueLong:
- return (float)leafValue->value_long();
- case DimensionsValue::ValueCase::kValueBool:
- return leafValue->value_bool() ? 1.0f : 0.0f;
- case DimensionsValue::ValueCase::kValueFloat:
- return leafValue->value_float();
- case DimensionsValue::ValueCase::kValueTuple:
- case DimensionsValue::ValueCase::kValueStr:
- case DimensionsValue::ValueCase::VALUE_NOT_SET: {
- *err = BAD_TYPE;
- return 0;
+ int field = getSimpleField(key);  // encoded field to search for; presumably the top-level field at position `key` (confirm in getSimpleField)
+ for (const auto& value : mValues) {  // linear scan over the parsed values
+ if (value.mField.getField() == field) {
+ if (value.mValue.getType() == FLOAT) {
+ return value.mValue.float_value;  // *err is left untouched on success
+ } else {
+ *err = BAD_TYPE;  // only FLOAT is accepted here; INT and LONG values are not converted
+ return 0.0;
+ }
+ }
+ if ((size_t)value.mField.getPosAtDepth(0) > key) {
+ break;  // past position `key` at depth 0; relies on mValues being ordered by position
 }
}
-}
-void LogEvent::GetAtomDimensionsValueProtos(const FieldMatcher& matcher,
- std::vector<DimensionsValue> *dimensionsValues) const {
- findDimensionsValues(mFieldValueMap, matcher, dimensionsValues);
-}
-
-bool LogEvent::GetAtomDimensionsValueProto(const FieldMatcher& matcher,
- DimensionsValue* dimensionsValue) const {
- std::vector<DimensionsValue> rootDimensionsValues;
- findDimensionsValues(mFieldValueMap, matcher, &rootDimensionsValues);
- if (rootDimensionsValues.size() != 1) {
- return false;
- }
- *dimensionsValue = rootDimensionsValues.front();
- return true;
-}
-
-bool LogEvent::GetSimpleAtomDimensionsValueProto(size_t atomField,
- DimensionsValue* dimensionsValue) const {
- FieldMatcher matcher;
- buildSimpleAtomFieldMatcher(mTagId, atomField, &matcher);
- return GetAtomDimensionsValueProto(matcher, dimensionsValue);
-}
-
-DimensionsValue* LogEvent::findFieldValueOrNull(const Field& field) {
- auto it = mFieldValueMap.find(field);
- if (it == mFieldValueMap.end()) {
- return nullptr;
- }
- return &it->second;
+ *err = BAD_INDEX;  // no value carries the requested field
+ return 0.0;
}
string LogEvent::ToString() const {
ostringstream result;
result << "{ " << mTimestampNs << " (" << mTagId << ")";
- for (const auto& itr : mFieldValueMap) {
- result << FieldToString(itr.first);
+ for (const auto& value : mValues) {
+ result << StringPrintf("%#x", value.mField.getField());
result << "->";
- result << DimensionsValueToString(itr.second);
+ result << value.mValue.toString();
result << " ";
}
result << " }";
@@ -433,7 +380,7 @@
}
void LogEvent::ToProto(ProtoOutputStream& protoOutput) const {
- writeFieldValueTreeToStream(getFieldValueMap(), &protoOutput);
+ writeFieldValueTreeToStream(mTagId, getValues(), &protoOutput);  // hands the tag id and getValues() to the helper, which presumably writes them into protoOutput (confirm in its definition)
}
} // namespace statsd