libtimeinstate: fix bug in clearUidCpuFreqTimes
In a preallocated bpf hash table, deleting a map element makes it
available for reuse without clearing the data it holds. This reuse can
occur when a bpf program calls bpf_map_update_elem with a key not
already present in the map. If the map is percpu, bpf_map_update_elem
will overwrite the reused entry's data for the current CPU but not for
other CPUs, resulting in stale data that is now associated with the
wrong key.
For time in state, this can show up as impossible data (e.g. reporting
that a UID ran on one cluster at a frequency only available on a
different cluster), which is detected by the SingleAndAllUidConsistent
test since getUidCpuFreqTimes() only looks up potentially valid keys
while getUidsCpuFreqTimes() iterates through every map entry.
Avoid this problem by zeroing out each entry's data prior to deleting
it from the map. Also modify getUidCpuFreqTimes and
getUidsCpuFreqTimes to check for impossible data and fail if any is
detected, since it's a sign that other map entries may also be wrong.
Bug: 138317993
Test: libtimeinstate_test can now be run repeatedly without
SingleAndAllUidConsistent test eventually failing
Signed-off-by: Connor O'Brien <connoro@google.com>
Change-Id: I17dd407f897d1e86eb85cc99842a581d88e5bc78
diff --git a/libs/cputimeinstate/cputimeinstate.cpp b/libs/cputimeinstate/cputimeinstate.cpp
index 0e68e62..a02b285 100644
--- a/libs/cputimeinstate/cputimeinstate.cpp
+++ b/libs/cputimeinstate/cputimeinstate.cpp
@@ -22,6 +22,7 @@
#include <dirent.h>
#include <errno.h>
#include <inttypes.h>
+#include <sys/sysinfo.h>
#include <mutex>
#include <optional>
@@ -167,9 +168,12 @@
return {};
}
for (uint32_t i = 0; i < gNPolicies; ++i) {
- if (idxs[i] == gPolicyFreqs[i].size() || freq != gPolicyFreqs[i][idxs[i]]) continue;
uint64_t time = 0;
for (uint32_t cpu : gPolicyCpus[i]) time += value.ar[cpu];
+ if (idxs[i] == gPolicyFreqs[i].size() || freq != gPolicyFreqs[i][idxs[i]]) {
+ if (time != 0) return {};
+ else continue;
+ }
idxs[i] += 1;
out[i].emplace_back(time);
}
@@ -209,10 +213,12 @@
}
for (size_t policy = 0; policy < gNPolicies; ++policy) {
- for (const auto &cpu : gPolicyCpus[policy]) {
- auto freqIdx = policyFreqIdxs[policy][key.freq];
- map[key.uid][policy][freqIdx] += val.ar[cpu];
- }
+ uint64_t time = 0;
+ for (const auto &cpu : gPolicyCpus[policy]) time += val.ar[cpu];
+ if (!time) continue;
+ auto it = policyFreqIdxs[policy].find(key.freq);
+ if (it == policyFreqIdxs[policy].end()) return android::netdutils::Status(-1);
+ map[key.uid][policy][it->second] += time;
}
return android::netdutils::status::ok;
};
@@ -225,9 +231,10 @@
if (!gInitialized && !initGlobals()) return false;
time_key_t key = {.uid = uid, .freq = 0};
- std::vector<uint32_t> idxs(gNPolicies, 0);
+ std::vector<uint64_t> vals(get_nprocs_conf(), 0);
for (auto freq : gAllFreqs) {
key.freq = freq;
+ if (writeToMapEntry(gMapFd, &key, vals.data(), BPF_EXIST) && errno != ENOENT) return false;
if (deleteMapEntry(gMapFd, &key) && errno != ENOENT) return false;
}
return true;