Use eBPF-based time-in-state monitoring for groups of threads
Bug: 169279846
Test: atest libtimeinstate_test
Change-Id: I69de04cd93f065e4e2e4d78b7bb5a35fea8811ca
diff --git a/libs/cputimeinstate/Android.bp b/libs/cputimeinstate/Android.bp
index b1943a4..e3cd085 100644
--- a/libs/cputimeinstate/Android.bp
+++ b/libs/cputimeinstate/Android.bp
@@ -33,5 +33,6 @@
"-Wall",
"-Wextra",
],
+ require_root: true,
}
diff --git a/libs/cputimeinstate/cputimeinstate.cpp b/libs/cputimeinstate/cputimeinstate.cpp
index 6058430..4209dc5 100644
--- a/libs/cputimeinstate/cputimeinstate.cpp
+++ b/libs/cputimeinstate/cputimeinstate.cpp
@@ -59,6 +59,7 @@
static unique_fd gTisMapFd;
static unique_fd gConcurrentMapFd;
static unique_fd gUidLastUpdateMapFd;
+static unique_fd gPidTisMapFd;
static std::optional<std::vector<uint32_t>> readNumbersFromFile(const std::string &path) {
std::string data;
@@ -139,6 +140,12 @@
unique_fd{bpf_obj_get(BPF_FS_PATH "map_time_in_state_uid_last_update_map")};
if (gUidLastUpdateMapFd < 0) return false;
+ gPidTisMapFd = unique_fd{mapRetrieveRO(BPF_FS_PATH "map_time_in_state_pid_time_in_state_map")};
+ if (gPidTisMapFd < 0) return false;
+
+ unique_fd trackedPidMapFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_map"));
+ if (trackedPidMapFd < 0) return false;
+
gInitialized = true;
return true;
}
@@ -222,7 +229,8 @@
}
gTracking = attachTracepointProgram("sched", "sched_switch") &&
- attachTracepointProgram("power", "cpu_frequency");
+ attachTracepointProgram("power", "cpu_frequency") &&
+ attachTracepointProgram("sched", "sched_process_free");
return gTracking;
}
@@ -502,5 +510,106 @@
return true;
}
+bool startTrackingProcessCpuTimes(pid_t pid) {
+ if (!gInitialized && !initGlobals()) return false;
+
+ unique_fd trackedPidHashMapFd(
+ mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_hash_map"));
+ if (trackedPidHashMapFd < 0) return false;
+
+ unique_fd trackedPidMapFd(mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_tracked_map"));
+ if (trackedPidMapFd < 0) return false;
+
+ for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
+ // Find first available [index, pid] entry in the pid_tracked_hash_map map
+ if (writeToMapEntry(trackedPidHashMapFd, &index, &pid, BPF_NOEXIST) != 0) {
+ if (errno != EEXIST) {
+ return false;
+ }
+ continue; // This index is already taken
+ }
+
+ tracked_pid_t tracked_pid = {.pid = pid, .state = TRACKED_PID_STATE_ACTIVE};
+ if (writeToMapEntry(trackedPidMapFd, &index, &tracked_pid, BPF_ANY) != 0) {
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+// Marks the specified task identified by its PID (aka TID) for CPU time-in-state tracking
+// aggregated with other tasks sharing the same TGID and aggregation key.
+bool startAggregatingTaskCpuTimes(pid_t pid, uint16_t aggregationKey) {
+ if (!gInitialized && !initGlobals()) return false;
+
+ unique_fd taskAggregationMapFd(
+ mapRetrieveWO(BPF_FS_PATH "map_time_in_state_pid_task_aggregation_map"));
+ if (taskAggregationMapFd < 0) return false;
+
+ return writeToMapEntry(taskAggregationMapFd, &pid, &aggregationKey, BPF_ANY) == 0;
+}
+
+// Retrieves the times in ns that each thread spent running at each CPU freq, aggregated by
+// aggregation key.
+// Return contains no value on error, otherwise it contains a map from aggregation keys
+// to vectors of vectors using the format:
+// { aggKey0 -> [[t0_0_0, t0_0_1, ...], [t0_1_0, t0_1_1, ...], ...],
+// aggKey1 -> [[t1_0_0, t1_0_1, ...], [t1_1_0, t1_1_1, ...], ...], ... }
+// where ti_j_k is the ns tid i spent running on the jth cluster at the cluster's kth lowest freq.
+std::optional<std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>>>
+getAggregatedTaskCpuFreqTimes(pid_t tgid, const std::vector<uint16_t> &aggregationKeys) {
+ if (!gInitialized && !initGlobals()) return {};
+
+ uint32_t maxFreqCount = 0;
+ std::vector<std::vector<uint64_t>> mapFormat;
+ for (const auto &freqList : gPolicyFreqs) {
+ if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
+ mapFormat.emplace_back(freqList.size(), 0);
+ }
+
+ bool dataCollected = false;
+ std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>> map;
+ std::vector<tis_val_t> vals(gNCpus);
+ for (uint16_t aggregationKey : aggregationKeys) {
+ map.emplace(aggregationKey, mapFormat);
+
+ aggregated_task_tis_key_t key{.tgid = tgid, .aggregation_key = aggregationKey};
+ for (key.bucket = 0; key.bucket <= (maxFreqCount - 1) / FREQS_PER_ENTRY; ++key.bucket) {
+ if (findMapEntry(gPidTisMapFd, &key, vals.data()) != 0) {
+ if (errno != ENOENT) {
+ return {};
+ }
+ continue;
+ } else {
+ dataCollected = true;
+ }
+
+ // Combine data by aggregating time-in-state data grouped by CPU cluster aka policy.
+ uint32_t offset = key.bucket * FREQS_PER_ENTRY;
+ uint32_t nextOffset = offset + FREQS_PER_ENTRY;
+ for (uint32_t j = 0; j < gNPolicies; ++j) {
+ if (offset >= gPolicyFreqs[j].size()) continue;
+ auto begin = map[key.aggregation_key][j].begin() + offset;
+ auto end = nextOffset < gPolicyFreqs[j].size() ? begin + FREQS_PER_ENTRY
+ : map[key.aggregation_key][j].end();
+ for (const auto &cpu : gPolicyCpus[j]) {
+ std::transform(begin, end, std::begin(vals[cpu].ar), begin,
+ std::plus<uint64_t>());
+ }
+ }
+ }
+ }
+
+ if (!dataCollected) {
+ // Check if eBPF is supported on this device. If it is, gTisMap should not be empty.
+ time_key_t key;
+ if (getFirstMapKey(gTisMapFd, &key) != 0) {
+ return {};
+ }
+ }
+ return map;
+}
+
} // namespace bpf
} // namespace android
diff --git a/libs/cputimeinstate/cputimeinstate.h b/libs/cputimeinstate/cputimeinstate.h
index b7600f5..87a328a 100644
--- a/libs/cputimeinstate/cputimeinstate.h
+++ b/libs/cputimeinstate/cputimeinstate.h
@@ -41,5 +41,10 @@
getUidsUpdatedConcurrentTimes(uint64_t *lastUpdate);
bool clearUidTimes(unsigned int uid);
+bool startTrackingProcessCpuTimes(pid_t pid);
+bool startAggregatingTaskCpuTimes(pid_t pid, uint16_t aggregationKey);
+std::optional<std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>>>
+getAggregatedTaskCpuFreqTimes(pid_t pid, const std::vector<uint16_t> &aggregationKeys);
+
} // namespace bpf
} // namespace android
diff --git a/libs/cputimeinstate/testtimeinstate.cpp b/libs/cputimeinstate/testtimeinstate.cpp
index 0d5f412..519689b 100644
--- a/libs/cputimeinstate/testtimeinstate.cpp
+++ b/libs/cputimeinstate/testtimeinstate.cpp
@@ -19,6 +19,8 @@
#include <sys/sysinfo.h>
+#include <pthread.h>
+#include <semaphore.h>
#include <numeric>
#include <unordered_map>
#include <vector>
@@ -504,5 +506,85 @@
for (size_t i = 0; i < freqs->size(); ++i) EXPECT_EQ((*freqs)[i].size(), (*times)[i].size());
}
+uint64_t timeNanos() {
+ struct timespec spec;
+ clock_gettime(CLOCK_MONOTONIC, &spec);
+ return spec.tv_sec * 1000000000 + spec.tv_nsec;
+}
+
+// Keeps CPU busy with some number crunching
+void useCpu() {
+ long sum = 0;
+ for (int i = 0; i < 100000; i++) {
+ sum *= i;
+ }
+}
+
+sem_t pingsem, pongsem;
+
+void *testThread(void *) {
+ for (int i = 0; i < 10; i++) {
+ sem_wait(&pingsem);
+ useCpu();
+ sem_post(&pongsem);
+ }
+ return nullptr;
+}
+
+TEST(TimeInStateTest, GetAggregatedTaskCpuFreqTimes) {
+ uint64_t startTimeNs = timeNanos();
+
+ sem_init(&pingsem, 0, 1);
+ sem_init(&pongsem, 0, 0);
+
+ pthread_t thread;
+ ASSERT_EQ(pthread_create(&thread, NULL, &testThread, NULL), 0);
+
+ // This process may have been running for some time, so when we start tracking
+ // CPU time, the very first switch may include the accumulated time.
+ // Yield the remainder of this timeslice to the newly created thread.
+ sem_wait(&pongsem);
+ sem_post(&pingsem);
+
+ pid_t tgid = getpid();
+ startTrackingProcessCpuTimes(tgid);
+
+ pid_t tid = pthread_gettid_np(thread);
+ startAggregatingTaskCpuTimes(tid, 42);
+
+ // Play ping-pong with the other thread to ensure that both threads get
+ // some CPU time.
+ for (int i = 0; i < 9; i++) {
+ sem_wait(&pongsem);
+ useCpu();
+ sem_post(&pingsem);
+ }
+
+ pthread_join(thread, NULL);
+
+ std::optional<std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>>> optionalMap =
+ getAggregatedTaskCpuFreqTimes(tgid, {0, 42});
+ ASSERT_TRUE(optionalMap);
+
+ std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>> map = *optionalMap;
+ ASSERT_EQ(map.size(), 2u);
+
+ uint64_t testDurationNs = timeNanos() - startTimeNs;
+ for (auto pair : map) {
+ uint16_t aggregationKey = pair.first;
+ ASSERT_TRUE(aggregationKey == 0 || aggregationKey == 42);
+
+ std::vector<std::vector<uint64_t>> timesInState = pair.second;
+ uint64_t totalCpuTime = 0;
+ for (size_t i = 0; i < timesInState.size(); i++) {
+ for (size_t j = 0; j < timesInState[i].size(); j++) {
+ totalCpuTime += timesInState[i][j];
+ }
+ }
+ ASSERT_GT(totalCpuTime, 0ul);
+ ASSERT_LE(totalCpuTime, testDurationNs);
+ }
+}
+
} // namespace bpf
} // namespace android