Upload GPU frequency info via statsd
Bug: b/213577484
Change-Id: I6a1035932db3426e69aad81df8b2d03f6b37c9d6
diff --git a/services/gpuservice/gpuwork/Android.bp b/services/gpuservice/gpuwork/Android.bp
index e6bad47..89b31a6 100644
--- a/services/gpuservice/gpuwork/Android.bp
+++ b/services/gpuservice/gpuwork/Android.bp
@@ -26,10 +26,13 @@
],
shared_libs: [
"libbase",
+ "libbinder",
"libbpf_bcc",
"libbpf_android",
"libcutils",
"liblog",
+ "libstatslog",
+ "libstatspull",
"libutils",
],
export_include_dirs: [
@@ -41,6 +44,7 @@
export_shared_lib_headers: [
"libbase",
"libbpf_android",
+ "libstatspull",
],
cppflags: [
"-Wall",
diff --git a/services/gpuservice/gpuwork/GpuWork.cpp b/services/gpuservice/gpuwork/GpuWork.cpp
index e2b7d34..e7b1cd4 100644
--- a/services/gpuservice/gpuwork/GpuWork.cpp
+++ b/services/gpuservice/gpuwork/GpuWork.cpp
@@ -21,16 +21,22 @@
#include "gpuwork/GpuWork.h"
#include <android-base/stringprintf.h>
+#include <binder/PermissionCache.h>
#include <bpf/WaitForProgsLoaded.h>
#include <libbpf.h>
#include <libbpf_android.h>
#include <log/log.h>
+#include <random>
+#include <stats_event.h>
+#include <statslog.h>
#include <unistd.h>
#include <utils/Timers.h>
#include <utils/Trace.h>
+#include <bit>
#include <chrono>
#include <cstdint>
+#include <limits>
#include <map>
#include <mutex>
#include <unordered_map>
@@ -58,6 +64,37 @@
return true;
}
+// Saturating conversion to int32_t. Bit-preserving conversion is bitcast_int32.
+// Both primaries are deleted: only the explicit specializations are callable.
+template <typename SourceType>
+inline int32_t cast_int32(SourceType) = delete;
+
+template <typename SourceType>
+inline int32_t bitcast_int32(SourceType) = delete;
+
+// Copies the 32 bits of |source| into a signed value; no range clamping.
+template <>
+inline int32_t bitcast_int32<uint32_t>(uint32_t source) {
+    int32_t reinterpreted;
+    memcpy(&reinterpreted, &source, sizeof(reinterpreted));
+    return reinterpreted;
+}
+
+// Values above INT32_MAX are clamped to INT32_MAX.
+template <>
+inline int32_t cast_int32<uint64_t>(uint64_t source) {
+    constexpr int32_t kMax = std::numeric_limits<int32_t>::max();
+    return source > static_cast<uint64_t>(kMax) ? kMax : static_cast<int32_t>(source);
+}
+
+// Values outside [INT32_MIN, INT32_MAX] are clamped to the nearest bound.
+template <>
+inline int32_t cast_int32<long long>(long long source) {
+    constexpr long long kMin = std::numeric_limits<int32_t>::min();
+    constexpr long long kMax = std::numeric_limits<int32_t>::max();
+    return static_cast<int32_t>(source > kMax ? kMax : (source < kMin ? kMin : source));
+}
+
} // namespace
using base::StringAppendF;
@@ -76,6 +113,13 @@
mMapClearerThread.join();
}
+ {
+ std::scoped_lock<std::mutex> lock(mMutex);
+ if (mStatsdRegistered) {
+ AStatsManager_clearPullAtomCallback(android::util::GPU_FREQ_TIME_IN_STATE_PER_UID);
+ }
+ }
+
bpf_detach_tracepoint("power", "gpu_work_period");
}
@@ -83,6 +127,8 @@
// Make sure BPF programs are loaded.
bpf::waitForProgsLoaded();
+ waitForPermissions();
+
// Get the BPF maps before trying to attach the BPF program; if we can't get
// the maps then there is no point in attaching the BPF program.
{
@@ -95,6 +141,8 @@
if (!getBpfMap("/sys/fs/bpf/map_gpu_work_gpu_work_global_data", &mGpuWorkGlobalDataMap)) {
return;
}
+
+ mPreviousMapClearTimePoint = std::chrono::steady_clock::now();
}
// Attach the tracepoint ONLY if we got the map above.
@@ -108,6 +156,13 @@
mMapClearerThread.swap(thread);
+ {
+ std::lock_guard<std::mutex> lock(mMutex);
+ AStatsManager_setPullAtomCallback(int32_t{android::util::GPU_FREQ_TIME_IN_STATE_PER_UID},
+ nullptr, GpuWork::pullAtomCallback, this);
+ mStatsdRegistered = true;
+ }
+
ALOGI("Initialized!");
mInitialized.store(true);
@@ -215,6 +270,127 @@
return true;
}
+AStatsManager_PullAtomCallbackReturn GpuWork::pullAtomCallback(int32_t atomTag,
+                                                               AStatsEventList* data,
+                                                               void* cookie) {
+    ATRACE_CALL();
+    // Static trampoline for statsd: |cookie| is the GpuWork* given at registration.
+    auto* gpuWork = static_cast<GpuWork*>(cookie);
+    if (gpuWork == nullptr || atomTag != android::util::GPU_FREQ_TIME_IN_STATE_PER_UID) {
+        // No instance to query, or an atom this puller does not provide.
+        return AStatsManager_PULL_SKIP;
+    }
+    return gpuWork->pullFrequencyAtoms(data);
+}
+
+// Statsd pull handler for GPU_FREQ_TIME_IN_STATE_PER_UID: snapshots |mGpuWorkMap|, reports up
+// to |kNumSampledUids| randomly chosen UIDs into |data|, then calls |clearMap()|. Returns
+// AStatsManager_PULL_SKIP if |data| is null or the map is not ready, else PULL_SUCCESS.
+AStatsManager_PullAtomCallbackReturn GpuWork::pullFrequencyAtoms(AStatsEventList* data) {
+    ATRACE_CALL();
+    if (!data || !mInitialized.load()) {
+        return AStatsManager_PULL_SKIP;
+    }
+
+    std::lock_guard<std::mutex> lock(mMutex);
+    if (!mGpuWorkMap.isValid()) {
+        return AStatsManager_PULL_SKIP;
+    }
+
+    std::unordered_map<Uid, UidTrackingInfo> uidInfos;
+    // Iteration of BPF hash maps can be unreliable (no data races, but elements
+    // may be repeated), as the map is typically being modified by other
+    // threads. The buckets are all preallocated. Our eBPF program only updates
+    // entries (in-place) or adds entries. |GpuWork| only iterates or clears the
+    // map while holding |mMutex|. Given this, we should be able to iterate over
+    // all elements reliably. In the worst case, we might see elements more than
+    // once.
+
+    // Note that userspace reads of BPF maps make a copy of the value, and thus
+    // the returned value is not being concurrently accessed by the BPF program
+    // (no atomic reads needed below).
+    const auto iterateResult = mGpuWorkMap.iterateWithValue(
+            [&uidInfos](const Uid& key, const UidTrackingInfo& value,
+                        const android::bpf::BpfMap<Uid, UidTrackingInfo>&) -> base::Result<void> {
+                uidInfos[key] = value;
+                return {};
+            });
+    if (!iterateResult.ok()) {
+        ALOGW("pullFrequencyAtoms: iterating the GPU work map failed; stats may be incomplete");
+    }
+    ALOGI("pullFrequencyAtoms: uidInfos.size() == %zu", uidInfos.size());
+
+    // Get a list of just the UIDs; the order does not matter.
+    std::vector<Uid> uids;
+    uids.reserve(uidInfos.size());
+    for (const auto& pair : uidInfos) {
+        uids.push_back(pair.first);
+    }
+
+    // If we have more than |kNumSampledUids| UIDs, choose |kNumSampledUids|
+    // random UIDs. We swap them to the front of the list. Given the list
+    // indices 0..i..n-1, we have the following inclusive-inclusive ranges:
+    // - [0, i-1] == the randomly chosen elements.
+    // - [i, n-1] == the remaining unchosen elements.
+    if (uids.size() > kNumSampledUids) {
+        // The RNG is only needed (and only seeded) when sampling actually happens.
+        std::random_device device;
+        std::default_random_engine random_engine(device());
+        for (size_t i = 0; i < kNumSampledUids; ++i) {
+            std::uniform_int_distribution<size_t> uniform_dist(i, uids.size() - 1);
+            size_t random_index = uniform_dist(random_engine);
+            std::swap(uids[i], uids[random_index]);
+        }
+        // Only keep the front |kNumSampledUids| elements.
+        uids.resize(kNumSampledUids);
+    }
+    ALOGI("pullFrequencyAtoms: uids.size() == %zu", uids.size());
+
+    // Whole seconds since |mPreviousMapClearTimePoint|, saturated to the int32_t range.
+    const auto elapsed = std::chrono::steady_clock::now() - mPreviousMapClearTimePoint;
+    const int32_t duration =
+            cast_int32(std::chrono::duration_cast<std::chrono::seconds>(elapsed).count());
+
+    for (const Uid uid : uids) {
+        const UidTrackingInfo& info = uidInfos[uid];
+        ALOGI("pullFrequencyAtoms: adding stats for UID %" PRIu32, uid);
+        android::util::addAStatsEvent(data, int32_t{android::util::GPU_FREQ_TIME_IN_STATE_PER_UID},
+                                      // uid
+                                      bitcast_int32(uid),
+                                      // time_duration_seconds
+                                      int32_t{duration},
+                                      // max_freq_mhz
+                                      int32_t{1000},
+                                      // freq_0_mhz_time_millis
+                                      cast_int32(info.frequency_times_ns[0] / 1000000),
+                                      // freq_50_mhz_time_millis
+                                      cast_int32(info.frequency_times_ns[1] / 1000000),
+                                      // ... etc. ...
+                                      cast_int32(info.frequency_times_ns[2] / 1000000),
+                                      cast_int32(info.frequency_times_ns[3] / 1000000),
+                                      cast_int32(info.frequency_times_ns[4] / 1000000),
+                                      cast_int32(info.frequency_times_ns[5] / 1000000),
+                                      cast_int32(info.frequency_times_ns[6] / 1000000),
+                                      cast_int32(info.frequency_times_ns[7] / 1000000),
+                                      cast_int32(info.frequency_times_ns[8] / 1000000),
+                                      cast_int32(info.frequency_times_ns[9] / 1000000),
+                                      cast_int32(info.frequency_times_ns[10] / 1000000),
+                                      cast_int32(info.frequency_times_ns[11] / 1000000),
+                                      cast_int32(info.frequency_times_ns[12] / 1000000),
+                                      cast_int32(info.frequency_times_ns[13] / 1000000),
+                                      cast_int32(info.frequency_times_ns[14] / 1000000),
+                                      cast_int32(info.frequency_times_ns[15] / 1000000),
+                                      cast_int32(info.frequency_times_ns[16] / 1000000),
+                                      cast_int32(info.frequency_times_ns[17] / 1000000),
+                                      cast_int32(info.frequency_times_ns[18] / 1000000),
+                                      cast_int32(info.frequency_times_ns[19] / 1000000),
+                                      // freq_1000_mhz_time_millis
+                                      cast_int32(info.frequency_times_ns[20] / 1000000));
+    }
+    clearMap();
+    return AStatsManager_PULL_SUCCESS;
+}
+
void GpuWork::periodicallyClearMap() {
std::unique_lock<std::mutex> lock(mMutex);
@@ -264,6 +440,21 @@
return;
}
+ clearMap();
+}
+
+void GpuWork::clearMap() {
+ if (!mInitialized.load() || !mGpuWorkMap.isValid() || !mGpuWorkGlobalDataMap.isValid()) {
+ ALOGW("Map clearing could not occur because we are not initialized properly");
+ return;
+ }
+
+ base::Result<GlobalData> globalData = mGpuWorkGlobalDataMap.readValue(0);
+ if (!globalData.ok()) {
+ ALOGW("Could not read BPF global data map entry");
+ return;
+ }
+
// Iterating BPF maps to delete keys is tricky. If we just repeatedly call
// |getFirstKey()| and delete that, we may loop forever (or for a long time)
// because our BPF program might be repeatedly re-adding UID keys. Also,
@@ -290,6 +481,23 @@
// |writeValue|.
globalData.value().num_map_entries = 0;
mGpuWorkGlobalDataMap.writeValue(0, globalData.value(), BPF_ANY);
+
+ // Update |mPreviousMapClearTimePoint| so we know when we started collecting
+ // the stats.
+ mPreviousMapClearTimePoint = std::chrono::steady_clock::now();
+}
+
+// Polls once per second for REGISTER_STATS_PULL_ATOM; warns and gives up on timeout.
+void GpuWork::waitForPermissions() {
+    const String16 requiredPermission(kPermissionRegisterStatsPullAtom);
+    for (int failedChecks = 1;; ++failedChecks) {
+        if (PermissionCache::checkPermission(requiredPermission, getpid(), getuid())) return;
+        if (failedChecks > kPermissionsWaitTimeoutSeconds) {
+            ALOGW("Timed out waiting for android.permission.REGISTER_STATS_PULL_ATOM");
+            return;
+        }
+        sleep(1);  // Permission not granted yet; wait before checking again.
+    }
+}
} // namespace gpuwork
diff --git a/services/gpuservice/gpuwork/include/gpuwork/GpuWork.h b/services/gpuservice/gpuwork/include/gpuwork/GpuWork.h
index 0ef10d0..b6f493d 100644
--- a/services/gpuservice/gpuwork/include/gpuwork/GpuWork.h
+++ b/services/gpuservice/gpuwork/include/gpuwork/GpuWork.h
@@ -17,6 +17,7 @@
#pragma once
#include <bpf/BpfMap.h>
+#include <stats_pull_atom_callback.h>
#include <utils/Mutex.h>
#include <utils/String16.h>
#include <utils/Vector.h>
@@ -49,6 +50,13 @@
static bool attachTracepoint(const char* program_path, const char* tracepoint_group,
const char* tracepoint_name);
+ // Native atom puller callback registered in statsd.
+ static AStatsManager_PullAtomCallbackReturn pullAtomCallback(int32_t atomTag,
+ AStatsEventList* data,
+ void* cookie);
+ // Appends GPU_FREQ_TIME_IN_STATE_PER_UID atoms for a sample of UIDs to |data|.
+ AStatsManager_PullAtomCallbackReturn pullFrequencyAtoms(AStatsEventList* data);
+
// Periodically calls |clearMapIfNeeded| to clear the |mGpuWorkMap| map, if
// needed.
//
@@ -61,6 +69,14 @@
// it.
void clearMapIfNeeded() REQUIRES(mMutex);
+ // Clears the |mGpuWorkMap| map.
+ void clearMap() REQUIRES(mMutex);
+
+ // Waits for required permissions to become set. This seems to be needed
+ // because platform service permissions might not be set when a service
+ // first starts. See b/214085769.
+ void waitForPermissions();
+
// Indicates whether our eBPF components have been initialized.
std::atomic<bool> mInitialized = false;
@@ -89,6 +105,22 @@
// The wait duration for the map clearer thread; the thread checks the map
// every ~1 hour.
static constexpr uint32_t kMapClearerWaitDurationSeconds = 60 * 60;
+
+ // Whether our |pullAtomCallback| function is registered.
+ bool mStatsdRegistered GUARDED_BY(mMutex) = false;
+
+ // The number of randomly chosen (i.e. sampled) UIDs to log stats for.
+ static constexpr int kNumSampledUids = 10;
+
+ // The previous time point at which |mGpuWorkMap| was cleared.
+ std::chrono::steady_clock::time_point mPreviousMapClearTimePoint GUARDED_BY(mMutex);
+
+ // Permission to register a statsd puller.
+ static constexpr char16_t kPermissionRegisterStatsPullAtom[] =
+ u"android.permission.REGISTER_STATS_PULL_ATOM";
+
+ // Time limit for waiting for permissions.
+ static constexpr int kPermissionsWaitTimeoutSeconds = 30;
};
} // namespace gpuwork