Expose shared timeline counters into telemetry:
* Add UID into both shared timeline and timestats tracking: due to BLAST
APIs, layer name is insufficient for identifying applications
* Plumb through jank counters from shared timeline into timestats and WW
* Fixed bug where SurfaceflingerDeadlineMissed was not being tracked.
One caveat is that transactions are tracked in shared timeline, so as
a consequence timestats will start tracking them as well.
Bug: 171309796
Test: builds, boots
Test: statsd_testdrive
Test: libsurfacefinger_unittest
Test: dumpsys SurfaceFlinger --timestats -dump
Change-Id: I71057c0976ce81bbb605e126cb30b9d6f06c5873
diff --git a/services/surfaceflinger/TimeStats/TimeStats.cpp b/services/surfaceflinger/TimeStats/TimeStats.cpp
index 37194c6..fe9e737 100644
--- a/services/surfaceflinger/TimeStats/TimeStats.cpp
+++ b/services/surfaceflinger/TimeStats/TimeStats.cpp
@@ -14,9 +14,6 @@
* limitations under the License.
*/
-// TODO(b/129481165): remove the #pragma below and fix conversion issues
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wconversion"
#undef LOG_TAG
#define LOG_TAG "TimeStats"
#define ATRACE_TAG ATRACE_TAG_GRAPHICS
@@ -33,6 +30,8 @@
#include <algorithm>
#include <chrono>
+#include "timestatsproto/TimeStatsHelper.h"
+
namespace android {
namespace impl {
@@ -115,6 +114,13 @@
mMaxPulledHistogramBuckets);
mStatsDelegate->statsEventWriteByteArray(event, (const uint8_t*)renderEngineTimingBytes.c_str(),
renderEngineTimingBytes.size());
+
+ mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalFrames);
+ mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalJankyFrames);
+ mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalSFLongCpu);
+ mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalSFLongGpu);
+ mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalSFUnattributed);
+ mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalAppUnattributed);
mStatsDelegate->statsEventBuild(event);
clearGlobalLocked();
@@ -160,6 +166,13 @@
mStatsDelegate->statsEventWriteInt64(event, layer->lateAcquireFrames);
mStatsDelegate->statsEventWriteInt64(event, layer->badDesiredPresentFrames);
+ mStatsDelegate->statsEventWriteInt32(event, layer->uid);
+ mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalFrames);
+ mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalJankyFrames);
+ mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalSFLongCpu);
+ mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalSFLongGpu);
+ mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalSFUnattributed);
+ mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalAppUnattributed);
mStatsDelegate->statsEventBuild(event);
}
@@ -397,11 +410,13 @@
timeRecords[0].frameTime.frameNumber, timeRecords[0].frameTime.presentTime);
if (prevTimeRecord.ready) {
+ uid_t uid = layerRecord.uid;
const std::string& layerName = layerRecord.layerName;
- if (!mTimeStats.stats.count(layerName)) {
- mTimeStats.stats[layerName].layerName = layerName;
+ if (!mTimeStats.stats.count({uid, layerName})) {
+ mTimeStats.stats[{uid, layerName}].uid = uid;
+ mTimeStats.stats[{uid, layerName}].layerName = layerName;
}
- TimeStatsHelper::TimeStatsLayer& timeStatsLayer = mTimeStats.stats[layerName];
+ TimeStatsHelper::TimeStatsLayer& timeStatsLayer = mTimeStats.stats[{uid, layerName}];
timeStatsLayer.totalFrames++;
timeStatsLayer.droppedFrames += layerRecord.droppedFrames;
timeStatsLayer.lateAcquireFrames += layerRecord.lateAcquireFrames;
@@ -462,8 +477,13 @@
layerName.compare(0, kMinLenLayerName, kPopupWindowPrefix) != 0;
}
+bool TimeStats::canAddNewAggregatedStats(uid_t uid, const std::string& layerName) {
+ return mTimeStats.stats.count({uid, layerName}) > 0 ||
+ mTimeStats.stats.size() < MAX_NUM_LAYER_STATS;
+}
+
void TimeStats::setPostTime(int32_t layerId, uint64_t frameNumber, const std::string& layerName,
- nsecs_t postTime) {
+ uid_t uid, nsecs_t postTime) {
if (!mEnabled.load()) return;
ATRACE_CALL();
@@ -471,11 +491,12 @@
postTime);
std::lock_guard<std::mutex> lock(mMutex);
- if (!mTimeStats.stats.count(layerName) && mTimeStats.stats.size() >= MAX_NUM_LAYER_STATS) {
+ if (!canAddNewAggregatedStats(uid, layerName)) {
return;
}
if (!mTimeStatsTracker.count(layerId) && mTimeStatsTracker.size() < MAX_NUM_LAYER_RECORDS &&
layerNameIsValid(layerName)) {
+ mTimeStatsTracker[layerId].uid = uid;
mTimeStatsTracker[layerId].layerName = layerName;
}
if (!mTimeStatsTracker.count(layerId)) return;
@@ -655,6 +676,66 @@
flushAvailableRecordsToStatsLocked(layerId);
}
+template <class T>
+static void updateJankPayload(T& t, int32_t reasons) {
+ t.jankPayload.totalFrames++;
+
+ static const constexpr int32_t kValidJankyReason =
+ TimeStats::JankType::SurfaceFlingerDeadlineMissed |
+ TimeStats::JankType::SurfaceFlingerGpuDeadlineMissed |
+ TimeStats::JankType::AppDeadlineMissed | TimeStats::JankType::Display;
+ if (reasons & kValidJankyReason) {
+ t.jankPayload.totalJankyFrames++;
+ if ((reasons & TimeStats::JankType::SurfaceFlingerDeadlineMissed) != 0) {
+ t.jankPayload.totalSFLongCpu++;
+ }
+ if ((reasons & TimeStats::JankType::SurfaceFlingerGpuDeadlineMissed) != 0) {
+ t.jankPayload.totalSFLongGpu++;
+ }
+ if ((reasons & TimeStats::JankType::Display) != 0) {
+ t.jankPayload.totalSFUnattributed++;
+ }
+ if ((reasons & TimeStats::JankType::AppDeadlineMissed) != 0) {
+ t.jankPayload.totalAppUnattributed++;
+ }
+ }
+}
+
+void TimeStats::incrementJankyFrames(int32_t reasons) {
+ if (!mEnabled.load()) return;
+
+ ATRACE_CALL();
+ std::lock_guard<std::mutex> lock(mMutex);
+
+ updateJankPayload<TimeStatsHelper::TimeStatsGlobal>(mTimeStats, reasons);
+}
+
+void TimeStats::incrementJankyFrames(uid_t uid, const std::string& layerName, int32_t reasons) {
+ if (!mEnabled.load()) return;
+
+ ATRACE_CALL();
+ std::lock_guard<std::mutex> lock(mMutex);
+
+ // Only update layer stats if we're allowed to do so.
+ // As an implementation detail, we do this because this method is expected to be
+ // called from FrameTimeline, which is allowed to do jank analysis well after a frame is
+ // presented. This means that we can't rely on TimeStats to flush layer records over to the
+ // aggregated stats.
+ if (!canAddNewAggregatedStats(uid, layerName)) {
+ return;
+ }
+
+ // Defensively initialize the stats in case FrameTimeline flushes its signaled present fences
+ // before TimeStats does.
+ if (!mTimeStats.stats.count({uid, layerName})) {
+ mTimeStats.stats[{uid, layerName}].uid = uid;
+ mTimeStats.stats[{uid, layerName}].layerName = layerName;
+ }
+
+ TimeStatsHelper::TimeStatsLayer& timeStatsLayer = mTimeStats.stats[{uid, layerName}];
+ updateJankPayload<TimeStatsHelper::TimeStatsLayer>(timeStatsLayer, reasons);
+}
+
void TimeStats::onDestroy(int32_t layerId) {
ATRACE_CALL();
ALOGV("[%d]-onDestroy", layerId);
@@ -860,6 +941,7 @@
mTimeStats.presentToPresent.hist.clear();
mTimeStats.frameDuration.hist.clear();
mTimeStats.renderEngineTiming.hist.clear();
+ mTimeStats.jankPayload = TimeStatsHelper::JankPayload();
mTimeStats.refreshRateStats.clear();
mPowerTime.prevTime = systemTime();
mGlobalRecord.prevPresentTime = 0;
@@ -905,6 +987,3 @@
} // namespace impl
} // namespace android
-
-// TODO(b/129481165): remove the #pragma below and fix conversion issues
-#pragma clang diagnostic pop // ignored "-Wconversion"
diff --git a/services/surfaceflinger/TimeStats/TimeStats.h b/services/surfaceflinger/TimeStats/TimeStats.h
index 8de5d0c..4fa0a02 100644
--- a/services/surfaceflinger/TimeStats/TimeStats.h
+++ b/services/surfaceflinger/TimeStats/TimeStats.h
@@ -17,6 +17,7 @@
#pragma once
// TODO(b/129481165): remove the #pragma below and fix conversion issues
+#include <cstdint>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wconversion"
@@ -85,7 +86,7 @@
const std::shared_ptr<FenceTime>& readyFence) = 0;
virtual void setPostTime(int32_t layerId, uint64_t frameNumber, const std::string& layerName,
- nsecs_t postTime) = 0;
+ uid_t uid, nsecs_t postTime) = 0;
virtual void setLatchTime(int32_t layerId, uint64_t frameNumber, nsecs_t latchTime) = 0;
// Reasons why latching a particular buffer may be skipped
enum class LatchSkipReason {
@@ -108,6 +109,40 @@
virtual void setPresentTime(int32_t layerId, uint64_t frameNumber, nsecs_t presentTime) = 0;
virtual void setPresentFence(int32_t layerId, uint64_t frameNumber,
const std::shared_ptr<FenceTime>& presentFence) = 0;
+
+ // Subset of jank metadata tracked by FrameTimeline for the purpose of funneling to telemetry.
+ enum JankType {
+ // No Jank
+ None = 0x0,
+ // Jank not related to SurfaceFlinger or the App
+ Display = 0x1,
+ // SF took too long on the CPU
+ SurfaceFlingerDeadlineMissed = 0x2,
+ // SF took too long on the GPU
+ SurfaceFlingerGpuDeadlineMissed = 0x4,
+ // Either App or GPU took too long on the frame
+ AppDeadlineMissed = 0x8,
+ // Predictions live for 120ms, if prediction is expired for a frame, there is definitely a
+ // jank
+ // associated with the App if this is for a SurfaceFrame, and SF for a DisplayFrame.
+ PredictionExpired = 0x10,
+ // Latching a buffer early might cause an early present of the frame
+ SurfaceFlingerEarlyLatch = 0x20,
+ };
+
+ // Increments janky frames, tracked globally. Because FrameTimeline is the infrastructure
+ // responsible for computing jank in the system, this is expected to be called from
+ // FrameTimeline, rather than directly from SurfaceFlinger or individual layers. If there are no
+ // jank reasons, then total frames are incremented but jank is not, for accurate accounting of
+ // janky frames.
+ virtual void incrementJankyFrames(int32_t reasons) = 0;
+ // Increments janky frames, blamed to the provided {uid, layerName} key, with JankMetadata as
+ // supplementary reasons for the jank. Because FrameTimeline is the infrastructure responsible
+ // for computing jank in the system, this is expected to be called from FrameTimeline, rather
+ // than directly from SurfaceFlinger or individual layers.
+ // If there are no jank reasons, then total frames are incremented but jank is not, for accurate
+ // accounting of janky frames.
+ virtual void incrementJankyFrames(uid_t uid, const std::string& layerName, int32_t reasons) = 0;
// Clean up the layer record
virtual void onDestroy(int32_t layerId) = 0;
// If SF skips or rejects a buffer, remove the corresponding TimeRecord.
@@ -142,6 +177,7 @@
};
struct LayerRecord {
+ uid_t uid;
std::string layerName;
// This is the index in timeRecords, at which the timestamps for that
// specific frame are still not fully received. This is not waiting for
@@ -241,7 +277,7 @@
void recordRenderEngineDuration(nsecs_t startTime,
const std::shared_ptr<FenceTime>& readyFence) override;
- void setPostTime(int32_t layerId, uint64_t frameNumber, const std::string& layerName,
+ void setPostTime(int32_t layerId, uint64_t frameNumber, const std::string& layerName, uid_t uid,
nsecs_t postTime) override;
void setLatchTime(int32_t layerId, uint64_t frameNumber, nsecs_t latchTime) override;
void incrementLatchSkipped(int32_t layerId, LatchSkipReason reason) override;
@@ -253,6 +289,8 @@
void setPresentTime(int32_t layerId, uint64_t frameNumber, nsecs_t presentTime) override;
void setPresentFence(int32_t layerId, uint64_t frameNumber,
const std::shared_ptr<FenceTime>& presentFence) override;
+ void incrementJankyFrames(int32_t reasons) override;
+ void incrementJankyFrames(uid_t uid, const std::string& layerName, int32_t reasons) override;
// Clean up the layer record
void onDestroy(int32_t layerId) override;
// If SF skips or rejects a buffer, remove the corresponding TimeRecord.
@@ -276,6 +314,7 @@
void flushAvailableRecordsToStatsLocked(int32_t layerId);
void flushPowerTimeLocked();
void flushAvailableGlobalRecordsToStatsLocked();
+ bool canAddNewAggregatedStats(uid_t uid, const std::string& layerName);
void enable();
void disable();
diff --git a/services/surfaceflinger/TimeStats/timestatsproto/TimeStatsHelper.cpp b/services/surfaceflinger/TimeStats/timestatsproto/TimeStatsHelper.cpp
index d77387a..0fb748f 100644
--- a/services/surfaceflinger/TimeStats/timestatsproto/TimeStatsHelper.cpp
+++ b/services/surfaceflinger/TimeStats/timestatsproto/TimeStatsHelper.cpp
@@ -77,14 +77,28 @@
return result;
}
+std::string TimeStatsHelper::JankPayload::toString() const {
+ std::string result;
+ StringAppendF(&result, "totalTimelineFrames = %d\n", totalFrames);
+ StringAppendF(&result, "jankyFrames = %d\n", totalJankyFrames);
+ StringAppendF(&result, "sfLongCpuJankyFrames = %d\n", totalSFLongCpu);
+ StringAppendF(&result, "sfLongGpuJankyFrames = %d\n", totalSFLongGpu);
+ StringAppendF(&result, "sfUnattributedJankyFrame = %d\n", totalSFUnattributed);
+ StringAppendF(&result, "appUnattributedJankyFrame = %d\n", totalAppUnattributed);
+ return result;
+}
+
std::string TimeStatsHelper::TimeStatsLayer::toString() const {
std::string result = "\n";
+ StringAppendF(&result, "uid = %d\n", uid);
StringAppendF(&result, "layerName = %s\n", layerName.c_str());
StringAppendF(&result, "packageName = %s\n", packageName.c_str());
StringAppendF(&result, "totalFrames = %d\n", totalFrames);
StringAppendF(&result, "droppedFrames = %d\n", droppedFrames);
StringAppendF(&result, "lateAcquireFrames = %d\n", lateAcquireFrames);
StringAppendF(&result, "badDesiredPresentFrames = %d\n", badDesiredPresentFrames);
+ result.append("Jank payload for this layer:\n");
+ result.append(jankPayload.toString());
const auto iter = deltas.find("present2present");
if (iter != deltas.end()) {
const float averageTime = iter->second.averageTime();
@@ -110,6 +124,8 @@
StringAppendF(&result, "refreshRateSwitches = %d\n", refreshRateSwitches);
StringAppendF(&result, "compositionStrategyChanges = %d\n", compositionStrategyChanges);
StringAppendF(&result, "displayOnTime = %" PRId64 " ms\n", displayOnTime);
+ result.append("Global aggregated jank payload:\n");
+ result.append(jankPayload.toString());
StringAppendF(&result, "displayConfigStats is as below:\n");
for (const auto& [fps, duration] : refreshRateStats) {
StringAppendF(&result, "%dfps = %ldms\n", fps, ns2ms(duration));
diff --git a/services/surfaceflinger/TimeStats/timestatsproto/include/timestatsproto/TimeStatsHelper.h b/services/surfaceflinger/TimeStats/timestatsproto/include/timestatsproto/TimeStatsHelper.h
index 0c75f96..033eb5d 100644
--- a/services/surfaceflinger/TimeStats/timestatsproto/include/timestatsproto/TimeStatsHelper.h
+++ b/services/surfaceflinger/TimeStats/timestatsproto/include/timestatsproto/TimeStatsHelper.h
@@ -40,14 +40,28 @@
std::string toString() const;
};
+ struct JankPayload {
+ // note that transactions are counted for these frames.
+ int32_t totalFrames = 0;
+ int32_t totalJankyFrames = 0;
+ int32_t totalSFLongCpu = 0;
+ int32_t totalSFLongGpu = 0;
+ int32_t totalSFUnattributed = 0;
+ int32_t totalAppUnattributed = 0;
+
+ std::string toString() const;
+ };
+
class TimeStatsLayer {
public:
+ uid_t uid;
std::string layerName;
std::string packageName;
int32_t totalFrames = 0;
int32_t droppedFrames = 0;
int32_t lateAcquireFrames = 0;
int32_t badDesiredPresentFrames = 0;
+ JankPayload jankPayload;
std::unordered_map<std::string, Histogram> deltas;
std::string toString() const;
@@ -69,8 +83,17 @@
Histogram presentToPresent;
Histogram frameDuration;
Histogram renderEngineTiming;
- std::unordered_map<std::string, TimeStatsLayer> stats;
+
+ struct StatsHasher {
+ size_t operator()(const std::pair<uid_t, std::string>& p) const {
+ // Normally this isn't a very good hash function due to symmetry reasons,
+ // but these are distinct types so this should be good enough
+ return std::hash<uid_t>{}(p.first) ^ std::hash<std::string>{}(p.second);
+ }
+ };
+ std::unordered_map<std::pair<uid_t, std::string>, TimeStatsLayer, StatsHasher> stats;
std::unordered_map<uint32_t, nsecs_t> refreshRateStats;
+ JankPayload jankPayload;
std::string toString(std::optional<uint32_t> maxLayers) const;
SFTimeStatsGlobalProto toProto(std::optional<uint32_t> maxLayers) const;