Expose shared timeline counters into telemetry:

* Add UID into both shared timeline and timestats tracking: due to BLAST
APIs, layer name is insufficient for identifying applications
* Plumb through jank counters from shared timeline into timestats and WW
* Fixed bug where SurfaceflingerDeadlineMissed was not being tracked.

One caveat is that transactions are tracked in shared timeline, so as
a consequence timestats will start tracking them as well.

Bug: 171309796
Test: builds, boots
Test: statsd_testdrive
Test: libsurfacefinger_unittest
Test: dumpsys SurfaceFlinger --timestats -dump

Change-Id: I71057c0976ce81bbb605e126cb30b9d6f06c5873
diff --git a/services/surfaceflinger/TimeStats/TimeStats.cpp b/services/surfaceflinger/TimeStats/TimeStats.cpp
index 37194c6..fe9e737 100644
--- a/services/surfaceflinger/TimeStats/TimeStats.cpp
+++ b/services/surfaceflinger/TimeStats/TimeStats.cpp
@@ -14,9 +14,6 @@
  * limitations under the License.
  */
 
-// TODO(b/129481165): remove the #pragma below and fix conversion issues
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wconversion"
 #undef LOG_TAG
 #define LOG_TAG "TimeStats"
 #define ATRACE_TAG ATRACE_TAG_GRAPHICS
@@ -33,6 +30,8 @@
 #include <algorithm>
 #include <chrono>
 
+#include "timestatsproto/TimeStatsHelper.h"
+
 namespace android {
 
 namespace impl {
@@ -115,6 +114,13 @@
                                        mMaxPulledHistogramBuckets);
     mStatsDelegate->statsEventWriteByteArray(event, (const uint8_t*)renderEngineTimingBytes.c_str(),
                                              renderEngineTimingBytes.size());
+
+    mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalFrames);
+    mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalJankyFrames);
+    mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalSFLongCpu);
+    mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalSFLongGpu);
+    mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalSFUnattributed);
+    mStatsDelegate->statsEventWriteInt32(event, mTimeStats.jankPayload.totalAppUnattributed);
     mStatsDelegate->statsEventBuild(event);
     clearGlobalLocked();
 
@@ -160,6 +166,13 @@
 
         mStatsDelegate->statsEventWriteInt64(event, layer->lateAcquireFrames);
         mStatsDelegate->statsEventWriteInt64(event, layer->badDesiredPresentFrames);
+        mStatsDelegate->statsEventWriteInt32(event, layer->uid);
+        mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalFrames);
+        mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalJankyFrames);
+        mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalSFLongCpu);
+        mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalSFLongGpu);
+        mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalSFUnattributed);
+        mStatsDelegate->statsEventWriteInt32(event, layer->jankPayload.totalAppUnattributed);
 
         mStatsDelegate->statsEventBuild(event);
     }
@@ -397,11 +410,13 @@
               timeRecords[0].frameTime.frameNumber, timeRecords[0].frameTime.presentTime);
 
         if (prevTimeRecord.ready) {
+            uid_t uid = layerRecord.uid;
             const std::string& layerName = layerRecord.layerName;
-            if (!mTimeStats.stats.count(layerName)) {
-                mTimeStats.stats[layerName].layerName = layerName;
+            if (!mTimeStats.stats.count({uid, layerName})) {
+                mTimeStats.stats[{uid, layerName}].uid = uid;
+                mTimeStats.stats[{uid, layerName}].layerName = layerName;
             }
-            TimeStatsHelper::TimeStatsLayer& timeStatsLayer = mTimeStats.stats[layerName];
+            TimeStatsHelper::TimeStatsLayer& timeStatsLayer = mTimeStats.stats[{uid, layerName}];
             timeStatsLayer.totalFrames++;
             timeStatsLayer.droppedFrames += layerRecord.droppedFrames;
             timeStatsLayer.lateAcquireFrames += layerRecord.lateAcquireFrames;
@@ -462,8 +477,13 @@
             layerName.compare(0, kMinLenLayerName, kPopupWindowPrefix) != 0;
 }
 
+bool TimeStats::canAddNewAggregatedStats(uid_t uid, const std::string& layerName) {
+    return mTimeStats.stats.count({uid, layerName}) > 0 ||
+            mTimeStats.stats.size() < MAX_NUM_LAYER_STATS;
+}
+
 void TimeStats::setPostTime(int32_t layerId, uint64_t frameNumber, const std::string& layerName,
-                            nsecs_t postTime) {
+                            uid_t uid, nsecs_t postTime) {
     if (!mEnabled.load()) return;
 
     ATRACE_CALL();
@@ -471,11 +491,12 @@
           postTime);
 
     std::lock_guard<std::mutex> lock(mMutex);
-    if (!mTimeStats.stats.count(layerName) && mTimeStats.stats.size() >= MAX_NUM_LAYER_STATS) {
+    if (!canAddNewAggregatedStats(uid, layerName)) {
         return;
     }
     if (!mTimeStatsTracker.count(layerId) && mTimeStatsTracker.size() < MAX_NUM_LAYER_RECORDS &&
         layerNameIsValid(layerName)) {
+        mTimeStatsTracker[layerId].uid = uid;
         mTimeStatsTracker[layerId].layerName = layerName;
     }
     if (!mTimeStatsTracker.count(layerId)) return;
@@ -655,6 +676,66 @@
     flushAvailableRecordsToStatsLocked(layerId);
 }
 
+template <class T>
+static void updateJankPayload(T& t, int32_t reasons) {
+    t.jankPayload.totalFrames++;
+
+    static const constexpr int32_t kValidJankyReason =
+            TimeStats::JankType::SurfaceFlingerDeadlineMissed |
+            TimeStats::JankType::SurfaceFlingerGpuDeadlineMissed |
+            TimeStats::JankType::AppDeadlineMissed | TimeStats::JankType::Display;
+    if (reasons & kValidJankyReason) {
+        t.jankPayload.totalJankyFrames++;
+        if ((reasons & TimeStats::JankType::SurfaceFlingerDeadlineMissed) != 0) {
+            t.jankPayload.totalSFLongCpu++;
+        }
+        if ((reasons & TimeStats::JankType::SurfaceFlingerGpuDeadlineMissed) != 0) {
+            t.jankPayload.totalSFLongGpu++;
+        }
+        if ((reasons & TimeStats::JankType::Display) != 0) {
+            t.jankPayload.totalSFUnattributed++;
+        }
+        if ((reasons & TimeStats::JankType::AppDeadlineMissed) != 0) {
+            t.jankPayload.totalAppUnattributed++;
+        }
+    }
+}
+
+void TimeStats::incrementJankyFrames(int32_t reasons) {
+    if (!mEnabled.load()) return;
+
+    ATRACE_CALL();
+    std::lock_guard<std::mutex> lock(mMutex);
+
+    updateJankPayload<TimeStatsHelper::TimeStatsGlobal>(mTimeStats, reasons);
+}
+
+void TimeStats::incrementJankyFrames(uid_t uid, const std::string& layerName, int32_t reasons) {
+    if (!mEnabled.load()) return;
+
+    ATRACE_CALL();
+    std::lock_guard<std::mutex> lock(mMutex);
+
+    // Only update layer stats if we're allowed to do so.
+    // As an implementation detail, we do this because this method is expected to be
+    // called from FrameTimeline, which is allowed to do jank analysis well after a frame is
+    // presented. This means that we can't rely on TimeStats to flush layer records over to the
+    // aggregated stats.
+    if (!canAddNewAggregatedStats(uid, layerName)) {
+        return;
+    }
+
+    // Defensively initialize the stats in case FrameTimeline flushes its signaled present fences
+    // before TimeStats does.
+    if (!mTimeStats.stats.count({uid, layerName})) {
+        mTimeStats.stats[{uid, layerName}].uid = uid;
+        mTimeStats.stats[{uid, layerName}].layerName = layerName;
+    }
+
+    TimeStatsHelper::TimeStatsLayer& timeStatsLayer = mTimeStats.stats[{uid, layerName}];
+    updateJankPayload<TimeStatsHelper::TimeStatsLayer>(timeStatsLayer, reasons);
+}
+
 void TimeStats::onDestroy(int32_t layerId) {
     ATRACE_CALL();
     ALOGV("[%d]-onDestroy", layerId);
@@ -860,6 +941,7 @@
     mTimeStats.presentToPresent.hist.clear();
     mTimeStats.frameDuration.hist.clear();
     mTimeStats.renderEngineTiming.hist.clear();
+    mTimeStats.jankPayload = TimeStatsHelper::JankPayload();
     mTimeStats.refreshRateStats.clear();
     mPowerTime.prevTime = systemTime();
     mGlobalRecord.prevPresentTime = 0;
@@ -905,6 +987,3 @@
 } // namespace impl
 
 } // namespace android
-
-// TODO(b/129481165): remove the #pragma below and fix conversion issues
-#pragma clang diagnostic pop // ignored "-Wconversion"
diff --git a/services/surfaceflinger/TimeStats/TimeStats.h b/services/surfaceflinger/TimeStats/TimeStats.h
index 8de5d0c..4fa0a02 100644
--- a/services/surfaceflinger/TimeStats/TimeStats.h
+++ b/services/surfaceflinger/TimeStats/TimeStats.h
@@ -17,6 +17,7 @@
 #pragma once
 
 // TODO(b/129481165): remove the #pragma below and fix conversion issues
+#include <cstdint>
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wconversion"
 
@@ -85,7 +86,7 @@
                                             const std::shared_ptr<FenceTime>& readyFence) = 0;
 
     virtual void setPostTime(int32_t layerId, uint64_t frameNumber, const std::string& layerName,
-                             nsecs_t postTime) = 0;
+                             uid_t uid, nsecs_t postTime) = 0;
     virtual void setLatchTime(int32_t layerId, uint64_t frameNumber, nsecs_t latchTime) = 0;
     // Reasons why latching a particular buffer may be skipped
     enum class LatchSkipReason {
@@ -108,6 +109,40 @@
     virtual void setPresentTime(int32_t layerId, uint64_t frameNumber, nsecs_t presentTime) = 0;
     virtual void setPresentFence(int32_t layerId, uint64_t frameNumber,
                                  const std::shared_ptr<FenceTime>& presentFence) = 0;
+
+    // Subset of jank metadata tracked by FrameTimeline for the purpose of funneling to telemetry.
+    enum JankType {
+        // No Jank
+        None = 0x0,
+        // Jank not related to SurfaceFlinger or the App
+        Display = 0x1,
+        // SF took too long on the CPU
+        SurfaceFlingerDeadlineMissed = 0x2,
+        // SF took too long on the GPU
+        SurfaceFlingerGpuDeadlineMissed = 0x4,
+        // Either App or GPU took too long on the frame
+        AppDeadlineMissed = 0x8,
+        // Predictions live for 120ms, if prediction is expired for a frame, there is definitely a
+        // jank
+        // associated with the App if this is for a SurfaceFrame, and SF for a DisplayFrame.
+        PredictionExpired = 0x10,
+        // Latching a buffer early might cause an early present of the frame
+        SurfaceFlingerEarlyLatch = 0x20,
+    };
+
+    // Increments janky frames, tracked globally. Because FrameTimeline is the infrastructure
+    // responsible for computing jank in the system, this is expected to be called from
+    // FrameTimeline, rather than directly from SurfaceFlinger or individual layers. If there are no
+    // jank reasons, then total frames are incremented but jank is not, for accurate accounting of
+    // janky frames.
+    virtual void incrementJankyFrames(int32_t reasons) = 0;
+    // Increments janky frames, blamed to the provided {uid, layerName} key, with JankMetadata as
+    // supplementary reasons for the jank. Because FrameTimeline is the infrastructure responsible
+    // for computing jank in the system, this is expected to be called from FrameTimeline, rather
+    // than directly from SurfaceFlinger or individual layers.
+    // If there are no jank reasons, then total frames are incremented but jank is not, for accurate
+    // accounting of janky frames.
+    virtual void incrementJankyFrames(uid_t uid, const std::string& layerName, int32_t reasons) = 0;
     // Clean up the layer record
     virtual void onDestroy(int32_t layerId) = 0;
     // If SF skips or rejects a buffer, remove the corresponding TimeRecord.
@@ -142,6 +177,7 @@
     };
 
     struct LayerRecord {
+        uid_t uid;
         std::string layerName;
         // This is the index in timeRecords, at which the timestamps for that
         // specific frame are still not fully received. This is not waiting for
@@ -241,7 +277,7 @@
     void recordRenderEngineDuration(nsecs_t startTime,
                                     const std::shared_ptr<FenceTime>& readyFence) override;
 
-    void setPostTime(int32_t layerId, uint64_t frameNumber, const std::string& layerName,
+    void setPostTime(int32_t layerId, uint64_t frameNumber, const std::string& layerName, uid_t uid,
                      nsecs_t postTime) override;
     void setLatchTime(int32_t layerId, uint64_t frameNumber, nsecs_t latchTime) override;
     void incrementLatchSkipped(int32_t layerId, LatchSkipReason reason) override;
@@ -253,6 +289,8 @@
     void setPresentTime(int32_t layerId, uint64_t frameNumber, nsecs_t presentTime) override;
     void setPresentFence(int32_t layerId, uint64_t frameNumber,
                          const std::shared_ptr<FenceTime>& presentFence) override;
+    void incrementJankyFrames(int32_t reasons) override;
+    void incrementJankyFrames(uid_t uid, const std::string& layerName, int32_t reasons) override;
     // Clean up the layer record
     void onDestroy(int32_t layerId) override;
     // If SF skips or rejects a buffer, remove the corresponding TimeRecord.
@@ -276,6 +314,7 @@
     void flushAvailableRecordsToStatsLocked(int32_t layerId);
     void flushPowerTimeLocked();
     void flushAvailableGlobalRecordsToStatsLocked();
+    bool canAddNewAggregatedStats(uid_t uid, const std::string& layerName);
 
     void enable();
     void disable();
diff --git a/services/surfaceflinger/TimeStats/timestatsproto/TimeStatsHelper.cpp b/services/surfaceflinger/TimeStats/timestatsproto/TimeStatsHelper.cpp
index d77387a..0fb748f 100644
--- a/services/surfaceflinger/TimeStats/timestatsproto/TimeStatsHelper.cpp
+++ b/services/surfaceflinger/TimeStats/timestatsproto/TimeStatsHelper.cpp
@@ -77,14 +77,28 @@
     return result;
 }
 
+std::string TimeStatsHelper::JankPayload::toString() const {
+    std::string result;
+    StringAppendF(&result, "totalTimelineFrames = %d\n", totalFrames);
+    StringAppendF(&result, "jankyFrames = %d\n", totalJankyFrames);
+    StringAppendF(&result, "sfLongCpuJankyFrames = %d\n", totalSFLongCpu);
+    StringAppendF(&result, "sfLongGpuJankyFrames = %d\n", totalSFLongGpu);
+    StringAppendF(&result, "sfUnattributedJankyFrame = %d\n", totalSFUnattributed);
+    StringAppendF(&result, "appUnattributedJankyFrame = %d\n", totalAppUnattributed);
+    return result;
+}
+
 std::string TimeStatsHelper::TimeStatsLayer::toString() const {
     std::string result = "\n";
+    StringAppendF(&result, "uid = %d\n", uid);
     StringAppendF(&result, "layerName = %s\n", layerName.c_str());
     StringAppendF(&result, "packageName = %s\n", packageName.c_str());
     StringAppendF(&result, "totalFrames = %d\n", totalFrames);
     StringAppendF(&result, "droppedFrames = %d\n", droppedFrames);
     StringAppendF(&result, "lateAcquireFrames = %d\n", lateAcquireFrames);
     StringAppendF(&result, "badDesiredPresentFrames = %d\n", badDesiredPresentFrames);
+    result.append("Jank payload for this layer:\n");
+    result.append(jankPayload.toString());
     const auto iter = deltas.find("present2present");
     if (iter != deltas.end()) {
         const float averageTime = iter->second.averageTime();
@@ -110,6 +124,8 @@
     StringAppendF(&result, "refreshRateSwitches = %d\n", refreshRateSwitches);
     StringAppendF(&result, "compositionStrategyChanges = %d\n", compositionStrategyChanges);
     StringAppendF(&result, "displayOnTime = %" PRId64 " ms\n", displayOnTime);
+    result.append("Global aggregated jank payload:\n");
+    result.append(jankPayload.toString());
     StringAppendF(&result, "displayConfigStats is as below:\n");
     for (const auto& [fps, duration] : refreshRateStats) {
         StringAppendF(&result, "%dfps = %ldms\n", fps, ns2ms(duration));
diff --git a/services/surfaceflinger/TimeStats/timestatsproto/include/timestatsproto/TimeStatsHelper.h b/services/surfaceflinger/TimeStats/timestatsproto/include/timestatsproto/TimeStatsHelper.h
index 0c75f96..033eb5d 100644
--- a/services/surfaceflinger/TimeStats/timestatsproto/include/timestatsproto/TimeStatsHelper.h
+++ b/services/surfaceflinger/TimeStats/timestatsproto/include/timestatsproto/TimeStatsHelper.h
@@ -40,14 +40,28 @@
         std::string toString() const;
     };
 
+    struct JankPayload {
+        // note that transactions are counted for these frames.
+        int32_t totalFrames = 0;
+        int32_t totalJankyFrames = 0;
+        int32_t totalSFLongCpu = 0;
+        int32_t totalSFLongGpu = 0;
+        int32_t totalSFUnattributed = 0;
+        int32_t totalAppUnattributed = 0;
+
+        std::string toString() const;
+    };
+
     class TimeStatsLayer {
     public:
+        uid_t uid;
         std::string layerName;
         std::string packageName;
         int32_t totalFrames = 0;
         int32_t droppedFrames = 0;
         int32_t lateAcquireFrames = 0;
         int32_t badDesiredPresentFrames = 0;
+        JankPayload jankPayload;
         std::unordered_map<std::string, Histogram> deltas;
 
         std::string toString() const;
@@ -69,8 +83,17 @@
         Histogram presentToPresent;
         Histogram frameDuration;
         Histogram renderEngineTiming;
-        std::unordered_map<std::string, TimeStatsLayer> stats;
+
+        struct StatsHasher {
+            size_t operator()(const std::pair<uid_t, std::string>& p) const {
+                // Normally this isn't a very good hash function due to symmetry reasons,
+                // but these are distinct types so this should be good enough
+                return std::hash<uid_t>{}(p.first) ^ std::hash<std::string>{}(p.second);
+            }
+        };
+        std::unordered_map<std::pair<uid_t, std::string>, TimeStatsLayer, StatsHasher> stats;
         std::unordered_map<uint32_t, nsecs_t> refreshRateStats;
+        JankPayload jankPayload;
 
         std::string toString(std::optional<uint32_t> maxLayers) const;
         SFTimeStatsGlobalProto toProto(std::optional<uint32_t> maxLayers) const;