Change SF power hints to use early frame predictions Currently we issue hints at the end of the frame reporting what happened that frame. However, this is often too late anyway and lacks critical information not known until after the frame completes, like gpu timing. By waiting until the next frame start and using those timings along with information about gpu duration, the new expected present time, and the new wakeup time, we can instead use those timings to predict the duration of the upcoming frame and signal in advance of a frame drop. This patch also changes how and where frame timing is done and measures gpu duration so we can predict if a cpu speedup can help gpu composition happen earlier, preventing a drop that way. Bug: b/195990840 Test: manual Change-Id: I5f0ae796198631fced503dce8e6c495885fb256a

commit: 50c44062d3d59f82afec0e817ef045fff92c20e4 [log] [tgz]
author: Matt Buckley <mattbuckley@google.com> Mon Jan 17 20:48:10 2022 +0000
committer: Matt Buckley <mattbuckley@google.com> Thu May 05 20:27:42 2022 +0000
tree: d89763563c5e9cddfdb6bdbba5736f39dfcd18c4
parent: 7b2bea286ebdc696bb872c617a79a6e8e557d029 [diff] [blame]
diff --git a/services/surfaceflinger/DisplayHardware/PowerAdvisor.h b/services/surfaceflinger/DisplayHardware/PowerAdvisor.h
index 61bb32b..19a3b88 100644
--- a/services/surfaceflinger/DisplayHardware/PowerAdvisor.h
+++ b/services/surfaceflinger/DisplayHardware/PowerAdvisor.h

@@ -18,11 +18,15 @@
 
 #include <atomic>
 #include <chrono>
+#include <unordered_map>
 #include <unordered_set>
 
+#include <ui/DisplayId.h>
+#include <ui/FenceTime.h>
 #include <utils/Mutex.h>
 
 #include <android/hardware/power/IPower.h>
+#include <compositionengine/impl/OutputCompositionState.h>
 #include <ui/DisplayIdentification.h>
 #include "../Scheduler/OneShotTimer.h"
 
@@ -44,13 +48,47 @@
     virtual void setExpensiveRenderingExpected(DisplayId displayId, bool expected) = 0;
     virtual bool isUsingExpensiveRendering() = 0;
     virtual void notifyDisplayUpdateImminent() = 0;
+    // Checks both if it supports and if it's enabled
     virtual bool usePowerHintSession() = 0;
     virtual bool supportsPowerHintSession() = 0;
     virtual bool isPowerHintSessionRunning() = 0;
-    virtual void setTargetWorkDuration(int64_t targetDurationNanos) = 0;
-    virtual void sendActualWorkDuration(int64_t actualDurationNanos, nsecs_t timestamp) = 0;
+    // Sends a power hint that updates to the target work duration for the frame
+    virtual void setTargetWorkDuration(nsecs_t targetDuration) = 0;
+    // Sends a power hint for the actual known work duration at the end of the frame
+    virtual void sendActualWorkDuration() = 0;
+    // Sends a power hint for the upcoming frame predicted from previous frame timing
+    virtual void sendPredictedWorkDuration() = 0;
+    // Sets whether the power hint session is enabled
     virtual void enablePowerHint(bool enabled) = 0;
+    // Initializes the power hint session
     virtual bool startPowerHintSession(const std::vector<int32_t>& threadIds) = 0;
+    // Provides PowerAdvisor with a copy of the gpu fence so it can determine the gpu end time
+    virtual void setGpuFenceTime(DisplayId displayId, std::unique_ptr<FenceTime>&& fenceTime) = 0;
+    // Reports the start and end times of a present call this frame for a given display
+    virtual void setValidateTiming(DisplayId displayId, nsecs_t validateStartTime,
+                                   nsecs_t validateEndTime) = 0;
+    // Reports the start and end times of a present call this frame for a given display
+    virtual void setPresentTiming(DisplayId displayId, nsecs_t presentStartTime,
+                                  nsecs_t presentEndTime) = 0;
+    virtual void setExpectedPresentTime(nsecs_t expectedPresentTime) = 0;
+    // Reports whether a display used client composition this frame
+    virtual void setRequiresClientComposition(DisplayId displayId,
+                                              bool requiresClientComposition) = 0;
+    // Reports whether a given display skipped validation this frame
+    virtual void setSkippedValidate(DisplayId displayId, bool skipped) = 0;
+    // Reports how much a given display delayed its present call this frame
+    virtual void setPresentDelayedTime(
+            DisplayId displayId, std::chrono::steady_clock::time_point earliestFrameStartTime) = 0;
+    // Reports the start delay for SurfaceFlinger this frame
+    virtual void setFrameDelay(nsecs_t frameDelayDuration) = 0;
+    // Reports the SurfaceFlinger commit start time this frame
+    virtual void setCommitStart(nsecs_t commitStartTime) = 0;
+    // Reports the SurfaceFlinger composite end time this frame
+    virtual void setCompositeEnd(nsecs_t compositeEndTime) = 0;
+    // Reports the list of the currently active displays
+    virtual void setDisplays(std::vector<DisplayId>& displayIds) = 0;
+    // Sets the target duration for the entire pipeline including the gpu
+    virtual void setTotalFrameTargetWorkDuration(nsecs_t targetDuration) = 0;
 };
 
 namespace impl {
@@ -70,12 +108,11 @@
         virtual void restartPowerHintSession() = 0;
         virtual void setPowerHintSessionThreadIds(const std::vector<int32_t>& threadIds) = 0;
         virtual bool startPowerHintSession() = 0;
-        virtual void setTargetWorkDuration(int64_t targetDurationNanos) = 0;
-        virtual void sendActualWorkDuration(int64_t actualDurationNanos,
-                                            nsecs_t timeStampNanos) = 0;
+        virtual void setTargetWorkDuration(nsecs_t targetDuration) = 0;
+        virtual void sendActualWorkDuration(nsecs_t actualDuration, nsecs_t timestamp) = 0;
         virtual bool shouldReconnectHAL() = 0;
         virtual std::vector<int32_t> getPowerHintSessionThreadIds() = 0;
-        virtual std::optional<int64_t> getTargetWorkDuration() = 0;
+        virtual std::optional<nsecs_t> getTargetWorkDuration() = 0;
     };
 
     PowerAdvisor(SurfaceFlinger& flinger);
@@ -89,10 +126,28 @@
     bool usePowerHintSession() override;
     bool supportsPowerHintSession() override;
     bool isPowerHintSessionRunning() override;
-    void setTargetWorkDuration(int64_t targetDurationNanos) override;
-    void sendActualWorkDuration(int64_t actualDurationNanos, nsecs_t timestamp) override;
+    void setTargetWorkDuration(nsecs_t targetDuration) override;
+    void sendActualWorkDuration() override;
+    void sendPredictedWorkDuration() override;
     void enablePowerHint(bool enabled) override;
     bool startPowerHintSession(const std::vector<int32_t>& threadIds) override;
+    void setGpuFenceTime(DisplayId displayId, std::unique_ptr<FenceTime>&& fenceTime);
+    void setValidateTiming(DisplayId displayId, nsecs_t valiateStartTime,
+                           nsecs_t validateEndTime) override;
+    void setPresentTiming(DisplayId displayId, nsecs_t presentStartTime,
+                          nsecs_t presentEndTime) override;
+    void setSkippedValidate(DisplayId displayId, bool skipped) override;
+    void setRequiresClientComposition(DisplayId displayId, bool requiresClientComposition) override;
+    void setExpectedPresentTime(nsecs_t expectedPresentTime) override;
+    void setPresentDelayedTime(
+            DisplayId displayId,
+            std::chrono::steady_clock::time_point earliestFrameStartTime) override;
+
+    void setFrameDelay(nsecs_t frameDelayDuration) override;
+    void setCommitStart(nsecs_t commitStartTime) override;
+    void setCompositeEnd(nsecs_t compositeEndTime) override;
+    void setDisplays(std::vector<DisplayId>& displayIds) override;
+    void setTotalFrameTargetWorkDuration(nsecs_t targetDuration) override;
 
 private:
     HalWrapper* getPowerHal() REQUIRES(mPowerHalMutex);
@@ -100,15 +155,6 @@
     std::mutex mPowerHalMutex;
 
     std::atomic_bool mBootFinished = false;
-    std::optional<bool> mPowerHintEnabled;
-    std::optional<bool> mSupportsPowerHint;
-    bool mPowerHintSessionRunning = false;
-
-    // An adjustable safety margin which moves the "target" earlier to allow flinger to
-    // go a bit over without dropping a frame, especially since we can't measure
-    // the exact time HWC finishes composition so "actual" durations are measured
-    // from the end of present() instead, which is a bit later.
-    static constexpr const std::chrono::nanoseconds kTargetSafetyMargin = 2ms;
 
     std::unordered_set<DisplayId> mExpensiveDisplays;
     bool mNotifiedExpensiveRendering = false;
@@ -117,6 +163,108 @@
     const bool mUseScreenUpdateTimer;
     std::atomic_bool mSendUpdateImminent = true;
     scheduler::OneShotTimer mScreenUpdateTimer;
+
+    // Higher-level timing data used for estimation
+    struct DisplayTimeline {
+        nsecs_t prePresentTime = -1;
+        nsecs_t postPresentTime = -1;
+        // Usually equals prePresentTime but can be delayed if we wait for the next valid vsync
+        nsecs_t presentStartTime = -1;
+        // When we think we started waiting for the fence after calling into present and
+        // after potentially waiting for the earliest present time
+        nsecs_t preFenceWaitTime = -1;
+        // How long we ran after we finished waiting for the fence but before present happened
+        nsecs_t postFenceDuration = 0;
+        // Are we likely to have waited for the present fence during composition
+        bool probablyWaitsForFence = false;
+        // Estimate one frame's timeline from that of a previous frame
+        DisplayTimeline estimateTimelineFromReference(nsecs_t fenceTime, nsecs_t displayStartTime);
+    };
+
+    struct GpuTimeline {
+        nsecs_t duration = 0;
+        nsecs_t startTime = -1;
+    };
+
+    // Power hint session data recorded from the pipeline
+    struct DisplayTimingData {
+        std::unique_ptr<FenceTime> gpuEndFenceTime;
+        std::optional<nsecs_t> gpuStartTime;
+        std::optional<nsecs_t> lastValidGpuEndTime;
+        std::optional<nsecs_t> lastValidGpuStartTime;
+        std::optional<nsecs_t> presentStartTime;
+        std::optional<nsecs_t> presentEndTime;
+        std::optional<nsecs_t> validateStartTime;
+        std::optional<nsecs_t> validateEndTime;
+        std::optional<nsecs_t> presentDelayedTime;
+        bool usedClientComposition = false;
+        bool skippedValidate = false;
+        // Calculate high-level timing milestones from more granular display timing data
+        DisplayTimeline calculateDisplayTimeline(nsecs_t fenceTime);
+        // Estimate the gpu duration for a given display from previous gpu timing data
+        std::optional<GpuTimeline> estimateGpuTiming(std::optional<nsecs_t> previousEnd);
+    };
+
+    template <class T, size_t N>
+    class RingBuffer {
+        std::array<T, N> elements = {};
+        size_t mIndex = 0;
+        size_t numElements = 0;
+
+    public:
+        void append(T item) {
+            mIndex = (mIndex + 1) % N;
+            numElements = std::min(N, numElements + 1);
+            elements[mIndex] = item;
+        }
+        bool isFull() const { return numElements == N; }
+        // Allows access like [0] == current, [-1] = previous, etc..
+        T& operator[](int offset) {
+            size_t positiveOffset =
+                    static_cast<size_t>((offset % static_cast<int>(N)) + static_cast<int>(N));
+            return elements[(mIndex + positiveOffset) % N];
+        }
+    };
+
+    // Filter and sort the display ids by a given property
+    std::vector<DisplayId> getOrderedDisplayIds(std::optional<nsecs_t> DisplayTimingData::*sortBy);
+    // Estimates a frame's total work duration including gpu time.
+    // Runs either at the beginning or end of a frame, using the most recent data available
+    std::optional<nsecs_t> estimateWorkDuration(bool earlyHint);
+    // There are two different targets and actual work durations we care about,
+    // this normalizes them together and takes the max of the two
+    nsecs_t combineTimingEstimates(nsecs_t totalDuration, nsecs_t flingerDuration);
+
+    std::unordered_map<DisplayId, DisplayTimingData> mDisplayTimingData;
+
+    // Current frame's delay
+    nsecs_t mFrameDelayDuration = 0;
+    // Last frame's composite end time
+    nsecs_t mLastCompositeEndTime = -1;
+    // Last frame's post-composition duration
+    nsecs_t mLastPostcompDuration = 0;
+    // Buffer of recent commit start times
+    RingBuffer<nsecs_t, 2> mCommitStartTimes;
+    // Buffer of recent expected present times
+    RingBuffer<nsecs_t, 3> mExpectedPresentTimes;
+    // Target for the entire pipeline including gpu
+    std::optional<nsecs_t> mTotalFrameTargetDuration;
+    // Updated list of display IDs
+    std::vector<DisplayId> mDisplayIds;
+
+    std::optional<bool> mPowerHintEnabled;
+    std::optional<bool> mSupportsPowerHint;
+    bool mPowerHintSessionRunning = false;
+
+    // An adjustable safety margin which moves the "target" earlier to allow flinger to
+    // go a bit over without dropping a frame, especially since we can't measure
+    // the exact time HWC finishes composition so "actual" durations are measured
+    // from the end of present() instead, which is a bit later.
+    static constexpr const std::chrono::nanoseconds kTargetSafetyMargin = 1ms;
+
+    // How long we expect hwc to run after the present call until it waits for the fence
+    static constexpr const std::chrono::nanoseconds kPrefenceDelayValidated = 150us;
+    static constexpr const std::chrono::nanoseconds kPrefenceDelaySkippedValidate = 250us;
 };
 
 class AidlPowerHalWrapper : public PowerAdvisor::HalWrapper {
@@ -133,50 +281,50 @@
     void restartPowerHintSession() override;
     void setPowerHintSessionThreadIds(const std::vector<int32_t>& threadIds) override;
     bool startPowerHintSession() override;
-    void setTargetWorkDuration(int64_t targetDurationNanos) override;
-    void sendActualWorkDuration(int64_t actualDurationNanos, nsecs_t timeStampNanos) override;
+    void setTargetWorkDuration(nsecs_t targetDuration) override;
+    void sendActualWorkDuration(nsecs_t actualDuration, nsecs_t timestamp) override;
     bool shouldReconnectHAL() override;
     std::vector<int32_t> getPowerHintSessionThreadIds() override;
-    std::optional<int64_t> getTargetWorkDuration() override;
+    std::optional<nsecs_t> getTargetWorkDuration() override;
 
 private:
+    friend class AidlPowerHalWrapperTest;
+
     bool checkPowerHintSessionSupported();
     void closePowerHintSession();
-    bool shouldReportActualDurationsNow();
-    bool shouldSetTargetDuration(int64_t targetDurationNanos);
+    bool shouldReportActualDurations();
+
+    // Used for testing
+    void setAllowedActualDeviation(nsecs_t);
 
     const sp<hardware::power::IPower> mPowerHal = nullptr;
     bool mHasExpensiveRendering = false;
     bool mHasDisplayUpdateImminent = false;
     // Used to indicate an error state and need for reconstruction
     bool mShouldReconnectHal = false;
-    // This is not thread safe, but is currently protected by mPowerHalMutex so it needs no lock
+
+    // Power hint session data
+
+    // Concurrent access for this is protected by mPowerHalMutex
     sp<hardware::power::IPowerHintSession> mPowerHintSession = nullptr;
     // Queue of actual durations saved to report
     std::vector<hardware::power::WorkDuration> mPowerHintQueue;
-    // The latest un-normalized values we have received for target and actual
-    int64_t mTargetDuration = kDefaultTarget.count();
-    std::optional<int64_t> mActualDuration;
+    // The latest values we have received for target and actual
+    nsecs_t mTargetDuration = kDefaultTarget.count();
+    std::optional<nsecs_t> mActualDuration;
     // The list of thread ids, stored so we can restart the session from this class if needed
     std::vector<int32_t> mPowerHintThreadIds;
-    bool mSupportsPowerHint;
+    bool mSupportsPowerHint = false;
     // Keep track of the last messages sent for rate limiter change detection
-    std::optional<int64_t> mLastActualDurationSent;
-    // timestamp of the last report we sent, used to avoid stale sessions
-    int64_t mLastActualReportTimestamp = 0;
-    int64_t mLastTargetDurationSent = kDefaultTarget.count();
-    // Whether to normalize all the actual values as error terms relative to a constant target
-    // This saves a binder call by not setting the target, and should not affect the pid values
-    static const bool sNormalizeTarget;
+    std::optional<nsecs_t> mLastActualDurationSent;
+    // Timestamp of the last report we sent, used to avoid stale sessions
+    nsecs_t mLastActualReportTimestamp = 0;
+    nsecs_t mLastTargetDurationSent = kDefaultTarget.count();
+    // Max amount the error term can vary without causing an actual value report
+    nsecs_t mAllowedActualDeviation = -1;
     // Whether we should emit ATRACE_INT data for hint sessions
     static const bool sTraceHintSessionData;
-
-    // Max percent the actual duration can vary without causing a report (eg: 0.1 = 10%)
-    static constexpr double kAllowedActualDeviationPercent = 0.1;
-    // Max percent the target duration can vary without causing a report (eg: 0.1 = 10%)
-    static constexpr double kAllowedTargetDeviationPercent = 0.1;
-    // Target used for init and normalization, the actual value does not really matter
-    static constexpr const std::chrono::nanoseconds kDefaultTarget = 50ms;
+    static constexpr const std::chrono::nanoseconds kDefaultTarget = 16ms;
     // Amount of time after the last message was sent before the session goes stale
     // actually 100ms but we use 80 here to ideally avoid going stale
     static constexpr const std::chrono::nanoseconds kStaleTimeout = 80ms;
commit	50c44062d3d59f82afec0e817ef045fff92c20e4	[log] [tgz]
author	Matt Buckley <mattbuckley@google.com>	Mon Jan 17 20:48:10 2022 +0000
committer	Matt Buckley <mattbuckley@google.com>	Thu May 05 20:27:42 2022 +0000
tree	d89763563c5e9cddfdb6bdbba5736f39dfcd18c4
parent	7b2bea286ebdc696bb872c617a79a6e8e557d029 [diff] [blame]