Add public ADPF load hints with better rate limiter and hint batching

- Adds ADPF public load hint NDK API
- Updates rate limiter to use a single budget for all load hints
- Adds support for load hint batching, to send multiple hints at a time
- Requires users of load hints to annotate their hints for better traces

Bug: 367803904
Test: atest HintManagerServiceTest
Test: atest PerformanceHintNativeTestCases
Flag: android.os.adpf_use_load_hints

Change-Id: I189fc1d96fcb83ddd558cbe0aca22be218b481ae
diff --git a/native/android/performance_hint.cpp b/native/android/performance_hint.cpp
index 15f77ce..e2fa94d 100644
--- a/native/android/performance_hint.cpp
+++ b/native/android/performance_hint.cpp
@@ -33,12 +33,14 @@
 #include <android/performance_hint.h>
 #include <android/trace.h>
 #include <android_os.h>
+#include <cutils/trace.h>
 #include <fmq/AidlMessageQueue.h>
 #include <inttypes.h>
 #include <performance_hint_private.h>
 #include <utils/SystemClock.h>
 
 #include <chrono>
+#include <format>
 #include <future>
 #include <set>
 #include <utility>
@@ -63,6 +65,22 @@
 constexpr int64_t SEND_HINT_TIMEOUT = std::chrono::nanoseconds(100ms).count();
 struct AWorkDuration : public hal::WorkDuration {};
 
+// A pair of values that determine the behavior of the
+// load hint rate limiter, to only allow "X hints every Y seconds"
+constexpr double kLoadHintInterval = std::chrono::nanoseconds(2s).count();
+constexpr double kMaxLoadHintsPerInterval = 20;
+constexpr double kReplenishRate = kMaxLoadHintsPerInterval / kLoadHintInterval;
+bool kForceNewHintBehavior = false;
+
+template <class T>
+constexpr int32_t enum_size() {
+    return static_cast<int32_t>(*(ndk::enum_range<T>().end() - 1)) + 1;
+}
+
+bool useNewLoadHintBehavior() {
+    return android::os::adpf_use_load_hints() || kForceNewHintBehavior;
+}
+
 // Shared lock for the whole PerformanceHintManager and sessions
 static std::mutex sHintMutex = std::mutex{};
 class FMQWrapper {
@@ -76,7 +94,8 @@
                                    hal::WorkDuration* durations, size_t count) REQUIRES(sHintMutex);
     bool updateTargetWorkDuration(std::optional<hal::SessionConfig>& config,
                                   int64_t targetDurationNanos) REQUIRES(sHintMutex);
-    bool sendHint(std::optional<hal::SessionConfig>& config, SessionHint hint) REQUIRES(sHintMutex);
+    bool sendHints(std::optional<hal::SessionConfig>& config, std::vector<hal::SessionHint>& hint,
+                   int64_t now) REQUIRES(sHintMutex);
     bool setMode(std::optional<hal::SessionConfig>& config, hal::SessionMode, bool enabled)
             REQUIRES(sHintMutex);
     void setToken(ndk::SpAIBinder& token);
@@ -86,10 +105,11 @@
 private:
     template <HalChannelMessageContents::Tag T, bool urgent = false,
               class C = HalChannelMessageContents::_at<T>>
-    bool sendMessages(std::optional<hal::SessionConfig>& config, C* message, size_t count = 1)
-            REQUIRES(sHintMutex);
+    bool sendMessages(std::optional<hal::SessionConfig>& config, C* message, size_t count = 1,
+                      int64_t now = ::android::uptimeNanos()) REQUIRES(sHintMutex);
     template <HalChannelMessageContents::Tag T, class C = HalChannelMessageContents::_at<T>>
-    void writeBuffer(C* message, hal::SessionConfig& config, size_t count) REQUIRES(sHintMutex);
+    void writeBuffer(C* message, hal::SessionConfig& config, size_t count, int64_t now)
+            REQUIRES(sHintMutex);
 
     bool isActiveLocked() REQUIRES(sHintMutex);
     bool updatePersistentTransaction() REQUIRES(sHintMutex);
@@ -120,6 +140,7 @@
                                            hal::SessionTag tag = hal::SessionTag::APP);
     int64_t getPreferredRateNanos() const;
     FMQWrapper& getFMQWrapper();
+    bool canSendLoadHints(std::vector<hal::SessionHint>& hints, int64_t now) REQUIRES(sHintMutex);
 
 private:
     // Necessary to create an empty binder object
@@ -138,6 +159,8 @@
     ndk::SpAIBinder mToken;
     const int64_t mPreferredRateNanos;
     FMQWrapper mFMQWrapper;
+    double mHintBudget = kMaxLoadHintsPerInterval;
+    int64_t mLastBudgetReplenish = 0;
 };
 
 struct APerformanceHintSession {
@@ -151,7 +174,9 @@
 
     int updateTargetWorkDuration(int64_t targetDurationNanos);
     int reportActualWorkDuration(int64_t actualDurationNanos);
-    int sendHint(SessionHint hint);
+    int sendHints(std::vector<hal::SessionHint>& hints, int64_t now, const char* debugName);
+    int notifyWorkloadIncrease(bool cpu, bool gpu, const char* debugName);
+    int notifyWorkloadReset(bool cpu, bool gpu, const char* debugName);
     int setThreads(const int32_t* threadIds, size_t size);
     int getThreadIds(int32_t* const threadIds, size_t* size);
     int setPreferPowerEfficiency(bool enabled);
@@ -173,6 +198,8 @@
     // Last target hit timestamp
     int64_t mLastTargetMetTimestamp GUARDED_BY(sHintMutex);
     // Last hint reported from sendHint indexed by hint value
+    // This is only used by the old rate limiter impl and is replaced
+    // with the new rate limiter under a flag
     std::vector<int64_t> mLastHintSentTimestamp GUARDED_BY(sHintMutex);
     // Cached samples
     std::vector<hal::WorkDuration> mActualWorkDurations GUARDED_BY(sHintMutex);
@@ -255,6 +282,21 @@
     return new APerformanceHintManager(manager, preferredRateNanos);
 }
 
+bool APerformanceHintManager::canSendLoadHints(std::vector<hal::SessionHint>& hints, int64_t now) {
+    mHintBudget =
+            std::max(kMaxLoadHintsPerInterval,
+                     mHintBudget +
+                             static_cast<double>(now - mLastBudgetReplenish) * kReplenishRate);
+    mLastBudgetReplenish = now;
+
+    // If this youngest timestamp isn't older than the timeout time, we can't send
+    if (hints.size() > mHintBudget) {
+        return false;
+    }
+    mHintBudget -= hints.size();
+    return true;
+}
+
 APerformanceHintSession* APerformanceHintManager::createSession(
         const int32_t* threadIds, size_t size, int64_t initialTargetWorkDurationNanos,
         hal::SessionTag tag) {
@@ -292,9 +334,7 @@
 
 // ===================================== APerformanceHintSession implementation
 
-constexpr int kNumEnums =
-        ndk::enum_range<hal::SessionHint>().end() - ndk::enum_range<hal::SessionHint>().begin();
-
+constexpr int kNumEnums = enum_size<hal::SessionHint>();
 APerformanceHintSession::APerformanceHintSession(std::shared_ptr<IHintManager> hintManager,
                                                  std::shared_ptr<IHintSession> session,
                                                  int64_t preferredRateNanos,
@@ -361,31 +401,83 @@
     return reportActualWorkDurationInternal(static_cast<AWorkDuration*>(&workDuration));
 }
 
-int APerformanceHintSession::sendHint(SessionHint hint) {
+int APerformanceHintSession::sendHints(std::vector<hal::SessionHint>& hints, int64_t now,
+                                       const char*) {
     std::scoped_lock lock(sHintMutex);
-    if (hint < 0 || hint >= static_cast<int32_t>(mLastHintSentTimestamp.size())) {
-        ALOGE("%s: invalid session hint %d", __FUNCTION__, hint);
+    if (hints.empty()) {
         return EINVAL;
     }
-    int64_t now = uptimeNanos();
-
-    // Limit sendHint to a pre-detemined rate for safety
-    if (now < (mLastHintSentTimestamp[hint] + SEND_HINT_TIMEOUT)) {
-        return 0;
-    }
-
-    if (!getFMQ().sendHint(mSessionConfig, hint)) {
-        ndk::ScopedAStatus ret = mHintSession->sendHint(hint);
-
-        if (!ret.isOk()) {
-            ALOGE("%s: HintSession sendHint failed: %s", __FUNCTION__, ret.getMessage());
-            return EPIPE;
+    for (auto&& hint : hints) {
+        if (static_cast<int32_t>(hint) < 0 || static_cast<int32_t>(hint) >= kNumEnums) {
+            ALOGE("%s: invalid session hint %d", __FUNCTION__, hint);
+            return EINVAL;
         }
     }
-    mLastHintSentTimestamp[hint] = now;
+
+    if (useNewLoadHintBehavior()) {
+        if (!APerformanceHintManager::getInstance()->canSendLoadHints(hints, now)) {
+            return EBUSY;
+        }
+    }
+    // keep old rate limiter behavior for legacy flag
+    else {
+        for (auto&& hint : hints) {
+            if (now < (mLastHintSentTimestamp[static_cast<int32_t>(hint)] + SEND_HINT_TIMEOUT)) {
+                return EBUSY;
+            }
+        }
+    }
+
+    if (!getFMQ().sendHints(mSessionConfig, hints, now)) {
+        for (auto&& hint : hints) {
+            ndk::ScopedAStatus ret = mHintSession->sendHint(static_cast<int32_t>(hint));
+
+            if (!ret.isOk()) {
+                ALOGE("%s: HintSession sendHint failed: %s", __FUNCTION__, ret.getMessage());
+                return EPIPE;
+            }
+        }
+    }
+
+    if (!useNewLoadHintBehavior()) {
+        for (auto&& hint : hints) {
+            mLastHintSentTimestamp[static_cast<int32_t>(hint)] = now;
+        }
+    }
+
+    if (ATrace_isEnabled()) {
+        ATRACE_INSTANT("Sending load hint");
+    }
+
     return 0;
 }
 
+int APerformanceHintSession::notifyWorkloadIncrease(bool cpu, bool gpu, const char* debugName) {
+    std::vector<hal::SessionHint> hints(2);
+    hints.clear();
+    if (cpu) {
+        hints.push_back(hal::SessionHint::CPU_LOAD_UP);
+    }
+    if (gpu) {
+        hints.push_back(hal::SessionHint::GPU_LOAD_UP);
+    }
+    int64_t now = ::android::uptimeNanos();
+    return sendHints(hints, now, debugName);
+}
+
+int APerformanceHintSession::notifyWorkloadReset(bool cpu, bool gpu, const char* debugName) {
+    std::vector<hal::SessionHint> hints(2);
+    hints.clear();
+    if (cpu) {
+        hints.push_back(hal::SessionHint::CPU_LOAD_RESET);
+    }
+    if (gpu) {
+        hints.push_back(hal::SessionHint::GPU_LOAD_RESET);
+    }
+    int64_t now = ::android::uptimeNanos();
+    return sendHints(hints, now, debugName);
+}
+
 int APerformanceHintSession::setThreads(const int32_t* threadIds, size_t size) {
     if (size == 0) {
         ALOGE("%s: the list of thread ids must not be empty.", __FUNCTION__);
@@ -565,24 +657,25 @@
 }
 
 template <HalChannelMessageContents::Tag T, class C>
-void FMQWrapper::writeBuffer(C* message, hal::SessionConfig& config, size_t) {
-    new (mFmqTransaction.getSlot(0)) hal::ChannelMessage{
-            .sessionID = static_cast<int32_t>(config.id),
-            .timeStampNanos = ::android::uptimeNanos(),
-            .data = HalChannelMessageContents::make<T, C>(std::move(*message)),
-    };
+void FMQWrapper::writeBuffer(C* message, hal::SessionConfig& config, size_t count, int64_t now) {
+    for (size_t i = 0; i < count; ++i) {
+        new (mFmqTransaction.getSlot(i)) hal::ChannelMessage{
+                .sessionID = static_cast<int32_t>(config.id),
+                .timeStampNanos = now,
+                .data = HalChannelMessageContents::make<T, C>(std::move(*(message + i))),
+        };
+    }
 }
 
 template <>
 void FMQWrapper::writeBuffer<HalChannelMessageContents::workDuration>(hal::WorkDuration* messages,
                                                                       hal::SessionConfig& config,
-                                                                      size_t count) {
+                                                                      size_t count, int64_t now) {
     for (size_t i = 0; i < count; ++i) {
         hal::WorkDuration& message = messages[i];
         new (mFmqTransaction.getSlot(i)) hal::ChannelMessage{
                 .sessionID = static_cast<int32_t>(config.id),
-                .timeStampNanos =
-                        (i == count - 1) ? ::android::uptimeNanos() : message.timeStampNanos,
+                .timeStampNanos = (i == count - 1) ? now : message.timeStampNanos,
                 .data = HalChannelMessageContents::make<HalChannelMessageContents::workDuration,
                                                         hal::WorkDurationFixedV1>({
                         .durationNanos = message.cpuDurationNanos,
@@ -595,7 +688,8 @@
 }
 
 template <HalChannelMessageContents::Tag T, bool urgent, class C>
-bool FMQWrapper::sendMessages(std::optional<hal::SessionConfig>& config, C* message, size_t count) {
+bool FMQWrapper::sendMessages(std::optional<hal::SessionConfig>& config, C* message, size_t count,
+                              int64_t now) {
     if (!isActiveLocked() || !config.has_value() || mCorrupted) {
         return false;
     }
@@ -609,7 +703,7 @@
             return false;
         }
     }
-    writeBuffer<T, C>(message, *config, count);
+    writeBuffer<T, C>(message, *config, count, now);
     mQueue->commitWrite(count);
     mEventFlag->wake(mWriteMask);
     // Re-create the persistent transaction after writing
@@ -641,10 +735,9 @@
     return sendMessages<HalChannelMessageContents::targetDuration>(config, &targetDurationNanos);
 }
 
-bool FMQWrapper::sendHint(std::optional<hal::SessionConfig>& config, SessionHint hint) {
-    return sendMessages<HalChannelMessageContents::hint>(config,
-                                                         reinterpret_cast<hal::SessionHint*>(
-                                                                 &hint));
+bool FMQWrapper::sendHints(std::optional<hal::SessionConfig>& config,
+                           std::vector<hal::SessionHint>& hints, int64_t now) {
+    return sendMessages<HalChannelMessageContents::hint>(config, hints.data(), hints.size(), now);
 }
 
 bool FMQWrapper::setMode(std::optional<hal::SessionConfig>& config, hal::SessionMode mode,
@@ -758,7 +851,9 @@
 
 int APerformanceHint_sendHint(APerformanceHintSession* session, SessionHint hint) {
     VALIDATE_PTR(session)
-    return session->sendHint(hint);
+    std::vector<hal::SessionHint> hints{static_cast<hal::SessionHint>(hint)};
+    int64_t now = ::android::uptimeNanos();
+    return session->sendHints(hints, now, "HWUI hint");
 }
 
 int APerformanceHint_setThreads(APerformanceHintSession* session, const pid_t* threadIds,
@@ -791,6 +886,26 @@
     return session->reportActualWorkDuration(workDurationPtr);
 }
 
+int APerformanceHint_notifyWorkloadIncrease(APerformanceHintSession* session, bool cpu, bool gpu,
+                                            const char* debugName) {
+    VALIDATE_PTR(session)
+    VALIDATE_PTR(debugName)
+    if (!useNewLoadHintBehavior()) {
+        return ENOTSUP;
+    }
+    return session->notifyWorkloadIncrease(cpu, gpu, debugName);
+}
+
+int APerformanceHint_notifyWorkloadReset(APerformanceHintSession* session, bool cpu, bool gpu,
+                                         const char* debugName) {
+    VALIDATE_PTR(session)
+    VALIDATE_PTR(debugName)
+    if (!useNewLoadHintBehavior()) {
+        return ENOTSUP;
+    }
+    return session->notifyWorkloadReset(cpu, gpu, debugName);
+}
+
 AWorkDuration* AWorkDuration_create() {
     return new AWorkDuration();
 }
@@ -838,3 +953,13 @@
 void APerformanceHint_setUseFMQForTesting(bool enabled) {
     gForceFMQEnabled = enabled;
 }
+
+void APerformanceHint_getRateLimiterPropertiesForTesting(int32_t* maxLoadHintsPerInterval,
+                                                         int64_t* loadHintInterval) {
+    *maxLoadHintsPerInterval = kMaxLoadHintsPerInterval;
+    *loadHintInterval = kLoadHintInterval;
+}
+
+void APerformanceHint_setUseNewLoadHintBehaviorForTesting(bool newBehavior) {
+    kForceNewHintBehavior = newBehavior;
+}