Introduce a dependency monitor for fences
This allows for userspace logging for a buffer and read/write dependencies on the buffer.
Hook up SF to the dependency monitor.
Right now this _does_ emit logs in SF when the primary display is powered down, which is likely indicative of SF being sloppy about release fences in situations where tearing won't be noticeable, but I did verify that manually making screenshotting forget to merge GPU work into a layer's release fence, which has been one way SF's torn the screen, triggers logcat, which is ultimately what we want.
Bug: 360932099
Flag: com.android.graphics.surfaceflinger.flags.monitor_buffer_fences
Test: manually remove screenshot fence handling and check logs
Change-Id: Ica391dfa8a4f2924bb72664b9d9399e4ad9e1747
diff --git a/libs/ui/Android.bp b/libs/ui/Android.bp
index 87e213e..10cb992 100644
--- a/libs/ui/Android.bp
+++ b/libs/ui/Android.bp
@@ -122,6 +122,7 @@
srcs: [
"DebugUtils.cpp",
+ "DependencyMonitor.cpp",
"DeviceProductInfo.cpp",
"DisplayIdentification.cpp",
"DynamicDisplayInfo.cpp",
diff --git a/libs/ui/DependencyMonitor.cpp b/libs/ui/DependencyMonitor.cpp
new file mode 100644
index 0000000..b7e490e
--- /dev/null
+++ b/libs/ui/DependencyMonitor.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// #define LOG_NDEBUG 0
+#undef LOG_TAG
+#define LOG_TAG "DependencyMonitor"
+
+#include <ui/DependencyMonitor.h>
+#include <ui/Fence.h>
+#include <utils/Timers.h>
+
+#include <inttypes.h>
+
+namespace android {
+
+void DependencyMonitor::addIngress(FenceTimePtr fence, std::string annotation) {
+ std::lock_guard lock(mMutex);
+ resolveLocked();
+ if (mDependencies.isFull() && !mDependencies.front().updateSignalTimes(true)) {
+ ALOGD("%s: Clobbering unresolved dependencies -- make me bigger!", mToken.c_str());
+ }
+
+ auto& entry = mDependencies.next();
+ entry.reset(mToken.c_str());
+ ALOGV("%" PRId64 "/%s: addIngress at CPU time %" PRId64 " (%s)", mDependencies.back().id,
+ mToken.c_str(), systemTime(), annotation.c_str());
+
+ mDependencies.back().ingress = {std::move(fence), std::move(annotation)};
+}
+
+void DependencyMonitor::addAccessCompletion(FenceTimePtr fence, std::string annotation) {
+ std::lock_guard lock(mMutex);
+ if (mDependencies.size() == 0) {
+ return;
+ }
+ ALOGV("%" PRId64 "/%s: addAccessCompletion at CPU time %" PRId64 " (%s)",
+ mDependencies.back().id, mToken.c_str(), systemTime(), annotation.c_str());
+ mDependencies.back().accessCompletions.emplace_back(std::move(fence), std::move(annotation));
+}
+
+void DependencyMonitor::addEgress(FenceTimePtr fence, std::string annotation) {
+ std::lock_guard lock(mMutex);
+ if (mDependencies.size() == 0) {
+ return;
+ }
+ ALOGV("%" PRId64 "/%s: addEgress at CPU time %" PRId64 " (%s)", mDependencies.back().id,
+ mToken.c_str(), systemTime(), annotation.c_str());
+ mDependencies.back().egress = {std::move(fence), std::move(annotation)};
+}
+
+void DependencyMonitor::resolveLocked() {
+ if (mDependencies.size() == 0) {
+ return;
+ }
+
+ for (size_t i = mDependencies.size(); i > 0; i--) {
+ auto& dependencyBlock = mDependencies[i - 1];
+
+ if (dependencyBlock.validated) {
+ continue;
+ }
+
+ if (!dependencyBlock.updateSignalTimes(false)) {
+ break;
+ }
+
+ dependencyBlock.validated = true;
+ dependencyBlock.checkUnsafeAccess();
+ }
+}
+
+bool DependencyMonitor::DependencyBlock::updateSignalTimes(bool excludeIngress) {
+ if (egress.fence->getSignalTime() == Fence::SIGNAL_TIME_PENDING) {
+ return false;
+ }
+
+ if (!excludeIngress && ingress.fence->getSignalTime() == Fence::SIGNAL_TIME_PENDING) {
+ return false;
+ }
+
+ for (auto& accessCompletion : accessCompletions) {
+ if (accessCompletion.fence->getSignalTime() == Fence::SIGNAL_TIME_PENDING) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void DependencyMonitor::DependencyBlock::checkUnsafeAccess() const {
+ const nsecs_t egressTime = egress.fence->getCachedSignalTime();
+ const nsecs_t ingressTime = ingress.fence->getCachedSignalTime();
+
+ ALOGV_IF(egressTime != Fence::SIGNAL_TIME_INVALID,
+ "%" PRId64 "/%s: Egress time: %" PRId64 " (%s)", token, id, egressTime,
+ egress.annotation.c_str());
+ ALOGV_IF(Fence::isValidTimestamp(egressTime) && Fence::isValidTimestamp(ingressTime) &&
+ egressTime < ingressTime,
+ "%" PRId64 "/%s: Detected egress before ingress!: %" PRId64 " (%s) < %" PRId64 " (%s)",
+ id, token, egressTime, egress.annotation, ingressTime, ingress.annotation.c_str());
+
+ for (auto& accessCompletion : accessCompletions) {
+ const nsecs_t accessCompletionTime = accessCompletion.fence->getCachedSignalTime();
+ if (!Fence::isValidTimestamp(accessCompletionTime)) {
+ ALOGI("%" PRId64 "/%s: Detected invalid access completion! <%s>", id, token,
+ accessCompletion.annotation.c_str());
+ continue;
+ } else {
+ ALOGV("%" PRId64 "/%s: Access completion time: %" PRId64 " <%s>", id, token,
+ accessCompletionTime, accessCompletion.annotation.c_str());
+ }
+
+ ALOGI_IF(Fence::isValidTimestamp(egressTime) && accessCompletionTime > egressTime,
+ "%" PRId64 "/%s: Detected access completion after egress!: %" PRId64
+ " (%s) > %" PRId64 " (%s)",
+ id, token, accessCompletionTime, accessCompletion.annotation.c_str(), egressTime,
+ egress.annotation.c_str());
+
+ ALOGI_IF(Fence::isValidTimestamp(ingressTime) && accessCompletionTime < ingressTime,
+ "%" PRId64 "/%s: Detected access completion prior to ingress!: %" PRId64
+ " (%s) < %" PRId64 " (%s)",
+ id, token, accessCompletionTime, accessCompletion.annotation.c_str(), ingressTime,
+ ingress.annotation.c_str());
+ }
+
+ ALOGV_IF(ingressTime != Fence::SIGNAL_TIME_INVALID,
+ "%" PRId64 "/%s: Ingress time: %" PRId64 " (%s)", id, token, ingressTime,
+ ingress.annotation.c_str());
+}
+
+} // namespace android
\ No newline at end of file
diff --git a/libs/ui/FenceTime.cpp b/libs/ui/FenceTime.cpp
index 4246c40..81afe9e 100644
--- a/libs/ui/FenceTime.cpp
+++ b/libs/ui/FenceTime.cpp
@@ -59,6 +59,14 @@
}
}
+FenceTimePtr FenceTime::makeValid(const sp<Fence>& fence) {
+ if (fence && fence->isValid()) {
+ return std::make_shared<FenceTime>(fence);
+ } else {
+ return std::make_shared<FenceTime>(systemTime());
+ }
+}
+
void FenceTime::applyTrustedSnapshot(const Snapshot& src) {
if (CC_UNLIKELY(src.state != Snapshot::State::SIGNAL_TIME)) {
// Applying Snapshot::State::FENCE, could change the valid state of the
@@ -289,9 +297,10 @@
// ============================================================================
void FenceTimeline::push(const std::shared_ptr<FenceTime>& fence) {
std::lock_guard<std::mutex> lock(mMutex);
- while (mQueue.size() >= MAX_ENTRIES) {
+ static constexpr size_t MAX_QUEUE_SIZE = 64;
+ while (mQueue.size() >= MAX_QUEUE_SIZE) {
// This is a sanity check to make sure the queue doesn't grow unbounded.
- // MAX_ENTRIES should be big enough not to trigger this path.
+ // MAX_QUEUE_SIZE should be big enough not to trigger this path.
// In case this path is taken though, users of FenceTime must make sure
// not to rely solely on FenceTimeline to get the final timestamp and
// should eventually call Fence::getSignalTime on their own.
diff --git a/libs/ui/GraphicBuffer.cpp b/libs/ui/GraphicBuffer.cpp
index 18c9a6b..f7c9400 100644
--- a/libs/ui/GraphicBuffer.cpp
+++ b/libs/ui/GraphicBuffer.cpp
@@ -27,6 +27,8 @@
#include <ui/GraphicBufferMapper.h>
#include <utils/Trace.h>
+#include <string>
+
namespace android {
// ===========================================================================
@@ -104,6 +106,7 @@
usage = 0;
layerCount = 0;
handle = nullptr;
+ mDependencyMonitor.setToken(std::to_string(mId));
}
// deprecated
@@ -155,6 +158,8 @@
layerCount = request.layerCount;
usage = request.usage;
usage_deprecated = int(usage);
+ std::string name = request.requestorName;
+ mDependencyMonitor.setToken(name.append(":").append(std::to_string(mId)));
}
}
@@ -252,6 +257,7 @@
usage = inUsage;
usage_deprecated = int(usage);
stride = static_cast<int>(outStride);
+ mDependencyMonitor.setToken(requestorName.append(":").append(std::to_string(mId)));
}
return err;
}
@@ -609,6 +615,14 @@
mBufferMapper.getTransportSize(handle, &mTransportNumFds, &mTransportNumInts);
}
+ std::string name;
+ status_t err = mBufferMapper.getName(handle, &name);
+ if (err != NO_ERROR) {
+ name = "<Unknown>";
+ }
+
+ mDependencyMonitor.setToken(name.append(":").append(std::to_string(mId)));
+
buffer = static_cast<void const*>(static_cast<uint8_t const*>(buffer) + sizeNeeded);
size -= sizeNeeded;
fds += numFds;
diff --git a/libs/ui/include/ui/DependencyMonitor.h b/libs/ui/include/ui/DependencyMonitor.h
new file mode 100644
index 0000000..5ad1fd9
--- /dev/null
+++ b/libs/ui/include/ui/DependencyMonitor.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <ui/FatVector.h>
+#include <ui/FenceTime.h>
+#include <ui/RingBuffer.h>
+
+namespace android {
+
+// Debugging class for that tries to add userspace logging for fence depencencies.
+// The model that a DependencyMonitor tries to follow is, for each access of some resource:
+// 1. There is a single ingress fence, that guards whether a resource is now safe to read from
+// another system.
+// 2. There are multiple access fences, that are fired when a resource is read.
+// 3. There is a single egress fence, that is fired when a resource is released and sent to another
+// system.
+//
+// Note that there can be repeated ingress and egress of a resource, but the assumption is that
+// there is exactly one egress for every ingress, unless the resource is destroyed rather than
+// released.
+//
+// The DependencyMonitor will log if there is an anomaly in the fences tracked for some resource.
+// This includes:
+// * If (2) happens before (1)
+// * If (2) happens after (3)
+//
+// Note that this class has no knowledge of the "other system". I.e., if the other system ignores
+// the fence reported in (3), but still takes a long time to write to the resource and produce (1),
+// then nothing will be logged. That other system must have its own DependencyMonitor. Conversely,
+// this class has imperfect knowledge of the system it is monitoring. For example, this class does
+// not know the precise start times of reading from a resource, the exact time that a read might
+// occur from a hardware unit is not known to userspace.
+//
+// In other words, this class logs specific classes of fence violations, but is not sensitive to
+// *all* violations. One property of this is that unless the system tracked by a DependencyMonitor
+// is feeding in literally incorrect fences, then there is no chance of a false positive.
+//
+// This class is thread safe.
+class DependencyMonitor {
+public:
+ // Sets a debug token identifying the resource this monitor is tracking.
+ void setToken(std::string token) { mToken = std::move(token); }
+
+ // Adds a fence that is fired when the resource ready to be ingested by the system using the
+ // DependencyMonitor.
+ void addIngress(FenceTimePtr fence, std::string annotation);
+ // Adds a fence that is fired when the resource is accessed.
+ void addAccessCompletion(FenceTimePtr fence, std::string annotation);
+ // Adds a fence that is fired when the resource is released to another system.
+ void addEgress(FenceTimePtr fence, std::string annotation);
+
+private:
+ struct AnnotatedFenceTime {
+ FenceTimePtr fence;
+ std::string annotation;
+ };
+
+ struct DependencyBlock {
+ int64_t id = -1;
+ AnnotatedFenceTime ingress = {FenceTime::NO_FENCE, ""};
+ FatVector<AnnotatedFenceTime> accessCompletions;
+ AnnotatedFenceTime egress = {FenceTime::NO_FENCE, ""};
+ bool validated = false;
+ const char* token = nullptr;
+
+ void reset(const char* newToken) {
+ static std::atomic<int64_t> counter = 0;
+ id = counter++;
+ ingress = {FenceTime::NO_FENCE, ""};
+ accessCompletions.clear();
+ egress = {FenceTime::NO_FENCE, ""};
+ validated = false;
+ token = newToken;
+ }
+
+ // Returns true if all fences in this block have valid signal times.
+ bool updateSignalTimes(bool excludeIngress);
+
+ void checkUnsafeAccess() const;
+ };
+
+ void resolveLocked() REQUIRES(mMutex);
+
+ std::string mToken;
+ std::mutex mMutex;
+ ui::RingBuffer<DependencyBlock, 10> mDependencies GUARDED_BY(mMutex);
+};
+
+} // namespace android
\ No newline at end of file
diff --git a/libs/ui/include/ui/FenceTime.h b/libs/ui/include/ui/FenceTime.h
index 334106f..3560d57 100644
--- a/libs/ui/include/ui/FenceTime.h
+++ b/libs/ui/include/ui/FenceTime.h
@@ -17,6 +17,7 @@
#ifndef ANDROID_FENCE_TIME_H
#define ANDROID_FENCE_TIME_H
+#include <stddef.h>
#include <ui/Fence.h>
#include <utils/Flattenable.h>
#include <utils/Mutex.h>
@@ -30,6 +31,8 @@
namespace android {
class FenceToFenceTimeMap;
+class FenceTime;
+using FenceTimePtr = std::shared_ptr<FenceTime>;
// A wrapper around fence that only implements isValid and getSignalTime.
// It automatically closes the fence in a thread-safe manner once the signal
@@ -95,6 +98,10 @@
FenceTime& operator=(const FenceTime&) = delete;
FenceTime& operator=(FenceTime&&) = delete;
+ // Constructs a FenceTime, falling back to a timestamp if the fence is
+ // invalid.
+ static FenceTimePtr makeValid(const sp<Fence>& fence);
+
// This method should only be called when replacing the fence with
// a signalTime. Since this is an indirect way of setting the signal time
// of a fence, the snapshot should come from a trusted source.
@@ -142,8 +149,6 @@
std::atomic<nsecs_t> mSignalTime{Fence::SIGNAL_TIME_INVALID};
};
-using FenceTimePtr = std::shared_ptr<FenceTime>;
-
// A queue of FenceTimes that are expected to signal in FIFO order.
// Only maintains a queue of weak pointers so it doesn't keep references
// to Fences on its own.
@@ -162,8 +167,6 @@
// different threads.
class FenceTimeline {
public:
- static constexpr size_t MAX_ENTRIES = 64;
-
void push(const std::shared_ptr<FenceTime>& fence);
void updateSignalTimes();
diff --git a/libs/ui/include/ui/GraphicBuffer.h b/libs/ui/include/ui/GraphicBuffer.h
index 936bf8f..9305180 100644
--- a/libs/ui/include/ui/GraphicBuffer.h
+++ b/libs/ui/include/ui/GraphicBuffer.h
@@ -23,6 +23,7 @@
#include <string>
#include <utility>
#include <vector>
+#include "ui/DependencyMonitor.h"
#include <android/hardware_buffer.h>
#include <ui/ANativeObjectBase.h>
@@ -229,6 +230,8 @@
void addDeathCallback(GraphicBufferDeathCallback deathCallback, void* context);
+ DependencyMonitor& getDependencyMonitor() { return mDependencyMonitor; }
+
private:
~GraphicBuffer();
@@ -295,6 +298,8 @@
// and informs SurfaceFlinger that it should drop its strong pointer reference to the buffer.
std::vector<std::pair<GraphicBufferDeathCallback, void* /*mDeathCallbackContext*/>>
mDeathCallbacks;
+
+ DependencyMonitor mDependencyMonitor;
};
} // namespace android