Fix deadlock of main thread and Perfetto thread (LayerDataSource::OnStart)
The deadly embrace used to happen in this situation:
1. Perfetto shmem buffer full.
2. Main thread blocked while writing into shmem buffer (active tracing),
i.e. main thread waiting for Perfetto thread to signal available
shmem chunks.
3. LayerDataSource::OnStart with MODE_DUMP (executed by Perfetto thread)
waiting for main thread to capture a layers snapshot.
This commit removes the layers snapshot capture from OnStart.
Now OnStop (Perfetto thread) triggers the layers snapshot capture,
which is performed by the main thread. However, the Perfetto thread
doesn't wait for the main thread to complete the capture. The actual
stop of the tracing session is deferred and signalled later by the main
thread, by invoking the closure returned by
LayerDataSource::StopArgs::HandleStopAsynchronously(), when the capture
is complete.
Fix: b/313130597
Test: reproduced the issue by forcing the main thread to write 100MB into the shmem buffer:
mLayerTracing.setTakeLayersSnapshotProtoFunction([&](uint32_t traceFlags) {
auto snapshot = perfetto::protos::LayersSnapshotProto{};
mScheduler
->schedule([&]() FTL_FAKE_GUARD(mStateLock) FTL_FAKE_GUARD(kMainThreadContext) {
snapshot = takeLayersSnapshotProto(traceFlags, TimePoint::now(),
mLastCommittedVsyncId, true);
+ LayerDataSource::Trace([&](LayerDataSource::TraceContext context) {
+ auto packet = context.NewTracePacket();
+ packet->set_timestamp(0);
+ packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_MONOTONIC);
+ auto* snapshotProto = packet->set_surfaceflinger_layers_snapshot();
+
+ auto data = std::string(100000000, 0xaa);
+ snapshotProto->AppendRawProtoBytes(data.data(), data.size());
+ });
})
.wait();
return snapshot;
});
Change-Id: Iea253da2a420d186d092521597f4783e5c8d157d
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index c4de02f..69ec47e 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -844,16 +844,20 @@
initScheduler(display);
dispatchDisplayHotplugEvent(display->getPhysicalId(), true);
- mLayerTracing.setTakeLayersSnapshotProtoFunction([&](uint32_t traceFlags) {
- auto snapshot = perfetto::protos::LayersSnapshotProto{};
- mScheduler
- ->schedule([&]() FTL_FAKE_GUARD(mStateLock) FTL_FAKE_GUARD(kMainThreadContext) {
- snapshot = takeLayersSnapshotProto(traceFlags, TimePoint::now(),
- mLastCommittedVsyncId, true);
- })
- .wait();
- return snapshot;
- });
+ mLayerTracing.setTakeLayersSnapshotProtoFunction(
+ [&](uint32_t traceFlags,
+ const LayerTracing::OnLayersSnapshotCallback& onLayersSnapshot) {
+ // Do not wait on the future, to avoid deadlocks between main and Perfetto
+ // threads (b/313130597). The task outlives this call: capture args by value.
+ static_cast<void>(mScheduler->schedule(
+ [&, traceFlags, onLayersSnapshot]() FTL_FAKE_GUARD(mStateLock)
+ FTL_FAKE_GUARD(kMainThreadContext) {
+ auto snapshot =
+ takeLayersSnapshotProto(traceFlags, TimePoint::now(),
+ mLastCommittedVsyncId, true);
+ onLayersSnapshot(std::move(snapshot));
+ }));
+ });
// Commit secondary display(s).
processDisplayChangesLocked();
diff --git a/services/surfaceflinger/Tracing/LayerDataSource.cpp b/services/surfaceflinger/Tracing/LayerDataSource.cpp
index ed1e2ec..cc0063c 100644
--- a/services/surfaceflinger/Tracing/LayerDataSource.cpp
+++ b/services/surfaceflinger/Tracing/LayerDataSource.cpp
@@ -82,10 +82,13 @@
}
}
-void LayerDataSource::OnStop(const LayerDataSource::StopArgs&) {
+void LayerDataSource::OnStop(const LayerDataSource::StopArgs& args) {
ALOGD("Received OnStop event (mode = 0x%02x, flags = 0x%02x)", mMode, mFlags);
if (auto* p = mLayerTracing.load()) {
- p->onStop(mMode);
+ // In dump mode we need to defer the stop (through HandleStopAsynchronously()) till
+ // the layers snapshot has been captured and written to perfetto. We must avoid writing
+ // to perfetto within the OnStop callback to prevent deadlocks (b/313130597).
+ p->onStop(mMode, mFlags, args.HandleStopAsynchronously());
}
}
diff --git a/services/surfaceflinger/Tracing/LayerTracing.cpp b/services/surfaceflinger/Tracing/LayerTracing.cpp
index 41bcdf0..d2e00ed 100644
--- a/services/surfaceflinger/Tracing/LayerTracing.cpp
+++ b/services/surfaceflinger/Tracing/LayerTracing.cpp
@@ -32,7 +32,7 @@
namespace android {
LayerTracing::LayerTracing() {
- mTakeLayersSnapshotProto = [](uint32_t) { return perfetto::protos::LayersSnapshotProto{}; };
+ mTakeLayersSnapshotProto = [](uint32_t, const OnLayersSnapshotCallback&) {};
LayerDataSource::Initialize(*this);
}
@@ -45,7 +45,7 @@
}
void LayerTracing::setTakeLayersSnapshotProtoFunction(
- const std::function<perfetto::protos::LayersSnapshotProto(uint32_t)>& callback) {
+ const std::function<void(uint32_t, const OnLayersSnapshotCallback&)>& callback) {
mTakeLayersSnapshotProto = callback;
}
@@ -62,7 +62,10 @@
// It might take a while before a layers change occurs and a "spontaneous" snapshot is
// taken. Let's manually take a snapshot, so that the trace's first entry will contain
// the current layers state.
- addProtoSnapshotToOstream(mTakeLayersSnapshotProto(flags), Mode::MODE_ACTIVE);
+ auto onLayersSnapshot = [this](perfetto::protos::LayersSnapshotProto&& snapshot) {
+ addProtoSnapshotToOstream(std::move(snapshot), Mode::MODE_ACTIVE);
+ };
+ mTakeLayersSnapshotProto(flags, onLayersSnapshot);
ALOGD("Started active tracing (traced initial snapshot)");
break;
}
@@ -89,9 +92,7 @@
break;
}
case Mode::MODE_DUMP: {
- auto snapshot = mTakeLayersSnapshotProto(flags);
- addProtoSnapshotToOstream(std::move(snapshot), Mode::MODE_DUMP);
- ALOGD("Started dump tracing (dumped single snapshot)");
+ ALOGD("Started dump tracing");
break;
}
default: {
@@ -125,10 +126,27 @@
ALOGD("Flushed generated tracing");
}
-void LayerTracing::onStop(Mode mode) {
- if (mode == Mode::MODE_ACTIVE) {
- mIsActiveTracingStarted.store(false);
- ALOGD("Stopped active tracing");
+void LayerTracing::onStop(Mode mode, uint32_t flags, std::function<void()>&& deferredStopDone) {
+ switch (mode) {
+ case Mode::MODE_ACTIVE: {
+ mIsActiveTracingStarted.store(false);
+ deferredStopDone();
+ ALOGD("Stopped active tracing");
+ break;
+ }
+ case Mode::MODE_DUMP: {
+ auto onLayersSnapshot = [this, deferredStopDone = std::move(deferredStopDone)](
+ perfetto::protos::LayersSnapshotProto&& snapshot) {
+ addProtoSnapshotToOstream(std::move(snapshot), Mode::MODE_DUMP);
+ deferredStopDone();
+ ALOGD("Stopped dump tracing (written single snapshot)");
+ };
+ mTakeLayersSnapshotProto(flags, onLayersSnapshot);
+ break;
+ }
+ default: {
+ deferredStopDone();
+ }
}
}
diff --git a/services/surfaceflinger/Tracing/LayerTracing.h b/services/surfaceflinger/Tracing/LayerTracing.h
index 2895ba7..e99fe4c 100644
--- a/services/surfaceflinger/Tracing/LayerTracing.h
+++ b/services/surfaceflinger/Tracing/LayerTracing.h
@@ -87,6 +87,7 @@
class LayerTracing {
public:
using Mode = perfetto::protos::pbzero::SurfaceFlingerLayersConfig::Mode;
+ using OnLayersSnapshotCallback = std::function<void(perfetto::protos::LayersSnapshotProto&&)>;
enum Flag : uint32_t {
TRACE_INPUT = 1 << 1,
@@ -102,7 +103,7 @@
LayerTracing(std::ostream&);
~LayerTracing();
void setTakeLayersSnapshotProtoFunction(
- const std::function<perfetto::protos::LayersSnapshotProto(uint32_t)>&);
+ const std::function<void(uint32_t, const OnLayersSnapshotCallback&)>&);
void setTransactionTracing(TransactionTracing&);
// Start event from perfetto data source
@@ -110,7 +111,7 @@
// Flush event from perfetto data source
void onFlush(Mode mode, uint32_t flags, bool isBugreport);
// Stop event from perfetto data source
- void onStop(Mode mode);
+ void onStop(Mode mode, uint32_t flags, std::function<void()>&& deferredStopDone);
void addProtoSnapshotToOstream(perfetto::protos::LayersSnapshotProto&& snapshot, Mode mode);
bool isActiveTracingStarted() const;
@@ -123,7 +124,7 @@
void writeSnapshotToPerfetto(const perfetto::protos::LayersSnapshotProto& snapshot, Mode mode);
bool checkAndUpdateLastVsyncIdWrittenToPerfetto(Mode mode, std::int64_t vsyncId);
- std::function<perfetto::protos::LayersSnapshotProto(uint32_t)> mTakeLayersSnapshotProto;
+ std::function<void(uint32_t, const OnLayersSnapshotCallback&)> mTakeLayersSnapshotProto;
TransactionTracing* mTransactionTracing;
std::atomic<bool> mIsActiveTracingStarted{false};