Fix deadlock while stopping network trace poller
Calling reset on mTaskRunner waits for any in progress tasks to finish.
Since Stop takes mMutex, it's possible that in-progress task is waiting
for mMutex in order to finish, resulting in a deadlock.
This breaks the loop by using try_lock to remove the blocking dependency
on the mutex. Since we _always_ want to re-schedule ourselves, the poll
duration and runner are taken as arguments (if rescheduling is also in
the try_lock, any class interactions could stop the polling sequence).
Test: local start/stop stress test and atest libnetworkstats_test
Bug: 285411033
Change-Id: I4b8bea9c5474a37e8c081bcfe542d4fe57f5206c
diff --git a/service-t/native/libs/libnetworkstats/NetworkTracePoller.cpp b/service-t/native/libs/libnetworkstats/NetworkTracePoller.cpp
index d538368..80c315a 100644
--- a/service-t/native/libs/libnetworkstats/NetworkTracePoller.cpp
+++ b/service-t/native/libs/libnetworkstats/NetworkTracePoller.cpp
@@ -29,16 +29,16 @@
namespace bpf {
namespace internal {
-void NetworkTracePoller::SchedulePolling() {
- // Schedules another run of ourselves to recursively poll periodically.
- mTaskRunner->PostDelayedTask(
- [this]() {
- mMutex.lock();
- SchedulePolling();
- ConsumeAllLocked();
- mMutex.unlock();
- },
- mPollMs);
+void NetworkTracePoller::PollAndSchedule(perfetto::base::TaskRunner* runner,
+ uint32_t poll_ms) {
+ // Always schedule another run of ourselves to recursively poll periodically.
+ // The task runner is sequential so these can't run on top of each other.
+ runner->PostDelayedTask([=]() { PollAndSchedule(runner, poll_ms); }, poll_ms);
+
+ if (mMutex.try_lock()) {
+ ConsumeAllLocked();
+ mMutex.unlock();
+ }
}
bool NetworkTracePoller::Start(uint32_t pollMs) {
@@ -81,7 +81,7 @@
// Start a task runner to run ConsumeAll every mPollMs milliseconds.
mTaskRunner = perfetto::Platform::GetDefaultPlatform()->CreateTaskRunner({});
mPollMs = pollMs;
- SchedulePolling();
+ PollAndSchedule(mTaskRunner.get(), mPollMs);
mSessionCount++;
return true;
diff --git a/service-t/native/libs/libnetworkstats/include/netdbpf/NetworkTracePoller.h b/service-t/native/libs/libnetworkstats/include/netdbpf/NetworkTracePoller.h
index adde51e..8433934 100644
--- a/service-t/native/libs/libnetworkstats/include/netdbpf/NetworkTracePoller.h
+++ b/service-t/native/libs/libnetworkstats/include/netdbpf/NetworkTracePoller.h
@@ -53,7 +53,12 @@
bool ConsumeAll() EXCLUDES(mMutex);
private:
- void SchedulePolling() REQUIRES(mMutex);
+ // Poll the ring buffer for new data and schedule another run of ourselves
+ // after poll_ms (essentially polling periodically until stopped). This takes
+ // in the runner and poll duration to prevent a hard requirement on the lock
+ // and thus a deadlock while resetting the TaskRunner. The runner pointer is
+ // always valid within tasks run by that runner.
+ void PollAndSchedule(perfetto::base::TaskRunner* runner, uint32_t poll_ms);
bool ConsumeAllLocked() REQUIRES(mMutex);
std::mutex mMutex;