AudioFlinger: synchronize OutputTracks start on duplicating threads

When playback starts on a duplicating thread and target output
mixer threads exit standby, it is possible that one output stream takes
a long time to actually start playback (e.g. for Bluetooth software
encoder path). In this case, the OutputTrack on the other mixer thread
will underrun and audio will get choppy on this output stream.

In order to avoid this, an actual stream start detection mechanism is
added on the mixer thread and the OutputTrack write method will wait
for the target thread to actually start when exiting standby before
sending actual audio.

Bug: 218803475
Test: repro steps in bug
Change-Id: I0eec8c5817c97f7b7643a7151037aff5dc73f3ae
Merged-In: I0eec8c5817c97f7b7643a7151037aff5dc73f3ae
diff --git a/services/audioflinger/PlaybackTracks.h b/services/audioflinger/PlaybackTracks.h
index 9560609..ad191b0 100644
--- a/services/audioflinger/PlaybackTracks.h
+++ b/services/audioflinger/PlaybackTracks.h
@@ -426,6 +426,7 @@
 private:
     status_t            obtainBuffer(AudioBufferProvider::Buffer* buffer,
                                      uint32_t waitTimeMs);
+    void                queueBuffer(Buffer& inBuffer);
     void                clearBufferQueue();
 
     void                restartIfDisabled();
diff --git a/services/audioflinger/Threads.cpp b/services/audioflinger/Threads.cpp
index de0abf0..6a2d59c 100644
--- a/services/audioflinger/Threads.cpp
+++ b/services/audioflinger/Threads.cpp
@@ -4023,7 +4023,7 @@
                         LOG_AUDIO_STATE();
                         mThreadMetrics.logEndInterval();
                         mThreadSnapshot.onEnd();
-                        mStandby = true;
+                        setStandby_l();
                     }
                     sendStatistics(false /* force */);
                 }
@@ -4103,6 +4103,14 @@
             activeTracks.insert(activeTracks.end(), mActiveTracks.begin(), mActiveTracks.end());
 
             setHalLatencyMode_l();
+
+            // signal actual start of output stream when the render position reported by the kernel
+            // starts moving.
+            if (!mStandby && !mHalStarted && mKernelPositionOnStandby !=
+                    mTimestamp.mPosition[ExtendedTimestamp::LOCATION_KERNEL]) {
+                mHalStarted = true;
+                mWaitHalStartCV.broadcast();
+            }
         } // mLock scope ends
 
         if (mBytesRemaining == 0) {
@@ -4488,7 +4496,7 @@
 
     if (!mStandby) {
         threadLoop_standby();
-        mStandby = true;
+        setStandby();
     }
 
     releaseWakeLock();
@@ -6239,7 +6247,7 @@
             if (!mStandby) {
                 mThreadMetrics.logEndInterval();
                 mThreadSnapshot.onEnd();
-                mStandby = true;
+                setStandby_l();
             }
             mBytesWritten = 0;
             status = mOutput->stream->setParameters(keyValuePair);
@@ -6889,7 +6897,7 @@
             if (!mStandby) {
                 mThreadMetrics.logEndInterval();
                 mThreadSnapshot.onEnd();
-                mStandby = true;
+                setStandby_l();
             }
             mBytesWritten = 0;
             status = mOutput->stream->setParameters(keyValuePair);
diff --git a/services/audioflinger/Threads.h b/services/audioflinger/Threads.h
index 7b4c150..e88134b 100644
--- a/services/audioflinger/Threads.h
+++ b/services/audioflinger/Threads.h
@@ -1116,6 +1116,32 @@
                 void startMelComputation_l(const sp<audio_utils::MelProcessor>& processor) override;
                 void stopMelComputation_l() override;
 
+                void setStandby() {
+                    Mutex::Autolock _l(mLock);
+                    setStandby_l();
+                }
+
+                void setStandby_l() {
+                    mStandby = true;
+                    mHalStarted = false;
+                    mKernelPositionOnStandby =
+                        mTimestamp.mPosition[ExtendedTimestamp::LOCATION_KERNEL];
+                }
+
+                bool waitForHalStart() {
+                    Mutex::Autolock _l(mLock);
+                    static const nsecs_t kWaitHalTimeoutNs = seconds(2);
+                    nsecs_t endWaitTimetNs = systemTime() + kWaitHalTimeoutNs;
+                    while (!mHalStarted) {
+                        nsecs_t timeNs = systemTime();
+                        if (timeNs >= endWaitTimetNs) {
+                            break;
+                        }
+                        nsecs_t waitTimeLeftNs = endWaitTimetNs - timeNs;
+                        mWaitHalStartCV.waitRelative(mLock, waitTimeLeftNs);
+                    }
+                    return mHalStarted;
+                }
 protected:
     // updated by readOutputParameters_l()
     size_t                          mNormalFrameCount;  // normal mixer and effects
@@ -1415,6 +1441,14 @@
     // Downstream patch latency, available if mDownstreamLatencyStatMs.getN() > 0.
     audio_utils::Statistics<double> mDownstreamLatencyStatMs{0.999};
 
+    // output stream start detection based on render position returned by the kernel
+    // condition signalled when the output stream has started
+    Condition                mWaitHalStartCV;
+    // true when the output stream render position has moved, reset to false in standby
+    bool                     mHalStarted = false;
+    // last kernel render position saved when entering standby
+    int64_t                  mKernelPositionOnStandby = 0;
+
 public:
     virtual     bool        hasFastMixer() const = 0;
     virtual     FastTrackUnderruns getFastTrackUnderruns(size_t fastIndex __unused) const
diff --git a/services/audioflinger/Tracks.cpp b/services/audioflinger/Tracks.cpp
index 8faaffe..6b5ddce 100644
--- a/services/audioflinger/Tracks.cpp
+++ b/services/audioflinger/Tracks.cpp
@@ -2056,17 +2056,50 @@
 
 ssize_t AudioFlinger::PlaybackThread::OutputTrack::write(void* data, uint32_t frames)
 {
+    if (!mActive && frames != 0) {
+        sp<ThreadBase> thread = mThread.promote();
+        if (thread != nullptr && thread->standby()) {
+            // preload one silent buffer to trigger mixer on start()
+            ClientProxy::Buffer buf { .mFrameCount = mClientProxy->getStartThresholdInFrames() };
+            status_t status = mClientProxy->obtainBuffer(&buf);
+            if (status != NO_ERROR && status != NOT_ENOUGH_DATA && status != WOULD_BLOCK) {
+                ALOGE("%s(%d): could not obtain buffer on start", __func__, mId);
+                return 0;
+            }
+            memset(buf.mRaw, 0, buf.mFrameCount * mFrameSize);
+            mClientProxy->releaseBuffer(&buf);
+
+            (void) start();
+
+            // wait for HAL stream to start before sending actual audio. Doing this on each
+            // OutputTrack makes that playback start on all output streams is synchronized.
+            // If another OutputTrack has already started it can underrun but this is OK
+            // as only silence has been played so far and the retry count is very high on
+            // OutputTrack.
+            auto pt = static_cast<PlaybackThread *>(thread.get());
+            if (!pt->waitForHalStart()) {
+                ALOGW("%s(%d): timeout waiting for thread to exit standby", __func__, mId);
+                stop();
+                return 0;
+            }
+
+            // enqueue the first buffer and exit so that other OutputTracks will also start before
+            // write() is called again and this buffer actually consumed.
+            Buffer firstBuffer;
+            firstBuffer.frameCount = frames;
+            firstBuffer.raw = data;
+            queueBuffer(firstBuffer);
+            return frames;
+        } else {
+            (void) start();
+        }
+    }
+
     Buffer *pInBuffer;
     Buffer inBuffer;
     inBuffer.frameCount = frames;
     inBuffer.raw = data;
-
     uint32_t waitTimeLeftMs = mSourceThread->waitTimeMs();
-
-    if (!mActive && frames != 0) {
-        (void) start();
-    }
-
     while (waitTimeLeftMs) {
         // First write pending buffers, then new data
         if (mBufferQueue.size()) {
@@ -2134,25 +2167,7 @@
     if (inBuffer.frameCount) {
         sp<ThreadBase> thread = mThread.promote();
         if (thread != 0 && !thread->standby()) {
-            if (mBufferQueue.size() < kMaxOverFlowBuffers) {
-                pInBuffer = new Buffer;
-                const size_t bufferSize = inBuffer.frameCount * mFrameSize;
-                pInBuffer->mBuffer = malloc(bufferSize);
-                LOG_ALWAYS_FATAL_IF(pInBuffer->mBuffer == nullptr,
-                        "%s: Unable to malloc size %zu", __func__, bufferSize);
-                pInBuffer->frameCount = inBuffer.frameCount;
-                pInBuffer->raw = pInBuffer->mBuffer;
-                memcpy(pInBuffer->raw, inBuffer.raw, inBuffer.frameCount * mFrameSize);
-                mBufferQueue.add(pInBuffer);
-                ALOGV("%s(%d): thread %d adding overflow buffer %zu", __func__, mId,
-                        (int)mThreadIoHandle, mBufferQueue.size());
-                // audio data is consumed (stored locally); set frameCount to 0.
-                inBuffer.frameCount = 0;
-            } else {
-                ALOGW("%s(%d): thread %d no more overflow buffers",
-                        __func__, mId, (int)mThreadIoHandle);
-                // TODO: return error for this.
-            }
+            queueBuffer(inBuffer);
         }
     }
 
@@ -2165,6 +2180,29 @@
     return frames - inBuffer.frameCount;  // number of frames consumed.
 }
 
+void AudioFlinger::PlaybackThread::OutputTrack::queueBuffer(Buffer& inBuffer) {
+
+    if (mBufferQueue.size() < kMaxOverFlowBuffers) {
+        Buffer *pInBuffer = new Buffer;
+        const size_t bufferSize = inBuffer.frameCount * mFrameSize;
+        pInBuffer->mBuffer = malloc(bufferSize);
+        LOG_ALWAYS_FATAL_IF(pInBuffer->mBuffer == nullptr,
+                "%s: Unable to malloc size %zu", __func__, bufferSize);
+        pInBuffer->frameCount = inBuffer.frameCount;
+        pInBuffer->raw = pInBuffer->mBuffer;
+        memcpy(pInBuffer->raw, inBuffer.raw, inBuffer.frameCount * mFrameSize);
+        mBufferQueue.add(pInBuffer);
+        ALOGV("%s(%d): thread %d adding overflow buffer %zu", __func__, mId,
+                (int)mThreadIoHandle, mBufferQueue.size());
+        // audio data is consumed (stored locally); set frameCount to 0.
+        inBuffer.frameCount = 0;
+    } else {
+        ALOGW("%s(%d): thread %d no more overflow buffers",
+                __func__, mId, (int)mThreadIoHandle);
+        // TODO: return error for this.
+    }
+}
+
 void AudioFlinger::PlaybackThread::OutputTrack::copyMetadataTo(MetadataInserter& backInserter) const
 {
     std::lock_guard<std::mutex> lock(mTrackMetadatasMutex);