Add recognition session tokens

A recognition event can race with a start/stop/start sequence issued in
quick succession. Attributing the event to the wrong startRecognition
call leads to a state mismatch between the lower and upper layers.

- Associate each startRecognition with an IBinder token at the STModule
  layer
- Invalidate the token field on downwards stop calls, and drop callbacks
  when no session is active
- Add the token as a field to framework recognition event, so async
  clients can drop stale events
- Additionally, add protection against a startRecognition while the lock
  is dropped in stopRecognition
- Call new startReco version in STService, and drop callbacks for stale
  sessions
- Drop wtf on stale callback in the HAL enforcer, since we handle it
  gracefully at a higher layer

Bug: 236826280
Fixes: 275080257
Test: SoundTriggerManagerTest#
    testStartTriggerStopRecognitionRace_doesNotFail
Test: CtsVoiceInteractionTestCases
Test: FrameworksVoiceInteractionTests
Test: Smoke tests
Change-Id: I8a613b5f6821636e545309c09e6dfbb67626ea2b
diff --git a/core/java/android/hardware/soundtrigger/ConversionUtil.java b/core/java/android/hardware/soundtrigger/ConversionUtil.java
index 21fe686..5c07fa4 100644
--- a/core/java/android/hardware/soundtrigger/ConversionUtil.java
+++ b/core/java/android/hardware/soundtrigger/ConversionUtil.java
@@ -232,7 +232,8 @@
                 recognitionEvent.captureAvailable, captureSession, recognitionEvent.captureDelayMs,
                 recognitionEvent.capturePreambleMs, recognitionEvent.triggerInData, audioFormat,
                 recognitionEvent.data,
-                recognitionEvent.recognitionStillActive, aidlEvent.halEventReceivedMillis);
+                recognitionEvent.recognitionStillActive, aidlEvent.halEventReceivedMillis,
+                aidlEvent.token);
     }
 
     public static SoundTrigger.RecognitionEvent aidl2apiPhraseRecognitionEvent(
@@ -254,7 +255,8 @@
                 recognitionEvent.common.captureDelayMs,
                 recognitionEvent.common.capturePreambleMs, recognitionEvent.common.triggerInData,
                 audioFormat,
-                recognitionEvent.common.data, apiExtras, aidlEvent.halEventReceivedMillis);
+                recognitionEvent.common.data, apiExtras, aidlEvent.halEventReceivedMillis,
+                aidlEvent.token);
     }
 
     // In case of a null input returns a non-null valid output.
diff --git a/core/java/android/hardware/soundtrigger/SoundTrigger.java b/core/java/android/hardware/soundtrigger/SoundTrigger.java
index 6d43ddf..301b412 100644
--- a/core/java/android/hardware/soundtrigger/SoundTrigger.java
+++ b/core/java/android/hardware/soundtrigger/SoundTrigger.java
@@ -63,6 +63,7 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Locale;
+import java.util.Objects;
 import java.util.UUID;
 
 /**
@@ -1226,6 +1227,14 @@
         @ElapsedRealtimeLong
         public final long halEventReceivedMillis;
 
+        /**
+         * Binder token returned by {@link SoundTriggerModule#startRecognitionWithToken(
+         * int soundModelHandle, SoundTrigger.RecognitionConfig config)}
+         * @hide
+         */
+        public final IBinder token;
+
+
         /** @hide */
         @TestApi
         @UnsupportedAppUsage(maxTargetSdk = Build.VERSION_CODES.R, trackingBug = 170729553)
@@ -1235,14 +1244,16 @@
                 @ElapsedRealtimeLong long halEventReceivedMillis) {
             this(status, soundModelHandle, captureAvailable,
                     captureSession, captureDelayMs, capturePreambleMs, triggerInData, captureFormat,
-                    data, status == RECOGNITION_STATUS_GET_STATE_RESPONSE, halEventReceivedMillis);
+                    data, status == RECOGNITION_STATUS_GET_STATE_RESPONSE, halEventReceivedMillis,
+                    null);
         }
 
         /** @hide */
         public RecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
                 int captureSession, int captureDelayMs, int capturePreambleMs,
                 boolean triggerInData, @NonNull AudioFormat captureFormat, @Nullable byte[] data,
-                boolean recognitionStillActive, @ElapsedRealtimeLong long halEventReceivedMillis) {
+                boolean recognitionStillActive, @ElapsedRealtimeLong long halEventReceivedMillis,
+                IBinder token) {
             this.status = status;
             this.soundModelHandle = soundModelHandle;
             this.captureAvailable = captureAvailable;
@@ -1254,6 +1265,7 @@
             this.data = data != null ? data : new byte[0];
             this.recognitionStillActive = recognitionStillActive;
             this.halEventReceivedMillis = halEventReceivedMillis;
+            this.token = token;
         }
 
         /**
@@ -1311,6 +1323,16 @@
             return halEventReceivedMillis;
         }
 
+        /**
+         * Get token associated with this recognition session returned by
+         * {@link SoundTriggerModule#startRecognitionWithToken(
+         * int soundModelHandle, SoundTrigger.RecognitionConfig config)}
+         * @hide
+         */
+        public IBinder getToken() {
+            return token;
+        }
+
         /** @hide */
         public static final @android.annotation.NonNull Parcelable.Creator<RecognitionEvent> CREATOR
                 = new Parcelable.Creator<RecognitionEvent>() {
@@ -1346,9 +1368,10 @@
             byte[] data = in.readBlob();
             boolean recognitionStillActive = in.readBoolean();
             long halEventReceivedMillis = in.readLong();
+            IBinder token = in.readStrongBinder();
             return new RecognitionEvent(status, soundModelHandle, captureAvailable, captureSession,
                     captureDelayMs, capturePreambleMs, triggerInData, captureFormat, data,
-                    recognitionStillActive, halEventReceivedMillis);
+                    recognitionStillActive, halEventReceivedMillis, token);
         }
 
         /** @hide */
@@ -1376,6 +1399,7 @@
             dest.writeBlob(data);
             dest.writeBoolean(recognitionStillActive);
             dest.writeLong(halEventReceivedMillis);
+            dest.writeStrongBinder(token);
         }
         @Override
         public int hashCode() {
@@ -1396,6 +1420,7 @@
             result = prime * result + status;
             result = result + (recognitionStillActive ? 1289 : 1291);
             result = prime * result + Long.hashCode(halEventReceivedMillis);
+            result = prime * result +  Objects.hashCode(token);
             return result;
         }
 
@@ -1425,6 +1450,9 @@
             if (halEventReceivedMillis != other.halEventReceivedMillis) {
                 return false;
             }
+            if (!Objects.equals(token, other.token)) {
+                return false;
+            }
             if (status != other.status)
                 return false;
             if (triggerInData != other.triggerInData)
@@ -1462,8 +1490,9 @@
                     + ", data=" + (data == null ? 0 : data.length)
                     + ", recognitionStillActive=" + recognitionStillActive
                     + ", halEventReceivedMillis=" + halEventReceivedMillis
+                    + ", token=" + token
                     + "]";
         }
     }
 
     /**
@@ -1886,10 +1914,12 @@
                 int captureSession, int captureDelayMs, int capturePreambleMs,
                 boolean triggerInData, @NonNull AudioFormat captureFormat, @Nullable byte[] data,
                 @Nullable KeyphraseRecognitionExtra[] keyphraseExtras,
-                @ElapsedRealtimeLong long halEventReceivedMillis) {
+                @ElapsedRealtimeLong long halEventReceivedMillis,
+                IBinder token) {
             this(status, soundModelHandle, captureAvailable, captureSession, captureDelayMs,
                     capturePreambleMs, triggerInData, captureFormat, data, keyphraseExtras,
-                    status == RECOGNITION_STATUS_GET_STATE_RESPONSE, halEventReceivedMillis);
+                    status == RECOGNITION_STATUS_GET_STATE_RESPONSE, halEventReceivedMillis,
+                    token);
         }
 
         public KeyphraseRecognitionEvent(int status, int soundModelHandle,
@@ -1897,10 +1927,11 @@
                 int captureSession, int captureDelayMs, int capturePreambleMs,
                 boolean triggerInData, @NonNull AudioFormat captureFormat, @Nullable byte[] data,
                 @Nullable KeyphraseRecognitionExtra[] keyphraseExtras,
-                boolean recognitionStillActive, @ElapsedRealtimeLong long halEventReceivedMillis) {
+                boolean recognitionStillActive, @ElapsedRealtimeLong long halEventReceivedMillis,
+                IBinder token) {
             super(status, soundModelHandle, captureAvailable,
                     captureSession, captureDelayMs, capturePreambleMs, triggerInData, captureFormat,
-                    data, recognitionStillActive, halEventReceivedMillis);
+                    data, recognitionStillActive, halEventReceivedMillis, token);
             this.keyphraseExtras =
                     keyphraseExtras != null ? keyphraseExtras : new KeyphraseRecognitionExtra[0];
         }
@@ -1938,12 +1969,13 @@
             byte[] data = in.readBlob();
             boolean recognitionStillActive = in.readBoolean();
             long halEventReceivedMillis = in.readLong();
+            IBinder token = in.readStrongBinder();
             KeyphraseRecognitionExtra[] keyphraseExtras =
                     in.createTypedArray(KeyphraseRecognitionExtra.CREATOR);
             return new KeyphraseRecognitionEvent(status, soundModelHandle,
                     captureAvailable, captureSession, captureDelayMs, capturePreambleMs,
                     triggerInData, captureFormat, data, keyphraseExtras, recognitionStillActive,
-                    halEventReceivedMillis);
+                    halEventReceivedMillis, token);
         }
 
         @Override
@@ -1966,6 +1998,7 @@
             dest.writeBlob(data);
             dest.writeBoolean(recognitionStillActive);
             dest.writeLong(halEventReceivedMillis);
+            dest.writeStrongBinder(token);
             dest.writeTypedArray(keyphraseExtras, flags);
         }
 
@@ -2015,6 +2048,7 @@
                     + ", data=" + (data == null ? 0 : data.length)
                     + ", recognitionStillActive=" + recognitionStillActive
                     + ", halEventReceivedMillis=" + halEventReceivedMillis
+                    + ", token=" + token
                     + "]";
         }
     }
@@ -2030,20 +2064,23 @@
         public GenericRecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
                 int captureSession, int captureDelayMs, int capturePreambleMs,
                 boolean triggerInData, @NonNull AudioFormat captureFormat, @Nullable byte[] data,
-                @ElapsedRealtimeLong long halEventReceivedMillis) {
+                @ElapsedRealtimeLong long halEventReceivedMillis,
+                IBinder token) {
             this(status, soundModelHandle, captureAvailable,
                     captureSession, captureDelayMs,
                     capturePreambleMs, triggerInData, captureFormat, data,
-                    status == RECOGNITION_STATUS_GET_STATE_RESPONSE, halEventReceivedMillis);
+                    status == RECOGNITION_STATUS_GET_STATE_RESPONSE,
+                    halEventReceivedMillis, token);
         }
 
         public GenericRecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
                 int captureSession, int captureDelayMs, int capturePreambleMs,
                 boolean triggerInData, @NonNull AudioFormat captureFormat, @Nullable byte[] data,
-                boolean recognitionStillActive, @ElapsedRealtimeLong long halEventReceivedMillis) {
+                boolean recognitionStillActive, @ElapsedRealtimeLong long halEventReceivedMillis,
+                IBinder token) {
             super(status, soundModelHandle, captureAvailable,
                     captureSession, captureDelayMs, capturePreambleMs, triggerInData, captureFormat,
-                    data, recognitionStillActive, halEventReceivedMillis);
+                    data, recognitionStillActive, halEventReceivedMillis, token);
         }
 
         public static final @android.annotation.NonNull Parcelable.Creator<GenericRecognitionEvent> CREATOR
@@ -2062,7 +2099,7 @@
             return new GenericRecognitionEvent(event.status, event.soundModelHandle,
                     event.captureAvailable, event.captureSession, event.captureDelayMs,
                     event.capturePreambleMs, event.triggerInData, event.captureFormat, event.data,
-                    event.recognitionStillActive, event.halEventReceivedMillis);
+                    event.recognitionStillActive, event.halEventReceivedMillis, event.token);
         }
 
         @Override
@@ -2092,7 +2129,7 @@
      *
      * @hide
      */
-    static int handleException(Exception e) {
+    public static int handleException(Exception e) {
         Log.w(TAG, "Exception caught", e);
         if (e instanceof RemoteException) {
             return STATUS_DEAD_OBJECT;
diff --git a/core/java/android/hardware/soundtrigger/SoundTriggerModule.java b/core/java/android/hardware/soundtrigger/SoundTriggerModule.java
index 5cdbe23..48d4ea4 100644
--- a/core/java/android/hardware/soundtrigger/SoundTriggerModule.java
+++ b/core/java/android/hardware/soundtrigger/SoundTriggerModule.java
@@ -247,6 +247,16 @@
     }
 
     /**
+     * Same as above, but return a binder token associated with the session.
+     * @hide
+     */
+    public synchronized IBinder startRecognitionWithToken(int soundModelHandle,
+            SoundTrigger.RecognitionConfig config) throws RemoteException {
+        return mService.startRecognition(soundModelHandle,
+                ConversionUtil.api2aidlRecognitionConfig(config));
+    }
+
+    /**
      * Stop listening to all key phrases in a {@link SoundTrigger.SoundModel}
      * @param soundModelHandle The sound model handle to stop listening to
      * @return - {@link SoundTrigger#STATUS_OK} in case of success
diff --git a/core/java/android/service/voice/AlwaysOnHotwordDetector.java b/core/java/android/service/voice/AlwaysOnHotwordDetector.java
index 91c350a..5fa1a1e 100644
--- a/core/java/android/service/voice/AlwaysOnHotwordDetector.java
+++ b/core/java/android/service/voice/AlwaysOnHotwordDetector.java
@@ -970,7 +970,8 @@
                         new KeyphraseRecognitionEvent(status, soundModelHandle, captureAvailable,
                                 captureSession, captureDelayMs, capturePreambleMs, triggerInData,
                                 captureFormat, data, keyphraseRecognitionExtras.toArray(
-                                new KeyphraseRecognitionExtra[0]), halEventReceivedMillis),
+                                new KeyphraseRecognitionExtra[0]), halEventReceivedMillis,
+                                new Binder()),
                         mInternalCallback);
             } catch (RemoteException e) {
                 throw e.rethrowFromSystemServer();
diff --git a/media/aidl/android/media/soundtrigger_middleware/ISoundTriggerModule.aidl b/media/aidl/android/media/soundtrigger_middleware/ISoundTriggerModule.aidl
index 18688ce..4bdefd0 100644
--- a/media/aidl/android/media/soundtrigger_middleware/ISoundTriggerModule.aidl
+++ b/media/aidl/android/media/soundtrigger_middleware/ISoundTriggerModule.aidl
@@ -79,8 +79,9 @@
      *
      * May throw a ServiceSpecificException with an RESOURCE_CONTENTION status to indicate that
      * resources required for starting the model are currently consumed by other clients.
+     * @return - A token delivered along with future recognition events.
      */
-    void startRecognition(int modelHandle, in RecognitionConfig config);
+    IBinder startRecognition(int modelHandle, in RecognitionConfig config);
 
     /**
      * Stop a recognition of a previously active recognition. Will NOT generate a recognition event.
diff --git a/media/aidl/android/media/soundtrigger_middleware/PhraseRecognitionEventSys.aidl b/media/aidl/android/media/soundtrigger_middleware/PhraseRecognitionEventSys.aidl
index 6c912ed..d9d16ec 100644
--- a/media/aidl/android/media/soundtrigger_middleware/PhraseRecognitionEventSys.aidl
+++ b/media/aidl/android/media/soundtrigger_middleware/PhraseRecognitionEventSys.aidl
@@ -33,4 +33,9 @@
      */
     // @ElapsedRealtimeLong
     long halEventReceivedMillis = -1;
+    /**
+     * Token relating this event to a particular recognition session, returned by
+     * {@link ISoundTriggerModule#startRecognition(int, RecognitionConfig)}
+     */
+    IBinder token;
 }
diff --git a/media/aidl/android/media/soundtrigger_middleware/RecognitionEventSys.aidl b/media/aidl/android/media/soundtrigger_middleware/RecognitionEventSys.aidl
index 84e327d..20ec8c2 100644
--- a/media/aidl/android/media/soundtrigger_middleware/RecognitionEventSys.aidl
+++ b/media/aidl/android/media/soundtrigger_middleware/RecognitionEventSys.aidl
@@ -33,4 +33,9 @@
      */
     // @ElapsedRealtimeLong
     long halEventReceivedMillis = -1;
+    /**
+     * Token relating this event to a particular recognition session, returned by
+     * {@link ISoundTriggerModule#startRecognition(int, RecognitionConfig)}
+     */
+    IBinder token;
 }
diff --git a/services/tests/voiceinteractiontests/src/com/android/server/soundtrigger/SoundTriggerTest.java b/services/tests/voiceinteractiontests/src/com/android/server/soundtrigger/SoundTriggerTest.java
index e6a1be8..35170b3 100644
--- a/services/tests/voiceinteractiontests/src/com/android/server/soundtrigger/SoundTriggerTest.java
+++ b/services/tests/voiceinteractiontests/src/com/android/server/soundtrigger/SoundTriggerTest.java
@@ -28,6 +28,7 @@
 import android.test.InstrumentationTestCase;
 import android.test.suitebuilder.annotation.LargeTest;
 import android.test.suitebuilder.annotation.SmallTest;
+import android.os.Binder;
 
 import java.util.Arrays;
 import java.util.Locale;
@@ -346,7 +347,8 @@
                         .build(),
                 null /* data */,
                 null /* keyphraseExtras */,
-                12345678 /* halEventReceivedMillis */);
+                12345678 /* halEventReceivedMillis */,
+                new Binder() /* token */);
 
         // Write to a parcel
         Parcel parcel = Parcel.obtain();
@@ -379,7 +381,8 @@
                         .build(),
                 new byte[1] /* data */,
                 kpExtra,
-                12345678 /* halEventReceivedMillis */);
+                12345678 /* halEventReceivedMillis */,
+                new Binder() /* token */);
 
         // Write to a parcel
         Parcel parcel = Parcel.obtain();
@@ -428,7 +431,8 @@
                         .build(),
                 data,
                 kpExtra,
-                12345678 /* halEventReceivedMillis */);
+                12345678 /* halEventReceivedMillis */,
+                new Binder() /* token */);
 
         // Write to a parcel
         Parcel parcel = Parcel.obtain();
diff --git a/services/voiceinteraction/java/com/android/server/soundtrigger/SoundTriggerHelper.java b/services/voiceinteraction/java/com/android/server/soundtrigger/SoundTriggerHelper.java
index c485501..4404ae6 100644
--- a/services/voiceinteraction/java/com/android/server/soundtrigger/SoundTriggerHelper.java
+++ b/services/voiceinteraction/java/com/android/server/soundtrigger/SoundTriggerHelper.java
@@ -40,6 +40,7 @@
 import android.os.Binder;
 import android.os.DeadObjectException;
 import android.os.Handler;
+import android.os.IBinder;
 import android.os.Looper;
 import android.os.Message;
 import android.os.RemoteException;
@@ -769,6 +770,10 @@
             return;
         }
         ModelData model = getModelDataForLocked(event.soundModelHandle);
+        if (model != null && !Objects.equals(event.getToken(), model.getToken())) {
+            // Stale event from an earlier session; a missing model is handled just below.
+            return;
+        }
         if (model == null || !model.isGenericModel()) {
             Slog.w(TAG, "Generic recognition event: Model does not exist for handle: "
                     + event.soundModelHandle);
@@ -851,6 +856,10 @@
         Slog.w(TAG, "Recognition aborted");
         MetricsLogger.count(mContext, "sth_recognition_aborted", 1);
         ModelData modelData = getModelDataForLocked(event.soundModelHandle);
+        if (modelData != null && !Objects.equals(event.getToken(), modelData.getToken())) {
+            // Stale event from an earlier session; a missing model is handled just below.
+            return;
+        }
         if (modelData != null && modelData.isModelStarted()) {
             modelData.setStopped();
             try {
@@ -888,6 +897,10 @@
         MetricsLogger.count(mContext, "sth_keyphrase_recognition_event", 1);
         int keyphraseId = getKeyphraseIdFromEvent(event);
         ModelData modelData = getKeyphraseModelDataLocked(keyphraseId);
+        if (modelData != null && !Objects.equals(event.getToken(), modelData.getToken())) {
+            // Stale event from an earlier session; a missing model is handled just below.
+            return;
+        }
 
         if (modelData == null || !modelData.isKeyphraseModel()) {
             Slog.e(TAG, "Keyphase model data does not exist for ID:" + keyphraseId);
@@ -1184,7 +1197,12 @@
         if (mModule == null) {
             return STATUS_ERROR;
         }
-        int status = mModule.startRecognition(modelData.getHandle(), config);
+        int status = STATUS_OK;
+        try {
+            modelData.setToken(mModule.startRecognitionWithToken(modelData.getHandle(), config));
+        } catch (Exception e) {
+            status = SoundTrigger.handleException(e);
+        }
         if (status != SoundTrigger.STATUS_OK) {
             Slog.w(TAG, "startRecognition failed with " + status);
             MetricsLogger.count(mContext, "sth_start_recognition_error", 1);
@@ -1339,6 +1357,9 @@
         // The SoundModel instance, one of KeyphraseSoundModel or GenericSoundModel.
         private SoundModel mSoundModel = null;
 
+        // Token used to disambiguate recognition sessions.
+        private IBinder mRecognitionToken = null;
+
         private ModelData(UUID modelId, int modelType) {
             mModelId = modelId;
             // Private constructor, since we require modelType to be one of TYPE_GENERIC,
@@ -1381,6 +1402,9 @@
         }
 
         synchronized void setStopped() {
+            // If we are moving to the stopped state, we should clear out our
+            // startRecognition token
+            mRecognitionToken = null;
             mModelState = MODEL_LOADED;
         }
 
@@ -1452,6 +1476,14 @@
             return mSoundModel;
         }
 
+        synchronized IBinder getToken() {
+            return mRecognitionToken;
+        }
+
+        synchronized void setToken(IBinder token) {
+            mRecognitionToken = token;
+        }
+
         synchronized int getModelType() {
             return mModelType;
         }
diff --git a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerHalEnforcer.java b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerHalEnforcer.java
index bac2466..c3e0a3c 100644
--- a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerHalEnforcer.java
+++ b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerHalEnforcer.java
@@ -256,7 +256,7 @@
         public void recognitionCallback(int model, RecognitionEventSys event) {
             synchronized (mModelStates) {
                 ModelState state = mModelStates.get(model);
-                if (state == null || state == ModelState.INACTIVE) {
+                if (state == null) {
                     Log.wtfStack(TAG, "Unexpected recognition event for model: " + model);
                     reboot();
                     return;
@@ -282,7 +282,7 @@
         public void phraseRecognitionCallback(int model, PhraseRecognitionEventSys event) {
             synchronized (mModelStates) {
                 ModelState state = mModelStates.get(model);
-                if (state == null || state == ModelState.INACTIVE) {
+                if (state == null) {
                     Log.wtfStack(TAG, "Unexpected recognition event for model: " + model);
                     reboot();
                     return;
diff --git a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareLogging.java b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareLogging.java
index 2ee4e3c..ecd65ae 100644
--- a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareLogging.java
+++ b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareLogging.java
@@ -241,13 +241,14 @@
         }
 
         @Override
-        public void startRecognition(int modelHandle, RecognitionConfig config)
+        public IBinder startRecognition(int modelHandle, RecognitionConfig config)
                 throws RemoteException {
             try {
-                mDelegate.startRecognition(modelHandle, config);
-                mEventLogger.enqueue(SessionEvent.createForVoid(
-                            START_RECOGNITION, modelHandle, config)
+                var result = mDelegate.startRecognition(modelHandle, config);
+                mEventLogger.enqueue(SessionEvent.createForReturn(
+                            START_RECOGNITION, result, modelHandle, config)
                         .printLog(ALOGI, TAG));
+                return result;
             } catch (Exception e) {
                 mEventLogger.enqueue(SessionEvent.createForException(
                             START_RECOGNITION, e, modelHandle, config)
diff --git a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewarePermission.java b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewarePermission.java
index 00b894e..6b724de 100644
--- a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewarePermission.java
+++ b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewarePermission.java
@@ -241,10 +241,10 @@
         }
 
         @Override
-        public void startRecognition(int modelHandle, @NonNull RecognitionConfig config)
+        public IBinder startRecognition(int modelHandle, @NonNull RecognitionConfig config)
                 throws RemoteException {
             enforcePermissions();
-            mDelegate.startRecognition(modelHandle, config);
+            return mDelegate.startRecognition(modelHandle, config);
         }
 
         @Override
diff --git a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareService.java b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareService.java
index 91e5466..1558acf 100644
--- a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareService.java
+++ b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareService.java
@@ -34,6 +34,7 @@
 import android.media.soundtrigger_middleware.ISoundTriggerMiddlewareService;
 import android.media.soundtrigger_middleware.ISoundTriggerModule;
 import android.media.soundtrigger_middleware.SoundTriggerModuleDescriptor;
+import android.os.IBinder;
 import android.os.RemoteException;
 
 import com.android.server.SystemService;
@@ -176,10 +177,10 @@
         }
 
         @Override
-        public void startRecognition(int modelHandle, RecognitionConfig config)
+        public IBinder startRecognition(int modelHandle, RecognitionConfig config)
                 throws RemoteException {
             try (SafeCloseable ignored = ClearCallingIdentityContext.create()) {
-                mDelegate.startRecognition(modelHandle, config);
+                return mDelegate.startRecognition(modelHandle, config);
             }
         }
 
diff --git a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareValidation.java b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareValidation.java
index f208c03..2924c12 100644
--- a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareValidation.java
+++ b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerMiddlewareValidation.java
@@ -434,7 +434,7 @@
         }
 
         @Override
-        public void startRecognition(int modelHandle, @NonNull RecognitionConfig config) {
+        public IBinder startRecognition(int modelHandle, @NonNull RecognitionConfig config) {
             // Input validation.
             ValidationUtil.validateRecognitionConfig(config);
 
@@ -458,9 +458,10 @@
 
                 // From here on, every exception isn't client's fault.
                 try {
-                    mDelegate.startRecognition(modelHandle, config);
+                    var result = mDelegate.startRecognition(modelHandle, config);
                     modelState.config = config;
                     modelState.activityState = ModelState.Activity.ACTIVE;
+                    return result;
                 } catch (Exception e) {
                     throw handleException(e);
                 }
diff --git a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerModule.java b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerModule.java
index 177b4da..083211c 100644
--- a/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerModule.java
+++ b/services/voiceinteraction/java/com/android/server/soundtrigger_middleware/SoundTriggerModule.java
@@ -303,10 +303,10 @@
         }
 
         @Override
-        public void startRecognition(int modelHandle, @NonNull RecognitionConfig config) {
+        public IBinder startRecognition(int modelHandle, @NonNull RecognitionConfig config) {
             synchronized (SoundTriggerModule.this) {
                 checkValid();
-                mLoadedModels.get(modelHandle).startRecognition(config);
+                return mLoadedModels.get(modelHandle).startRecognition(config);
             }
         }
 
@@ -385,6 +385,8 @@
             public int mHandle;
             private ModelState mState = ModelState.INIT;
             private SoundTriggerMiddlewareImpl.AudioSessionProvider.AudioSession mSession;
+            private IBinder mRecognitionToken = null;
+            private boolean mIsStopping = false;
 
             private @NonNull
             ModelState getState() {
@@ -425,10 +427,15 @@
                 return mSession.mSessionHandle;
             }
 
-            private void startRecognition(@NonNull RecognitionConfig config) {
+            private IBinder startRecognition(@NonNull RecognitionConfig config) {
+                if (mIsStopping) { // stopRecognition() is mid-flight with the lock released
+                    throw new RecoverableException(Status.INTERNAL_ERROR, "Race occurred");
+                }
                 mHalService.startRecognition(mHandle, mSession.mDeviceHandle,
                         mSession.mIoHandle, config);
+                mRecognitionToken = new Binder();
                 setState(ModelState.ACTIVE);
+                return mRecognitionToken;
             }
 
             private void stopRecognition() {
@@ -437,9 +444,12 @@
                         // This call is idempotent in order to avoid races.
                         return;
                     }
+                    mRecognitionToken = null;
+                    mIsStopping = true;
                 }
                 mHalService.stopRecognition(mHandle);
                 synchronized (SoundTriggerModule.this) {
+                    mIsStopping = false;
                     setState(ModelState.LOADED);
                 }
             }
@@ -474,9 +484,13 @@
                     @NonNull RecognitionEventSys event) {
                 ISoundTriggerCallback callback;
                 synchronized (SoundTriggerModule.this) {
+                    if (mRecognitionToken == null) {
+                        return;
+                    }
                     if (!event.recognitionEvent.recognitionStillActive) {
                         setState(ModelState.LOADED);
                     }
+                    event.token = mRecognitionToken;
                     callback = mCallback;
                 }
                 // The callback must be invoked outside of the lock.
@@ -495,12 +509,15 @@
                     @NonNull PhraseRecognitionEventSys event) {
                 ISoundTriggerCallback callback;
                 synchronized (SoundTriggerModule.this) {
+                    if (mRecognitionToken == null) {
+                        return;
+                    }
                     if (!event.phraseRecognitionEvent.common.recognitionStillActive) {
                         setState(ModelState.LOADED);
                     }
+                    event.token = mRecognitionToken;
                     callback = mCallback;
                 }
-
                 // The callback must be invoked outside of the lock.
                 try {
                     if (callback != null) {