Update neuralnetworks HAL to allow collecting execution duration.

Test: VtsHalNeuralnetworksV1_0TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.0::IDevice/sample-all
Test: VtsHalNeuralnetworksV1_1TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.1::IDevice/sample-all
Test: VtsHalNeuralnetworksV1_2TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.2::IDevice/sample-all

Bug: 115390094

Change-Id: If67a5ffe39cfdd78498e01f26251734fdc8e66c7
diff --git a/neuralnetworks/1.0/vts/functional/Callbacks.cpp b/neuralnetworks/1.0/vts/functional/Callbacks.cpp
index 03afcd0..c30702c 100644
--- a/neuralnetworks/1.0/vts/functional/Callbacks.cpp
+++ b/neuralnetworks/1.0/vts/functional/Callbacks.cpp
@@ -135,14 +135,18 @@
 
 Return<void> ExecutionCallback::notify(ErrorStatus errorStatus) {
     mErrorStatus = errorStatus;
+    mOutputShapes = {};
+    mTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
     CallbackBase::notify();
     return Void();
 }
 
 Return<void> ExecutionCallback::notify_1_2(ErrorStatus errorStatus,
-                                           const hidl_vec<OutputShape>& outputShapes) {
+                                           const hidl_vec<OutputShape>& outputShapes,
+                                           const Timing& timing) {
     mErrorStatus = errorStatus;
     mOutputShapes = outputShapes;
+    mTiming = timing;
     CallbackBase::notify();
     return Void();
 }
@@ -157,6 +161,11 @@
     return mOutputShapes;
 }
 
+Timing ExecutionCallback::getTiming() {
+    wait();
+    return mTiming;
+}
+
 }  // namespace implementation
 }  // namespace V1_2
 }  // namespace neuralnetworks
diff --git a/neuralnetworks/1.0/vts/functional/Callbacks.h b/neuralnetworks/1.0/vts/functional/Callbacks.h
index 46f29a6..4707d0a 100644
--- a/neuralnetworks/1.0/vts/functional/Callbacks.h
+++ b/neuralnetworks/1.0/vts/functional/Callbacks.h
@@ -308,8 +308,20 @@
      *                     of the output operand in the Request outputs vector.
      *                     outputShapes must be empty unless the status is either
      *                     NONE or OUTPUT_INSUFFICIENT_SIZE.
+     * @param timing Duration of execution. Unless MeasureTiming::YES was passed when
+     *               launching the execution and status is NONE, all times must
+     *               be reported as UINT64_MAX. A driver may choose to report
+     *               any time as UINT64_MAX, indicating that particular measurement is
+     *               not available.
      */
-    Return<void> notify_1_2(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes) override;
+    Return<void> notify_1_2(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes,
+                            const Timing& timing) override;
+
+    // An overload of the latest notify interface to hide the version from ExecutionBuilder.
+    Return<void> notify(ErrorStatus status, const hidl_vec<OutputShape>& outputShapes,
+                        const Timing& timing) {
+        return notify_1_2(status, outputShapes, timing);
+    }
 
     /**
      * Retrieves the error status returned from the asynchronous task launched
@@ -350,9 +362,24 @@
      */
     const std::vector<OutputShape>& getOutputShapes();
 
+    /**
+     * Retrieves the duration of execution of the asynchronous task launched
+     * by IPreparedModel::execute_1_2. If IPreparedModel::execute_1_2 has not finished
+     * asynchronously executing, this call will block until the asynchronous task
+     * notifies the object.
+     *
+     * If the asynchronous task was launched by IPreparedModel::execute, every time
+     * must be UINT64_MAX.
+     *
+     * @return timing Duration of the execution. Every time must be UINT64_MAX unless
+     *                the status is NONE.
+     */
+    Timing getTiming();
+
    private:
-    ErrorStatus mErrorStatus;
-    std::vector<OutputShape> mOutputShapes;
+    ErrorStatus mErrorStatus = ErrorStatus::GENERAL_FAILURE;
+    std::vector<OutputShape> mOutputShapes = {};
+    Timing mTiming = {};
 };
 
 
diff --git a/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
index d45922e..65c425e 100644
--- a/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
+++ b/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
@@ -77,29 +77,33 @@
 // Top level driver for models and examples generated by test_generator.py
 // Test driver for those generated from ml/nn/runtime/test/spec
 static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>& preparedModel,
-                                                const Request& request,
+                                                const Request& request, MeasureTiming,
                                                 sp<ExecutionCallback>& callback) {
     return preparedModel->execute(request, callback);
 }
 static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel,
-                                                const Request& request,
+                                                const Request& request, MeasureTiming measure,
                                                 sp<ExecutionCallback>& callback) {
-    return preparedModel->execute_1_2(request, callback);
+    return preparedModel->execute_1_2(request, measure, callback);
 }
 static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>&, const Request&,
-                                                hidl_vec<OutputShape>*) {
+                                                MeasureTiming, hidl_vec<OutputShape>*, Timing*) {
     ADD_FAILURE() << "asking for synchronous execution at V1_0";
     return ErrorStatus::GENERAL_FAILURE;
 }
 static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel,
-                                                const Request& request,
-                                                hidl_vec<OutputShape>* outputShapes) {
+                                                const Request& request, MeasureTiming measure,
+                                                hidl_vec<OutputShape>* outputShapes,
+                                                Timing* timing) {
     ErrorStatus result;
     Return<void> ret = preparedModel->executeSynchronously(
-        request, [&result, &outputShapes](ErrorStatus error, const hidl_vec<OutputShape>& shapes) {
-            result = error;
-            *outputShapes = shapes;
-        });
+            request, measure,
+            [&result, outputShapes, timing](ErrorStatus error, const hidl_vec<OutputShape>& shapes,
+                                            const Timing& time) {
+                result = error;
+                *outputShapes = shapes;
+                *timing = time;
+            });
     if (!ret.isOk()) {
         return ErrorStatus::GENERAL_FAILURE;
     }
@@ -111,9 +115,8 @@
 template <typename T_IPreparedModel>
 void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored,
                            const std::vector<MixedTypedExample>& examples,
-                           bool hasRelaxedFloat32Model = false, float fpAtol = kDefaultAtol,
-                           float fpRtol = kDefaultRtol, Synchronously sync = Synchronously::NO,
-                           bool testDynamicOutputShape = false) {
+                           bool hasRelaxedFloat32Model, float fpAtol, float fpRtol,
+                           Synchronously sync, MeasureTiming measure, bool testDynamicOutputShape) {
     const uint32_t INPUT = 0;
     const uint32_t OUTPUT = 1;
 
@@ -208,6 +211,7 @@
 
         ErrorStatus executionStatus;
         hidl_vec<OutputShape> outputShapes;
+        Timing timing;
         if (sync == Synchronously::NO) {
             SCOPED_TRACE("asynchronous");
 
@@ -215,8 +219,8 @@
             sp<ExecutionCallback> executionCallback = new ExecutionCallback();
             ASSERT_NE(nullptr, executionCallback.get());
             Return<ErrorStatus> executionLaunchStatus = ExecutePreparedModel(
-                preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
-                executionCallback);
+                    preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
+                    measure, executionCallback);
             ASSERT_TRUE(executionLaunchStatus.isOk());
             EXPECT_EQ(ErrorStatus::NONE, static_cast<ErrorStatus>(executionLaunchStatus));
 
@@ -224,13 +228,14 @@
             executionCallback->wait();
             executionStatus = executionCallback->getStatus();
             outputShapes = executionCallback->getOutputShapes();
+            timing = executionCallback->getTiming();
         } else {
             SCOPED_TRACE("synchronous");
 
             // execute
             Return<ErrorStatus> executionReturnStatus = ExecutePreparedModel(
-                preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
-                &outputShapes);
+                    preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
+                    measure, &outputShapes, &timing);
             ASSERT_TRUE(executionReturnStatus.isOk());
             executionStatus = static_cast<ErrorStatus>(executionReturnStatus);
         }
@@ -244,6 +249,14 @@
             return;
         }
         ASSERT_EQ(ErrorStatus::NONE, executionStatus);
+        if (measure == MeasureTiming::NO) {
+            EXPECT_EQ(UINT64_MAX, timing.timeOnDevice);
+            EXPECT_EQ(UINT64_MAX, timing.timeInDriver);
+        } else {
+            if (timing.timeOnDevice != UINT64_MAX && timing.timeInDriver != UINT64_MAX) {
+                EXPECT_LE(timing.timeOnDevice, timing.timeInDriver);
+            }
+        }
 
         // Go through all outputs, overwrite output dimensions with returned output shapes
         if (testDynamicOutputShape) {
@@ -273,10 +286,10 @@
 template <typename T_IPreparedModel>
 void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored,
                            const std::vector<MixedTypedExample>& examples,
-                           bool hasRelaxedFloat32Model, Synchronously sync,
+                           bool hasRelaxedFloat32Model, Synchronously sync, MeasureTiming measure,
                            bool testDynamicOutputShape) {
     EvaluatePreparedModel(preparedModel, is_ignored, examples, hasRelaxedFloat32Model, kDefaultAtol,
-                          kDefaultRtol, sync, testDynamicOutputShape);
+                          kDefaultRtol, sync, measure, testDynamicOutputShape);
 }
 
 static void getPreparedModel(sp<PreparedModelCallback> callback,
@@ -333,7 +346,7 @@
     float fpAtol = 1e-5f, fpRtol = 5.0f * 1.1920928955078125e-7f;
     EvaluatePreparedModel(preparedModel, is_ignored, examples,
                           /*hasRelaxedFloat32Model=*/false, fpAtol, fpRtol, Synchronously::NO,
-                          /*testDynamicOutputShape=*/false);
+                          MeasureTiming::NO, /*testDynamicOutputShape=*/false);
 }
 
 void Execute(const sp<V1_1::IDevice>& device, std::function<V1_1::Model(void)> create_model,
@@ -380,7 +393,7 @@
 
     EvaluatePreparedModel(preparedModel, is_ignored, examples,
                           model.relaxComputationFloat32toFloat16, 1e-5f, 1e-5f, Synchronously::NO,
-                          /*testDynamicOutputShape=*/false);
+                          MeasureTiming::NO, /*testDynamicOutputShape=*/false);
 }
 
 // TODO: Reduce code duplication.
@@ -429,10 +442,16 @@
 
     EvaluatePreparedModel(preparedModel, is_ignored, examples,
                           model.relaxComputationFloat32toFloat16, Synchronously::NO,
-                          testDynamicOutputShape);
+                          MeasureTiming::NO, testDynamicOutputShape);
     EvaluatePreparedModel(preparedModel, is_ignored, examples,
                           model.relaxComputationFloat32toFloat16, Synchronously::YES,
-                          testDynamicOutputShape);
+                          MeasureTiming::NO, testDynamicOutputShape);
+    EvaluatePreparedModel(preparedModel, is_ignored, examples,
+                          model.relaxComputationFloat32toFloat16, Synchronously::NO,
+                          MeasureTiming::YES, testDynamicOutputShape);
+    EvaluatePreparedModel(preparedModel, is_ignored, examples,
+                          model.relaxComputationFloat32toFloat16, Synchronously::YES,
+                          MeasureTiming::YES, testDynamicOutputShape);
 }
 
 }  // namespace generated_tests
diff --git a/neuralnetworks/1.2/IExecutionCallback.hal b/neuralnetworks/1.2/IExecutionCallback.hal
index 47de1b6..7f6c9ee 100644
--- a/neuralnetworks/1.2/IExecutionCallback.hal
+++ b/neuralnetworks/1.2/IExecutionCallback.hal
@@ -18,7 +18,6 @@
 
 import @1.0::ErrorStatus;
 import @1.0::IExecutionCallback;
-import OutputShape;
 
 /**
  * IExecutionCallback must be used to return the error status result from an
@@ -50,6 +49,11 @@
      *                     of the output operand in the Request outputs vector.
      *                     outputShapes must be empty unless the status is either
      *                     NONE or OUTPUT_INSUFFICIENT_SIZE.
+     * @param timing Duration of execution. Unless MeasureTiming::YES was passed when
+     *               launching the execution and status is NONE, all times must
+     *               be reported as UINT64_MAX. A driver may choose to report
+     *               any time as UINT64_MAX, indicating that particular measurement is
+     *               not available.
      */
-    oneway notify_1_2(ErrorStatus status, vec<OutputShape> outputShapes);
+  oneway notify_1_2(ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);
 };
diff --git a/neuralnetworks/1.2/IPreparedModel.hal b/neuralnetworks/1.2/IPreparedModel.hal
index 2d4e572..5d2d80f 100644
--- a/neuralnetworks/1.2/IPreparedModel.hal
+++ b/neuralnetworks/1.2/IPreparedModel.hal
@@ -59,6 +59,10 @@
      *
      * @param request The input and output information on which the prepared
      *                model is to be executed.
+     * @param measure Specifies whether or not to measure duration of the execution.
+     *                The duration runs from the time the driver sees the call
+     *                to the execute_1_2 function to the time the driver invokes
+     *                the callback.
      * @param callback A callback object used to return the error status of
      *                 the execution. The callback object's notify function must
      *                 be called exactly once, even if the execution was
@@ -72,7 +76,7 @@
      *                - INVALID_ARGUMENT if one of the input arguments is
      *                  invalid
      */
-    execute_1_2(Request request, IExecutionCallback callback)
+    execute_1_2(Request request, MeasureTiming measure, IExecutionCallback callback)
         generates (ErrorStatus status);
 
     /**
@@ -98,6 +102,10 @@
      *
      * @param request The input and output information on which the prepared
      *                model is to be executed.
+     * @param measure Specifies whether or not to measure duration of the execution.
+     *                The duration runs from the time the driver sees the call
+     *                to the executeSynchronously function to the time the driver
+     *                returns from the function.
      * @return status Error status of the execution, must be:
      *                - NONE if execution is performed successfully
      *                - DEVICE_UNAVAILABLE if driver is offline or busy
@@ -112,9 +120,13 @@
      *                      of the output operand in the Request outputs vector.
      *                      outputShapes must be empty unless the status is either
      *                      NONE or OUTPUT_INSUFFICIENT_SIZE.
+     * @return timing Duration of execution. Unless measure is YES and status is
+     *                NONE, all times must be reported as UINT64_MAX. A driver may
+     *                choose to report any time as UINT64_MAX, indicating that
+     *                measurement is not available.
      */
-    executeSynchronously(Request request)
-        generates (ErrorStatus status, vec<OutputShape> outputShapes);
+    executeSynchronously(Request request, MeasureTiming measure)
+            generates (ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);
 
     /**
      * Configure a Burst object used to execute multiple inferences on a
diff --git a/neuralnetworks/1.2/types.hal b/neuralnetworks/1.2/types.hal
index ce993d7..8bc28b4 100644
--- a/neuralnetworks/1.2/types.hal
+++ b/neuralnetworks/1.2/types.hal
@@ -447,8 +447,34 @@
 };
 
 /**
- * FmqRequestDatum is a single element of a serialized representation of a
- * {@link @1.0::Request} object which is sent across FastMessageQueue.
+ * Specifies whether or not to measure timing information during execution.
+ */
+enum MeasureTiming : int32_t {
+    NO  = 0,
+    YES = 1,
+};
+
+/**
+
+ * Timing information measured during execution. Each time is a duration from
+ * the beginning of some task to the end of that task, including time when that
+ * task is not active (for example, preempted by some other task, or
+ * waiting for some resource to become available).
+ *
+ * Times are measured in microseconds.
+ * When a time is not available, it must be reported as UINT64_MAX.
+ */
+struct Timing {
+    /** Execution time on device (not driver, which runs on host processor). */
+    uint64_t timeOnDevice;
+    /** Execution time in driver (including time on device). */
+    uint64_t timeInDriver;
+};
+
+/**
+ * FmqRequestDatum is a single element of a serialized representation of an
+ * execution request (a {@link @1.0::Request} object and a {@link MeasureTiming}
+ * value) which is sent across FastMessageQueue.
  *
  * The serialized representation for a particular execution is referred to later
  * in these descriptions as a 'packet'.
@@ -456,7 +482,7 @@
  * FastMessageQueue can only pass HIDL-defined types that do not involve nested
  * buffers, handles, or interfaces.
  *
- * The {@link @1.0::Request} is serialized as follows:
+ * The request is serialized as follows:
  * 1) 'packetInformation'
  * 2) For each input operand:
  *    2.1) 'inputOperandInformation'
@@ -468,6 +494,7 @@
  *         3.2.1) 'outputOperandDimensionValue'
  * 4) For each pool:
  *    4.1) 'poolIdentifier'
+ * 5) 'measureTiming'
  */
 safe_union FmqRequestDatum {
     /**
@@ -561,12 +588,21 @@
      * identifier.
      */
     int32_t poolIdentifier;
+
+    /**
+     * Specifies whether or not to measure duration of the execution. The
+     * duration runs from the time the driver dequeues the request from a
+     * FastMessageQueue to the time the driver enqueues results to a
+     * FastMessageQueue.
+     */
+    MeasureTiming measureTiming;
 };
 
 /**
  * FmqResultDatum is a single element of a serialized representation of the
- * values returned from an execution ({@link @1.0::ErrorStatus} and
- * vec<{@link OutputShape}>) which is returned via FastMessageQueue.
+ * values returned from an execution ({@link @1.0::ErrorStatus},
+ * vec<{@link OutputShape}>, and {@link Timing}) which is returned via
+ * FastMessageQueue.
  *
  * The serialized representation for a particular execution is referred to later
  * in these descriptions as a 'packet'.
@@ -581,6 +617,7 @@
  *    2.1) 'operandInformation'
  *    2.2) For each dimension element of the operand:
  *         2.2.1) 'operandDimensionValue'
+ * 3) 'executionTiming'
  */
 safe_union FmqResultDatum {
     /**
@@ -636,4 +673,12 @@
      * Element of the dimensions vector.
      */
     uint32_t operandDimensionValue;
+
+    /**
+     * Duration of execution. Unless measurement was requested and execution
+     * succeeds, all times must be reported as UINT64_MAX. A driver may choose
+     * to report any time as UINT64_MAX, indicating that measurement is not
+     * available.
+     */
+    Timing executionTiming;
 };
diff --git a/neuralnetworks/1.2/vts/functional/ValidateRequest.cpp b/neuralnetworks/1.2/vts/functional/ValidateRequest.cpp
index 1eaea4b..00a7c3e 100644
--- a/neuralnetworks/1.2/vts/functional/ValidateRequest.cpp
+++ b/neuralnetworks/1.2/vts/functional/ValidateRequest.cpp
@@ -42,6 +42,10 @@
 
 ///////////////////////// UTILITY FUNCTIONS /////////////////////////
 
+static bool badTiming(Timing timing) {
+    return timing.timeOnDevice == UINT64_MAX && timing.timeInDriver == UINT64_MAX;
+}
+
 static void createPreparedModel(const sp<IDevice>& device, const Model& model,
                                 sp<IPreparedModel>* preparedModel) {
     ASSERT_NE(nullptr, preparedModel);
@@ -98,31 +102,46 @@
                      Request request, const std::function<void(Request*)>& mutation) {
     mutation(&request);
 
+    // We'd like to test both with timing requested and without timing
+    // requested. Rather than running each test both ways, we'll decide whether
+    // to request timing by hashing the message. We do not use std::hash because
+    // it is not guaranteed stable across executions.
+    char hash = 0;
+    for (auto c : message) {
+        hash ^= c;
+    };
+    MeasureTiming measure = (hash & 1) ? MeasureTiming::YES : MeasureTiming::NO;
+
     {
         SCOPED_TRACE(message + " [execute_1_2]");
 
         sp<ExecutionCallback> executionCallback = new ExecutionCallback();
         ASSERT_NE(nullptr, executionCallback.get());
         Return<ErrorStatus> executeLaunchStatus =
-            preparedModel->execute_1_2(request, executionCallback);
+                preparedModel->execute_1_2(request, measure, executionCallback);
         ASSERT_TRUE(executeLaunchStatus.isOk());
         ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, static_cast<ErrorStatus>(executeLaunchStatus));
 
         executionCallback->wait();
         ErrorStatus executionReturnStatus = executionCallback->getStatus();
         const auto& outputShapes = executionCallback->getOutputShapes();
+        Timing timing = executionCallback->getTiming();
         ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionReturnStatus);
         ASSERT_EQ(outputShapes.size(), 0);
+        ASSERT_TRUE(badTiming(timing));
     }
 
     {
         SCOPED_TRACE(message + " [executeSynchronously]");
 
         Return<void> executeStatus = preparedModel->executeSynchronously(
-            request, [](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes) {
-                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, error);
-                EXPECT_EQ(outputShapes.size(), 0);
-            });
+                request, measure,
+                [](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes,
+                   const Timing& timing) {
+                    ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, error);
+                    EXPECT_EQ(outputShapes.size(), 0);
+                    EXPECT_TRUE(badTiming(timing));
+                });
         ASSERT_TRUE(executeStatus.isOk());
     }
 }