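Update neuralnetworks HAL to allow collecting execution duration.

execute_1_2 and executeSynchronously now take a MeasureTiming argument
and report a Timing result (timeOnDevice, timeInDriver). The generated
VTS tests for 1.2 run each model asynchronously and synchronously, with
timing measurement both disabled and enabled.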

Test: VtsHalNeuralnetworksV1_0TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.0::IDevice/sample-all
Test: VtsHalNeuralnetworksV1_1TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.1::IDevice/sample-all
Test: VtsHalNeuralnetworksV1_2TargetTest --hal_service_instance=android.hardware.neuralnetworks@1.2::IDevice/sample-all

Bug: 115390094

Change-Id: If67a5ffe39cfdd78498e01f26251734fdc8e66c7
diff --git a/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
index d45922e..65c425e 100644
--- a/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
+++ b/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
@@ -77,29 +77,33 @@
 // Top level driver for models and examples generated by test_generator.py
 // Test driver for those generated from ml/nn/runtime/test/spec
 static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>& preparedModel,
-                                                const Request& request,
+                                                const Request& request, MeasureTiming,
                                                 sp<ExecutionCallback>& callback) {
     return preparedModel->execute(request, callback);
 }
 static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel,
-                                                const Request& request,
+                                                const Request& request, MeasureTiming measure,
                                                 sp<ExecutionCallback>& callback) {
-    return preparedModel->execute_1_2(request, callback);
+    return preparedModel->execute_1_2(request, measure, callback);
 }
 static Return<ErrorStatus> ExecutePreparedModel(sp<V1_0::IPreparedModel>&, const Request&,
-                                                hidl_vec<OutputShape>*) {
+                                                MeasureTiming, hidl_vec<OutputShape>*, Timing*) {
     ADD_FAILURE() << "asking for synchronous execution at V1_0";
     return ErrorStatus::GENERAL_FAILURE;
 }
 static Return<ErrorStatus> ExecutePreparedModel(sp<V1_2::IPreparedModel>& preparedModel,
-                                                const Request& request,
-                                                hidl_vec<OutputShape>* outputShapes) {
+                                                const Request& request, MeasureTiming measure,
+                                                hidl_vec<OutputShape>* outputShapes,
+                                                Timing* timing) {
     ErrorStatus result;
     Return<void> ret = preparedModel->executeSynchronously(
-        request, [&result, &outputShapes](ErrorStatus error, const hidl_vec<OutputShape>& shapes) {
-            result = error;
-            *outputShapes = shapes;
-        });
+            request, measure,
+            [&result, outputShapes, timing](ErrorStatus error, const hidl_vec<OutputShape>& shapes,
+                                            const Timing& time) {
+                result = error;
+                *outputShapes = shapes;
+                *timing = time;
+            });
     if (!ret.isOk()) {
         return ErrorStatus::GENERAL_FAILURE;
     }
@@ -111,9 +115,8 @@
 template <typename T_IPreparedModel>
 void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored,
                            const std::vector<MixedTypedExample>& examples,
-                           bool hasRelaxedFloat32Model = false, float fpAtol = kDefaultAtol,
-                           float fpRtol = kDefaultRtol, Synchronously sync = Synchronously::NO,
-                           bool testDynamicOutputShape = false) {
+                           bool hasRelaxedFloat32Model, float fpAtol, float fpRtol,
+                           Synchronously sync, MeasureTiming measure, bool testDynamicOutputShape) {
     const uint32_t INPUT = 0;
     const uint32_t OUTPUT = 1;
 
@@ -208,6 +211,7 @@
 
         ErrorStatus executionStatus;
         hidl_vec<OutputShape> outputShapes;
+        Timing timing;
         if (sync == Synchronously::NO) {
             SCOPED_TRACE("asynchronous");
 
@@ -215,8 +219,8 @@
             sp<ExecutionCallback> executionCallback = new ExecutionCallback();
             ASSERT_NE(nullptr, executionCallback.get());
             Return<ErrorStatus> executionLaunchStatus = ExecutePreparedModel(
-                preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
-                executionCallback);
+                    preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
+                    measure, executionCallback);
             ASSERT_TRUE(executionLaunchStatus.isOk());
             EXPECT_EQ(ErrorStatus::NONE, static_cast<ErrorStatus>(executionLaunchStatus));
 
@@ -224,13 +228,14 @@
             executionCallback->wait();
             executionStatus = executionCallback->getStatus();
             outputShapes = executionCallback->getOutputShapes();
+            timing = executionCallback->getTiming();
         } else {
             SCOPED_TRACE("synchronous");
 
             // execute
             Return<ErrorStatus> executionReturnStatus = ExecutePreparedModel(
-                preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
-                &outputShapes);
+                    preparedModel, {.inputs = inputs_info, .outputs = outputs_info, .pools = pools},
+                    measure, &outputShapes, &timing);
             ASSERT_TRUE(executionReturnStatus.isOk());
             executionStatus = static_cast<ErrorStatus>(executionReturnStatus);
         }
@@ -244,6 +249,14 @@
             return;
         }
         ASSERT_EQ(ErrorStatus::NONE, executionStatus);
+        if (measure == MeasureTiming::NO) {
+            EXPECT_EQ(UINT64_MAX, timing.timeOnDevice);
+            EXPECT_EQ(UINT64_MAX, timing.timeInDriver);
+        } else {
+            if (timing.timeOnDevice != UINT64_MAX && timing.timeInDriver != UINT64_MAX) {
+                EXPECT_LE(timing.timeOnDevice, timing.timeInDriver);
+            }
+        }
 
         // Go through all outputs, overwrite output dimensions with returned output shapes
         if (testDynamicOutputShape) {
@@ -273,10 +286,10 @@
 template <typename T_IPreparedModel>
 void EvaluatePreparedModel(sp<T_IPreparedModel>& preparedModel, std::function<bool(int)> is_ignored,
                            const std::vector<MixedTypedExample>& examples,
-                           bool hasRelaxedFloat32Model, Synchronously sync,
+                           bool hasRelaxedFloat32Model, Synchronously sync, MeasureTiming measure,
                            bool testDynamicOutputShape) {
     EvaluatePreparedModel(preparedModel, is_ignored, examples, hasRelaxedFloat32Model, kDefaultAtol,
-                          kDefaultRtol, sync, testDynamicOutputShape);
+                          kDefaultRtol, sync, measure, testDynamicOutputShape);
 }
 
 static void getPreparedModel(sp<PreparedModelCallback> callback,
@@ -333,7 +346,7 @@
     float fpAtol = 1e-5f, fpRtol = 5.0f * 1.1920928955078125e-7f;
     EvaluatePreparedModel(preparedModel, is_ignored, examples,
                           /*hasRelaxedFloat32Model=*/false, fpAtol, fpRtol, Synchronously::NO,
-                          /*testDynamicOutputShape=*/false);
+                          MeasureTiming::NO, /*testDynamicOutputShape=*/false);
 }
 
 void Execute(const sp<V1_1::IDevice>& device, std::function<V1_1::Model(void)> create_model,
@@ -380,7 +393,7 @@
 
     EvaluatePreparedModel(preparedModel, is_ignored, examples,
                           model.relaxComputationFloat32toFloat16, 1e-5f, 1e-5f, Synchronously::NO,
-                          /*testDynamicOutputShape=*/false);
+                          MeasureTiming::NO, /*testDynamicOutputShape=*/false);
 }
 
 // TODO: Reduce code duplication.
@@ -429,10 +442,16 @@
 
     EvaluatePreparedModel(preparedModel, is_ignored, examples,
                           model.relaxComputationFloat32toFloat16, Synchronously::NO,
-                          testDynamicOutputShape);
+                          MeasureTiming::NO, testDynamicOutputShape);
     EvaluatePreparedModel(preparedModel, is_ignored, examples,
                           model.relaxComputationFloat32toFloat16, Synchronously::YES,
-                          testDynamicOutputShape);
+                          MeasureTiming::NO, testDynamicOutputShape);
+    EvaluatePreparedModel(preparedModel, is_ignored, examples,
+                          model.relaxComputationFloat32toFloat16, Synchronously::NO,
+                          MeasureTiming::YES, testDynamicOutputShape);
+    EvaluatePreparedModel(preparedModel, is_ignored, examples,
+                          model.relaxComputationFloat32toFloat16, Synchronously::YES,
+                          MeasureTiming::YES, testDynamicOutputShape);
 }
 
 }  // namespace generated_tests