Add Quality of Service to NNAPI HAL This CL makes the following changes: * introduces a new Priority enum * extends ErrorStatus with new error codes * adds "supportsDeadlines" method to IDevice * adds priority and deadline arguments to IDevice::prepareModel* * adds deadline argument to IPreparedModel::execute* * updates IExecutionCallback with new ErrorStatus * updates current.txt accordingly Bug: 136739795 Bug: 142902514 Bug: 145300530 Test: mma Change-Id: Iaa7877bde1f463635b8bbdb4e8a001d7b79b9c65

commit: c2499ecda1f398238d5fd5164c3fee96b68fc1f5 [log] [tgz]
author: Michael Butler <butlermichael@google.com> Wed Dec 11 18:31:12 2019 -0800
committer: Michael Butler <butlermichael@google.com> Tue Jan 21 15:55:49 2020 -0800
tree: 6b2c41319ad4cccb77c55da8eb7cd30e1f920654
parent: fdb6b518e9b330a600100409b6d3630c83438a21 [diff]
diff --git a/neuralnetworks/1.3/Android.bp b/neuralnetworks/1.3/Android.bp
index 08e824d..56011e2 100644
--- a/neuralnetworks/1.3/Android.bp
+++ b/neuralnetworks/1.3/Android.bp

@@ -10,6 +10,7 @@
         "types.hal",
         "IBuffer.hal",
         "IDevice.hal",
+        "IExecutionCallback.hal",
         "IPreparedModel.hal",
         "IPreparedModelCallback.hal",
     ],

diff --git a/neuralnetworks/1.3/IBuffer.hal b/neuralnetworks/1.3/IBuffer.hal
index 84241c5..dfc57fe 100644
--- a/neuralnetworks/1.3/IBuffer.hal
+++ b/neuralnetworks/1.3/IBuffer.hal

@@ -16,7 +16,7 @@
 
 package android.hardware.neuralnetworks@1.3;
 
-import @1.0::ErrorStatus;
+import ErrorStatus;
 
 /**
  * This interface represents a device memory buffer.

diff --git a/neuralnetworks/1.3/IDevice.hal b/neuralnetworks/1.3/IDevice.hal
index 8dc41f7..610db79 100644
--- a/neuralnetworks/1.3/IDevice.hal
+++ b/neuralnetworks/1.3/IDevice.hal

@@ -16,7 +16,6 @@
 
 package android.hardware.neuralnetworks@1.3;
 
-import @1.0::ErrorStatus;
 import @1.1::ExecutionPreference;
 import @1.2::Constant;
 import @1.2::DeviceType;
@@ -25,7 +24,10 @@
 import BufferDesc;
 import BufferRole;
 import Capabilities;
+import ErrorStatus;
 import Model;
+import OptionalTimePoint;
+import Priority;
 import IBuffer;
 import IPreparedModel;
 import IPreparedModelCallback;
@@ -46,6 +48,19 @@
     getCapabilities_1_3() generates (ErrorStatus status, Capabilities capabilities);
 
     /**
+     * Returns whether the device is able to complete or abort a task within a
+     * specified duration.
+     *
+     * @return prepareModelDeadline 'true' if the device supports completing or
+     *     aborting model preparation by the deadline when the deadline is supplied,
+     *     'false' otherwise.
+     * @return executionDeadline 'true' if the device supports completing or
+     *     aborting an execution by the deadline when the deadline is supplied,
+     *     'false' otherwise.
+     */
+    supportsDeadlines() generates (bool prepareModelDeadline, bool executionDeadline);
+
+    /**
      * Gets the supported operations in a model.
      *
      * getSupportedOperations indicates which operations of the top-level
@@ -118,6 +133,22 @@
      * the callback object must be invoked with the appropriate ErrorStatus
      * value and nullptr for the IPreparedModel.
      *
+     * The model is prepared with a priority. This priority is relative to other
+     * prepared models owned by the same client. Higher priority executions may
+     * use more compute resources than lower priority executions, and may
+     * preempt or starve lower priority executions.
+     *
+     * prepareModel_1_3 can be called with an optional deadline. If the model
+     * is not able to be prepared before the provided deadline, the model
+     * preparation must be aborted, and either {@link
+     * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+     * to an abort must be sent the same way as other errors, described above.
+     * If the service reports that it does not support preparation deadlines via
+     * IDevice::supportsDeadlines, and prepareModel_1_3 is called with a
+     * deadline, then the argument is invalid, and {@link
+     * ErrorStatus::INVALID_ARGUMENT} must be returned.
+     *
      * Optionally, the driver may save the prepared model to cache during the
      * asynchronous preparation. Any error that occurs when saving to cache must
      * not affect the status of preparing the model. Even if the input arguments
@@ -139,6 +170,11 @@
      * @param model The model to be prepared for execution.
      * @param preference Indicates the intended execution behavior of a prepared
      *     model.
+     * @param priority The priority of the prepared model relative to other
+     *     prepared models owned by the client.
+     * @param deadline The time by which the model must be prepared. If the
+     *     model cannot be prepared by the deadline, the preparation must be
+     *     aborted.
      * @param modelCache A vector of handles with each entry holding exactly one
      *     cache file descriptor for the security-sensitive cache. The length of
      *     the vector must either be 0 indicating that caching information is
@@ -173,8 +209,12 @@
      *     - GENERAL_FAILURE if there is an unspecified error
      *     - INVALID_ARGUMENT if one of the input arguments related to preparing
      *       the model is invalid
+     *     - MISSED_DEADLINE_* if the deadline for preparing a model cannot be
+     *       met
+     *     - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
      */
     prepareModel_1_3(Model model, ExecutionPreference preference,
+                     Priority priority, OptionalTimePoint deadline,
                      vec<handle> modelCache, vec<handle> dataCache,
                      uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
                      IPreparedModelCallback callback)
@@ -220,6 +260,22 @@
      * the model, the callback object must be invoked with the appropriate
      * ErrorStatus value and nullptr for the IPreparedModel.
      *
+     * The model is prepared with a priority. This priority is relative to other
+     * prepared models owned by the same client. Higher priority executions may
+     * use more compute resources than lower priority executions, and may
+     * preempt or starve lower priority executions.
+     *
+     * prepareModelFromCache_1_3 can be called with an optional deadline. If the
+     * model is not able to be prepared before the provided deadline, the model
+     * preparation must be aborted, and either {@link
+     * ErrorStatus::MISSED_DEADLINE_TRANSIENT}
+     * or {@link ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The
+     * error due to an abort must be sent the same way as other errors,
+     * described above. If the service reports that it does not support
+     * preparation deadlines via IDevice::supportsDeadlines, and
+     * prepareModelFromCache_1_3 is called with a deadline, then the argument is
+     * invalid, and {@link ErrorStatus::INVALID_ARGUMENT} must be returned.
+     *
      * The only information that may be unknown to the model at this stage is
      * the shape of the tensors, which may only be known at execution time. As
      * such, some driver services may return partially prepared models, where
@@ -228,6 +284,11 @@
      * used with different shapes of inputs on different (possibly concurrent)
      * executions.
      *
+     * @param priority The priority of the prepared model relative to other
+     *     prepared models owned by the client.
+     * @param deadline The time by which the model must be prepared. If the
+     *     model cannot be prepared by the deadline, the preparation must be
+     *     aborted.
      * @param modelCache A vector of handles with each entry holding exactly one
      *     cache file descriptor for the security-sensitive cache. The length of
      *     the vector must match the numModelCache returned from getNumberOfCacheFilesNeeded.
@@ -253,8 +314,12 @@
      *     - GENERAL_FAILURE if caching is not supported or if there is an
      *       unspecified error
      *     - INVALID_ARGUMENT if one of the input arguments is invalid
+     *     - MISSED_DEADLINE_* if the deadline for preparing a model cannot be
+     *       met
+     *     - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
      */
-    prepareModelFromCache_1_3(vec<handle> modelCache, vec<handle> dataCache,
+    prepareModelFromCache_1_3(Priority priority, OptionalTimePoint deadline,
+                              vec<handle> modelCache, vec<handle> dataCache,
                               uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
                               IPreparedModelCallback callback)
             generates (ErrorStatus status);

diff --git a/neuralnetworks/1.3/IExecutionCallback.hal b/neuralnetworks/1.3/IExecutionCallback.hal
new file mode 100644
index 0000000..439428a
--- /dev/null
+++ b/neuralnetworks/1.3/IExecutionCallback.hal

@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.3;
+
+import @1.2::IExecutionCallback;
+import @1.2::OutputShape;
+import @1.2::Timing;
+
+/**
+ * IExecutionCallback must be used to return the error status result from an
+ * execution asynchronously launched from IPreparedModel::execute*.
+ */
+interface IExecutionCallback extends @1.2::IExecutionCallback {
+
+    /**
+     * There are three notify methods declared for the IExecutionCallback
+     * interface: notify_1_3, notify_1_2, and notify. One of the three notify
+     * methods must be invoked immediately after the asynchronous task has
+     * finished performing the execution. One of the notify methods must be
+     * provided with the ErrorStatus from the execution. If the asynchronous
+     * task is not launched, one of the notify methods must be invoked with the
+     * appropriate error.
+     *
+     * @param status Error status returned from launching the asynchronous task
+     *               (if the launch fails) or from the asynchronous task itself
+     *               (if the launch succeeds). Must be:
+     *               - NONE if the asynchronous execution was successful
+     *               - DEVICE_UNAVAILABLE if driver is offline or busy
+     *               - GENERAL_FAILURE if the asynchronous task resulted in an
+     *                 unspecified error
+     *               - OUTPUT_INSUFFICIENT_SIZE if at least one output
+     *                 operand buffer is not large enough to store the
+     *                 corresponding output
+     *               - INVALID_ARGUMENT if one of the input arguments to
+     *                 the execution is invalid
+     *               - MISSED_DEADLINE_* if the deadline could not be met
+     *               - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+     * @param outputShapes A list of shape information of model output operands.
+     *                     The index into "outputShapes" corresponds to the index
+     *                     of the output operand in the Request outputs vector.
+     *                     outputShapes must be empty unless the status is either
+     *                     NONE or OUTPUT_INSUFFICIENT_SIZE.
+     * @param timing Duration of execution. Unless MeasureTiming::YES was passed when
+     *               launching the execution and status is NONE, all times must
+     *               be reported as UINT64_MAX. A driver may choose to report
+     *               any time as UINT64_MAX, indicating that particular measurement is
+     *               not available.
+     */
+  oneway notify_1_3(ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);
+};

diff --git a/neuralnetworks/1.3/IPreparedModel.hal b/neuralnetworks/1.3/IPreparedModel.hal
index 00adc1f..bce6ee2 100644
--- a/neuralnetworks/1.3/IPreparedModel.hal
+++ b/neuralnetworks/1.3/IPreparedModel.hal

@@ -16,13 +16,14 @@
 
 package android.hardware.neuralnetworks@1.3;
 
-import @1.0::ErrorStatus;
-import @1.2::IExecutionCallback;
 import @1.2::IPreparedModel;
 import @1.2::MeasureTiming;
 import @1.2::OutputShape;
 import @1.2::Timing;
+import ErrorStatus;
+import OptionalTimePoint;
 import Request;
+import IExecutionCallback;
 
 /**
  * IPreparedModel describes a model that has been prepared for execution and
@@ -65,6 +66,17 @@
      *   values, the execution should complete successfully (ErrorStatus::NONE):
      *   There must be no failure unless the device itself is in a bad state.
      *
+     * execute_1_3 can be called with an optional deadline. If the execution
+     * is not able to be completed before the provided deadline, the execution
+     * must be aborted, and either {@link
+     * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+     * to an abort must be sent the same way as other errors, described above.
+     * If the service reports that it does not support execution deadlines via
+     * IDevice::supportsDeadlines, and execute_1_3 is called with a deadline,
+     * then the argument is invalid, and {@link ErrorStatus::INVALID_ARGUMENT}
+     * must be returned.
+     *
      * Any number of calls to the execute* and executeSynchronously* functions,
      * in any combination, may be made concurrently, even on the same
      * IPreparedModel object.
@@ -75,6 +87,9 @@
      *                The duration runs from the time the driver sees the call
      *                to the execute_1_3 function to the time the driver invokes
      *                the callback.
+     * @param deadline The time by which execution must complete. If the
+     *                 execution cannot be finished by the deadline, the
+     *                 execution must be aborted.
      * @param callback A callback object used to return the error status of
      *                 the execution. The callback object's notify function must
      *                 be called exactly once, even if the execution was
@@ -87,8 +102,13 @@
      *                  not large enough to store the resultant values
      *                - INVALID_ARGUMENT if one of the input arguments is
      *                  invalid
+     *                - MISSED_DEADLINE_* if the deadline for executing a model
+     *                  cannot be met
+     *                - RESOURCE_EXHAUSTED_* if the task was aborted by the
+     *                  driver
      */
-    execute_1_3(Request request, MeasureTiming measure, IExecutionCallback callback)
+    execute_1_3(Request request, MeasureTiming measure, OptionalTimePoint deadline,
+                IExecutionCallback callback)
         generates (ErrorStatus status);
 
     /**
@@ -116,6 +136,17 @@
      * (ErrorStatus::NONE): There must be no failure unless the device itself is
      * in a bad state.
      *
+     * executeSynchronously_1_3 can be called with an optional deadline. If the
+     * execution is not able to be completed before the provided deadline, the
+     * execution must be aborted, and either {@link
+     * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+     * to an abort must be sent the same way as other errors, described above.
+     * If the service reports that it does not support execution deadlines via
+     * IDevice::supportsDeadlines, and executeSynchronously_1_3 is called with a
+     * deadline, then the argument is invalid, and
+     * {@link ErrorStatus::INVALID_ARGUMENT} must be returned.
+     *
      * Any number of calls to the execute* and executeSynchronously* functions,
      * in any combination, may be made concurrently, even on the same
      * IPreparedModel object.
@@ -126,6 +157,9 @@
      *                The duration runs from the time the driver sees the call
      *                to the executeSynchronously_1_3 function to the time the driver
      *                returns from the function.
+     * @param deadline The time by which execution must complete. If the
+     *                 execution cannot be finished by the deadline, the
+     *                 execution must be aborted.
      * @return status Error status of the execution, must be:
      *                - NONE if execution is performed successfully
      *                - DEVICE_UNAVAILABLE if driver is offline or busy
@@ -135,16 +169,22 @@
      *                  corresponding output
      *                - INVALID_ARGUMENT if one of the input arguments is
      *                  invalid
+     *                - MISSED_DEADLINE_* if the deadline for executing a model
+     *                  cannot be met
+     *                - RESOURCE_EXHAUSTED_* if the task was aborted by the
+     *                  driver
      * @return outputShapes A list of shape information of model output operands.
      *                      The index into "outputShapes" corresponds to the index
      *                      of the output operand in the Request outputs vector.
      *                      outputShapes must be empty unless the status is either
      *                      NONE or OUTPUT_INSUFFICIENT_SIZE.
-     * @return Timing Duration of execution. Unless measure is YES and status is
+     * @return timing Duration of execution. Unless measure is YES and status is
      *                NONE, all times must be reported as UINT64_MAX. A driver may
      *                choose to report any time as UINT64_MAX, indicating that
      *                measurement is not available.
      */
-    executeSynchronously_1_3(Request request, MeasureTiming measure)
-            generates (ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);
+    executeSynchronously_1_3(Request request, MeasureTiming measure,
+                             OptionalTimePoint deadline)
+                  generates (ErrorStatus status, vec<OutputShape> outputShapes,
+                             Timing timing);
 };

diff --git a/neuralnetworks/1.3/IPreparedModelCallback.hal b/neuralnetworks/1.3/IPreparedModelCallback.hal
index ff295a2..11ebbf4 100644
--- a/neuralnetworks/1.3/IPreparedModelCallback.hal
+++ b/neuralnetworks/1.3/IPreparedModelCallback.hal

@@ -16,7 +16,6 @@
 
 package android.hardware.neuralnetworks@1.3;
 
-import @1.0::ErrorStatus;
 import @1.2::IPreparedModelCallback;
 import IPreparedModel;
 
@@ -48,6 +47,10 @@
      *                 unspecified error
      *               - INVALID_ARGUMENT if one of the input arguments to
      *                 prepareModel is invalid
+     *               - MISSED_DEADLINE_* if the deadline for preparing a model
+     *                 cannot be met
+     *               - RESOURCE_EXHAUSTED_* if the task was aborted by the
+     *                 driver
      * @param preparedModel A model that has been asynchronously prepared for
      *                      execution. If the model was unable to be prepared
      *                      due to an error, nullptr must be passed in place of

diff --git a/neuralnetworks/1.3/types.hal b/neuralnetworks/1.3/types.hal
index a6d274a..b330b50 100644
--- a/neuralnetworks/1.3/types.hal
+++ b/neuralnetworks/1.3/types.hal

@@ -17,6 +17,7 @@
 package android.hardware.neuralnetworks@1.3;
 
 import @1.0::DataLocation;
+import @1.0::ErrorStatus;
 import @1.0::PerformanceInfo;
 import @1.0::RequestArgument;
 import @1.2::Model.ExtensionNameAndPrefix;
@@ -4999,6 +5000,16 @@
 };
 
 /**
+ * Priority given to a prepared model for execution.
+ */
+enum Priority : int32_t {
+    LOW,
+    MEDIUM,
+    HIGH,
+};
+
+
+/**
  * The capabilities of a driver.
  *
  * Performance of an operation comes from the type of its first operand.
@@ -5434,3 +5445,49 @@
      */
     vec<MemoryPool> pools;
 };
+
+/**
+ * Optional time point of the steady clock (as from std::chrono::steady_clock)
+ * measured in nanoseconds.
+ */
+safe_union OptionalTimePoint {
+    /** No time point provided. */
+    Monostate none;
+
+    /**
+     * Time point of the steady clock (as from std::chrono::steady_clock)
+     * measured in nanoseconds.
+     */
+    uint64_t nanoseconds;
+};
+
+/**
+ * Return status of a function.
+ */
+enum ErrorStatus : @1.0::ErrorStatus {
+    /**
+     * Failure because a deadline could not be met for a task, but future
+     * deadlines may still be met for the same task after a short delay.
+     */
+    MISSED_DEADLINE_TRANSIENT,
+
+    /**
+     * Failure because a deadline could not be met for a task, and future
+     * deadlines will likely also not be met for the same task even after a
+     * short delay.
+     */
+    MISSED_DEADLINE_PERSISTENT,
+
+    /**
+     * Failure because of a resource limitation within the driver, but future
+     * calls for the same task may still succeed after a short delay.
+     */
+    RESOURCE_EXHAUSTED_TRANSIENT,
+
+    /**
+     * Failure because of a resource limitation within the driver, and future
+     * calls for the same task will likely also fail even after a short
+     * delay.
+     */
+    RESOURCE_EXHAUSTED_PERSISTENT,
+};

diff --git a/neuralnetworks/1.3/types.t b/neuralnetworks/1.3/types.t
index f3319e5..a973923 100644
--- a/neuralnetworks/1.3/types.t
+++ b/neuralnetworks/1.3/types.t

@@ -19,6 +19,7 @@
 package android.hardware.neuralnetworks@1.3;
 
 import @1.0::DataLocation;
+import @1.0::ErrorStatus;
 import @1.0::PerformanceInfo;
 import @1.0::RequestArgument;
 import @1.2::Model.ExtensionNameAndPrefix;
@@ -90,6 +91,16 @@
 };
 
 /**
+ * Priority given to a prepared model for execution.
+ */
+enum Priority : int32_t {
+    LOW,
+    MEDIUM,
+    HIGH,
+};
+
+
+/**
  * The capabilities of a driver.
  *
  * Performance of an operation comes from the type of its first operand.
@@ -525,3 +536,49 @@
      */
     vec<MemoryPool> pools;
 };
+
+/**
+ * Optional time point of the steady clock (as from std::chrono::steady_clock)
+ * measured in nanoseconds.
+ */
+safe_union OptionalTimePoint {
+    /** No time point provided. */
+    Monostate none;
+
+    /**
+     * Time point of the steady clock (as from std::chrono::steady_clock)
+     * measured in nanoseconds.
+     */
+    uint64_t nanoseconds;
+};
+
+/**
+ * Return status of a function.
+ */
+enum ErrorStatus : @1.0::ErrorStatus {
+    /**
+     * Failure because a deadline could not be met for a task, but future
+     * deadlines may still be met for the same task after a short delay.
+     */
+    MISSED_DEADLINE_TRANSIENT,
+
+    /**
+     * Failure because a deadline could not be met for a task, and future
+     * deadlines will likely also not be met for the same task even after a
+     * short delay.
+     */
+    MISSED_DEADLINE_PERSISTENT,
+
+    /**
+     * Failure because of a resource limitation within the driver, but future
+     * calls for the same task may still succeed after a short delay.
+     */
+    RESOURCE_EXHAUSTED_TRANSIENT,
+
+    /**
+     * Failure because of a resource limitation within the driver, and future
+     * calls for the same task will likely also fail even after a short
+     * delay.
+     */
+    RESOURCE_EXHAUSTED_PERSISTENT,
+};
commit	c2499ecda1f398238d5fd5164c3fee96b68fc1f5	[log] [tgz]
author	Michael Butler <butlermichael@google.com>	Wed Dec 11 18:31:12 2019 -0800
committer	Michael Butler <butlermichael@google.com>	Tue Jan 21 15:55:49 2020 -0800
tree	6b2c41319ad4cccb77c55da8eb7cd30e1f920654
parent	fdb6b518e9b330a600100409b6d3630c83438a21 [diff]