Add Quality of Service to NNAPI HAL
This CL makes the following changes:
* introduces a new Priority enum
* extends ErrorStatus with new error codes
* adds "supportsDeadlines" method to IDevice
* adds priority and deadline arguments to IDevice::prepareModel*
* adds deadline argument to IPreparedModel::execute*
* updates IExecutionCallback with new ErrorStatus
* updates current.txt accordingly
Bug: 136739795
Bug: 142902514
Bug: 145300530
Test: mma
Change-Id: Iaa7877bde1f463635b8bbdb4e8a001d7b79b9c65
diff --git a/neuralnetworks/1.3/Android.bp b/neuralnetworks/1.3/Android.bp
index 08e824d..56011e2 100644
--- a/neuralnetworks/1.3/Android.bp
+++ b/neuralnetworks/1.3/Android.bp
@@ -10,6 +10,7 @@
"types.hal",
"IBuffer.hal",
"IDevice.hal",
+ "IExecutionCallback.hal",
"IPreparedModel.hal",
"IPreparedModelCallback.hal",
],
diff --git a/neuralnetworks/1.3/IBuffer.hal b/neuralnetworks/1.3/IBuffer.hal
index 84241c5..dfc57fe 100644
--- a/neuralnetworks/1.3/IBuffer.hal
+++ b/neuralnetworks/1.3/IBuffer.hal
@@ -16,7 +16,7 @@
package android.hardware.neuralnetworks@1.3;
-import @1.0::ErrorStatus;
+import ErrorStatus;
/**
* This interface represents a device memory buffer.
diff --git a/neuralnetworks/1.3/IDevice.hal b/neuralnetworks/1.3/IDevice.hal
index 8dc41f7..610db79 100644
--- a/neuralnetworks/1.3/IDevice.hal
+++ b/neuralnetworks/1.3/IDevice.hal
@@ -16,7 +16,6 @@
package android.hardware.neuralnetworks@1.3;
-import @1.0::ErrorStatus;
import @1.1::ExecutionPreference;
import @1.2::Constant;
import @1.2::DeviceType;
@@ -25,7 +24,10 @@
import BufferDesc;
import BufferRole;
import Capabilities;
+import ErrorStatus;
import Model;
+import OptionalTimePoint;
+import Priority;
import IBuffer;
import IPreparedModel;
import IPreparedModelCallback;
@@ -46,6 +48,19 @@
getCapabilities_1_3() generates (ErrorStatus status, Capabilities capabilities);
/**
+ * Returns whether the device is able to complete or abort a task within a
+ * specified duration.
+ *
+ * @return prepareModelDeadline 'true' if the device supports completing or
+ * aborting model preparation by the deadline when the deadline is supplied,
+ * 'false' otherwise.
+ * @return executionDeadline 'true' if the device supports completing or
+ * aborting an execution by the deadline when the deadline is supplied,
+ * 'false' otherwise.
+ */
+ supportsDeadlines() generates (bool prepareModelDeadline, bool executionDeadline);
+
+ /**
* Gets the supported operations in a model.
*
* getSupportedOperations indicates which operations of the top-level
@@ -118,6 +133,22 @@
* the callback object must be invoked with the appropriate ErrorStatus
* value and nullptr for the IPreparedModel.
*
+ * The model is prepared with a priority. This priority is relative to other
+ * prepared models owned by the same client. Higher priority executions may
+ * use more compute resources than lower priority executions, and may
+ * preempt or starve lower priority executions.
+ *
+ * prepareModel_1_3 can be called with an optional deadline. If the model
+ * is not able to be prepared before the provided deadline, the model
+ * preparation must be aborted, and either {@link
+ * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+ * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+ * to an abort must be sent the same way as other errors, described above.
+ * If the service reports that it does not support preparation deadlines via
+ * IDevice::supportsDeadlines, and prepareModel_1_3 is called with a
+ * deadline, then the argument is invalid, and {@link
+ * ErrorStatus::INVALID_ARGUMENT} must be returned.
+ *
* Optionally, the driver may save the prepared model to cache during the
* asynchronous preparation. Any error that occurs when saving to cache must
* not affect the status of preparing the model. Even if the input arguments
@@ -139,6 +170,11 @@
* @param model The model to be prepared for execution.
* @param preference Indicates the intended execution behavior of a prepared
* model.
+ * @param priority The priority of the prepared model relative to other
+ * prepared models owned by the client.
+ * @param deadline The time by which the model must be prepared. If the
+ * model cannot be prepared by the deadline, the preparation must be
+ * aborted.
* @param modelCache A vector of handles with each entry holding exactly one
* cache file descriptor for the security-sensitive cache. The length of
* the vector must either be 0 indicating that caching information is
@@ -173,8 +209,12 @@
* - GENERAL_FAILURE if there is an unspecified error
* - INVALID_ARGUMENT if one of the input arguments related to preparing
* the model is invalid
+ * - MISSED_DEADLINE_* if the deadline for preparing a model cannot be
+ * met
+ * - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
*/
prepareModel_1_3(Model model, ExecutionPreference preference,
+ Priority priority, OptionalTimePoint deadline,
vec<handle> modelCache, vec<handle> dataCache,
uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
IPreparedModelCallback callback)
@@ -220,6 +260,22 @@
* the model, the callback object must be invoked with the appropriate
* ErrorStatus value and nullptr for the IPreparedModel.
*
+ * The model is prepared with a priority. This priority is relative to other
+ * prepared models owned by the same client. Higher priority executions may
+ * use more compute resources than lower priority executions, and may
+ * preempt or starve lower priority executions.
+ *
+ * prepareModelFromCache_1_3 can be called with an optional deadline. If the
+ * model is not able to be prepared before the provided deadline, the model
+ * preparation must be aborted, and either {@link
+ * ErrorStatus::MISSED_DEADLINE_TRANSIENT}
+ * or {@link ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The
+ * error due to an abort must be sent the same way as other errors,
+ * described above. If the service reports that it does not support
+ * preparation deadlines via IDevice::supportsDeadlines, and
+ * prepareModelFromCache_1_3 is called with a deadline, then the argument is
+ * invalid, and {@link ErrorStatus::INVALID_ARGUMENT} must be returned.
+ *
* The only information that may be unknown to the model at this stage is
* the shape of the tensors, which may only be known at execution time. As
* such, some driver services may return partially prepared models, where
@@ -228,6 +284,11 @@
* used with different shapes of inputs on different (possibly concurrent)
* executions.
*
+ * @param priority The priority of the prepared model relative to other
+ * prepared models owned by the client.
+ * @param deadline The time by which the model must be prepared. If the
+ * model cannot be prepared by the deadline, the preparation must be
+ * aborted.
* @param modelCache A vector of handles with each entry holding exactly one
* cache file descriptor for the security-sensitive cache. The length of
* the vector must match the numModelCache returned from getNumberOfCacheFilesNeeded.
@@ -253,8 +314,12 @@
* - GENERAL_FAILURE if caching is not supported or if there is an
* unspecified error
* - INVALID_ARGUMENT if one of the input arguments is invalid
+ * - MISSED_DEADLINE_* if the deadline for preparing a model cannot be
+ * met
+ * - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
*/
- prepareModelFromCache_1_3(vec<handle> modelCache, vec<handle> dataCache,
+ prepareModelFromCache_1_3(Priority priority, OptionalTimePoint deadline,
+ vec<handle> modelCache, vec<handle> dataCache,
uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
IPreparedModelCallback callback)
generates (ErrorStatus status);
diff --git a/neuralnetworks/1.3/IExecutionCallback.hal b/neuralnetworks/1.3/IExecutionCallback.hal
new file mode 100644
index 0000000..439428a
--- /dev/null
+++ b/neuralnetworks/1.3/IExecutionCallback.hal
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.3;
+
+import @1.2::IExecutionCallback;
+import @1.2::OutputShape;
+import @1.2::Timing;
+
+/**
+ * IExecutionCallback must be used to return the error status result from an
+ * execution asynchronously launched from IPreparedModel::execute*.
+ */
+interface IExecutionCallback extends @1.2::IExecutionCallback {
+
+ /**
+ * There are three notify methods declared for the IExecutionCallback
+ * interface: notify_1_3, notify_1_2, and notify. One of the three notify
+ * methods must be invoked immediately after the asynchronous task has
+ * finished performing the execution. One of the notify methods must be
+ * provided with the ErrorStatus from the execution. If the asynchronous
+ * task is not launched, one of the notify methods must be invoked with the
+ * appropriate error.
+ *
+ * @param status Error status returned from launching the asynchronous task
+ * (if the launch fails) or from the asynchronous task itself
+ * (if the launch succeeds). Must be:
+ * - NONE if the asynchronous execution was successful
+ * - DEVICE_UNAVAILABLE if driver is offline or busy
+ * - GENERAL_FAILURE if the asynchronous task resulted in an
+ * unspecified error
+ * - OUTPUT_INSUFFICIENT_SIZE if at least one output
+ * operand buffer is not large enough to store the
+ * corresponding output
+ * - INVALID_ARGUMENT if one of the input arguments to
+ * execute* is invalid
+ * - MISSED_DEADLINE_* if the deadline could not be met
+ * - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+ * @param outputShapes A list of shape information of model output operands.
+ * The index into "outputShapes" corresponds to the index
+ * of the output operand in the Request outputs vector.
+ * outputShapes must be empty unless the status is either
+ * NONE or OUTPUT_INSUFFICIENT_SIZE.
+ * @param timing Duration of execution. Unless MeasureTiming::YES was passed when
+ * launching the execution and status is NONE, all times must
+ * be reported as UINT64_MAX. A driver may choose to report
+ * any time as UINT64_MAX, indicating that particular measurement is
+ * not available.
+ */
+ oneway notify_1_3(ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);
+};
diff --git a/neuralnetworks/1.3/IPreparedModel.hal b/neuralnetworks/1.3/IPreparedModel.hal
index 00adc1f..bce6ee2 100644
--- a/neuralnetworks/1.3/IPreparedModel.hal
+++ b/neuralnetworks/1.3/IPreparedModel.hal
@@ -16,13 +16,14 @@
package android.hardware.neuralnetworks@1.3;
-import @1.0::ErrorStatus;
-import @1.2::IExecutionCallback;
import @1.2::IPreparedModel;
import @1.2::MeasureTiming;
import @1.2::OutputShape;
import @1.2::Timing;
+import ErrorStatus;
+import OptionalTimePoint;
import Request;
+import IExecutionCallback;
/**
* IPreparedModel describes a model that has been prepared for execution and
@@ -65,6 +66,17 @@
* values, the execution should complete successfully (ErrorStatus::NONE):
* There must be no failure unless the device itself is in a bad state.
*
+ * execute_1_3 can be called with an optional deadline. If the execution
+ * is not able to be completed before the provided deadline, the execution
+ * must be aborted, and either {@link
+ * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+ * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+ * to an abort must be sent the same way as other errors, described above.
+ * If the service reports that it does not support execution deadlines via
+ * IDevice::supportsDeadlines, and execute_1_3 is called with a deadline,
+ * then the argument is invalid, and {@link ErrorStatus::INVALID_ARGUMENT}
+ * must be returned.
+ *
* Any number of calls to the execute* and executeSynchronously* functions,
* in any combination, may be made concurrently, even on the same
* IPreparedModel object.
@@ -75,6 +87,9 @@
* The duration runs from the time the driver sees the call
* to the execute_1_3 function to the time the driver invokes
* the callback.
+ * @param deadline The time by which execution must complete. If the
+ * execution cannot be finished by the deadline, the
+ * execution must be aborted.
* @param callback A callback object used to return the error status of
* the execution. The callback object's notify function must
* be called exactly once, even if the execution was
@@ -87,8 +102,13 @@
* not large enough to store the resultant values
* - INVALID_ARGUMENT if one of the input arguments is
* invalid
+ * - MISSED_DEADLINE_* if the deadline for executing a model
+ * cannot be met
+ * - RESOURCE_EXHAUSTED_* if the task was aborted by the
+ * driver
*/
- execute_1_3(Request request, MeasureTiming measure, IExecutionCallback callback)
+ execute_1_3(Request request, MeasureTiming measure, OptionalTimePoint deadline,
+ IExecutionCallback callback)
generates (ErrorStatus status);
/**
@@ -116,6 +136,17 @@
* (ErrorStatus::NONE): There must be no failure unless the device itself is
* in a bad state.
*
+ * executeSynchronously_1_3 can be called with an optional deadline. If the
+ * execution is not able to be completed before the provided deadline, the
+ * execution must be aborted, and either {@link
+ * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+ * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+ * to an abort must be sent the same way as other errors, described above.
+ * If the service reports that it does not support execution deadlines via
+ * IDevice::supportsDeadlines, and executeSynchronously_1_3 is called with a
+ * deadline, then the argument is invalid, and
+ * {@link ErrorStatus::INVALID_ARGUMENT} must be returned.
+ *
* Any number of calls to the execute* and executeSynchronously* functions,
* in any combination, may be made concurrently, even on the same
* IPreparedModel object.
@@ -126,6 +157,9 @@
* The duration runs from the time the driver sees the call
* to the executeSynchronously_1_3 function to the time the driver
* returns from the function.
+ * @param deadline The time by which execution must complete. If the
+ * execution cannot be finished by the deadline, the
+ * execution must be aborted.
* @return status Error status of the execution, must be:
* - NONE if execution is performed successfully
* - DEVICE_UNAVAILABLE if driver is offline or busy
@@ -135,16 +169,22 @@
* corresponding output
* - INVALID_ARGUMENT if one of the input arguments is
* invalid
+ * - MISSED_DEADLINE_* if the deadline for executing a model
+ * cannot be met
+ * - RESOURCE_EXHAUSTED_* if the task was aborted by the
+ * driver
* @return outputShapes A list of shape information of model output operands.
* The index into "outputShapes" corresponds to the index
* of the output operand in the Request outputs vector.
* outputShapes must be empty unless the status is either
* NONE or OUTPUT_INSUFFICIENT_SIZE.
- * @return Timing Duration of execution. Unless measure is YES and status is
+ * @return timing Duration of execution. Unless measure is YES and status is
* NONE, all times must be reported as UINT64_MAX. A driver may
* choose to report any time as UINT64_MAX, indicating that
* measurement is not available.
*/
- executeSynchronously_1_3(Request request, MeasureTiming measure)
- generates (ErrorStatus status, vec<OutputShape> outputShapes, Timing timing);
+ executeSynchronously_1_3(Request request, MeasureTiming measure,
+ OptionalTimePoint deadline)
+ generates (ErrorStatus status, vec<OutputShape> outputShapes,
+ Timing timing);
};
diff --git a/neuralnetworks/1.3/IPreparedModelCallback.hal b/neuralnetworks/1.3/IPreparedModelCallback.hal
index ff295a2..11ebbf4 100644
--- a/neuralnetworks/1.3/IPreparedModelCallback.hal
+++ b/neuralnetworks/1.3/IPreparedModelCallback.hal
@@ -16,7 +16,6 @@
package android.hardware.neuralnetworks@1.3;
-import @1.0::ErrorStatus;
import @1.2::IPreparedModelCallback;
import IPreparedModel;
@@ -48,6 +47,10 @@
* unspecified error
* - INVALID_ARGUMENT if one of the input arguments to
* prepareModel is invalid
+ * - MISSED_DEADLINE_* if the deadline for preparing a model
+ * cannot be met
+ * - RESOURCE_EXHAUSTED_* if the task was aborted by the
+ * driver
* @param preparedModel A model that has been asynchronously prepared for
* execution. If the model was unable to be prepared
* due to an error, nullptr must be passed in place of
diff --git a/neuralnetworks/1.3/types.hal b/neuralnetworks/1.3/types.hal
index a6d274a..b330b50 100644
--- a/neuralnetworks/1.3/types.hal
+++ b/neuralnetworks/1.3/types.hal
@@ -17,6 +17,7 @@
package android.hardware.neuralnetworks@1.3;
import @1.0::DataLocation;
+import @1.0::ErrorStatus;
import @1.0::PerformanceInfo;
import @1.0::RequestArgument;
import @1.2::Model.ExtensionNameAndPrefix;
@@ -4999,6 +5000,16 @@
};
/**
+ * Priority given to a prepared model for execution.
+ */
+enum Priority : int32_t {
+ LOW,
+ MEDIUM,
+ HIGH,
+};
+
+
+/**
* The capabilities of a driver.
*
* Performance of an operation comes from the type of its first operand.
@@ -5434,3 +5445,49 @@
*/
vec<MemoryPool> pools;
};
+
+/**
+ * Optional time point of the steady clock (as from std::chrono::steady_clock)
+ * measured in nanoseconds.
+ */
+safe_union OptionalTimePoint {
+ /** No time point provided. */
+ Monostate none;
+
+ /**
+ * Time point of the steady clock (as from std::chrono::steady_clock)
+ * measured in nanoseconds.
+ */
+ uint64_t nanoseconds;
+};
+
+/**
+ * Return status of a function.
+ */
+enum ErrorStatus : @1.0::ErrorStatus {
+ /**
+ * Failure because a deadline could not be met for a task, but future
+ * deadlines may still be met for the same task after a short delay.
+ */
+ MISSED_DEADLINE_TRANSIENT,
+
+ /**
+ * Failure because a deadline could not be met for a task, and future
+ * deadlines will likely also not be met for the same task even after a
+ * short delay.
+ */
+ MISSED_DEADLINE_PERSISTENT,
+
+ /**
+ * Failure because of a resource limitation within the driver, but future
+ * calls for the same task may still succeed after a short delay.
+ */
+ RESOURCE_EXHAUSTED_TRANSIENT,
+
+ /**
+ * Failure because of a resource limitation within the driver, and future
+ * calls for the same task will likely also fail even after a short
+ * delay.
+ */
+ RESOURCE_EXHAUSTED_PERSISTENT,
+};
diff --git a/neuralnetworks/1.3/types.t b/neuralnetworks/1.3/types.t
index f3319e5..a973923 100644
--- a/neuralnetworks/1.3/types.t
+++ b/neuralnetworks/1.3/types.t
@@ -19,6 +19,7 @@
package android.hardware.neuralnetworks@1.3;
import @1.0::DataLocation;
+import @1.0::ErrorStatus;
import @1.0::PerformanceInfo;
import @1.0::RequestArgument;
import @1.2::Model.ExtensionNameAndPrefix;
@@ -90,6 +91,16 @@
};
/**
+ * Priority given to a prepared model for execution.
+ */
+enum Priority : int32_t {
+ LOW,
+ MEDIUM,
+ HIGH,
+};
+
+
+/**
* The capabilities of a driver.
*
* Performance of an operation comes from the type of its first operand.
@@ -525,3 +536,49 @@
*/
vec<MemoryPool> pools;
};
+
+/**
+ * Optional time point of the steady clock (as from std::chrono::steady_clock)
+ * measured in nanoseconds.
+ */
+safe_union OptionalTimePoint {
+ /** No time point provided. */
+ Monostate none;
+
+ /**
+ * Time point of the steady clock (as from std::chrono::steady_clock)
+ * measured in nanoseconds.
+ */
+ uint64_t nanoseconds;
+};
+
+/**
+ * Return status of a function.
+ */
+enum ErrorStatus : @1.0::ErrorStatus {
+ /**
+ * Failure because a deadline could not be met for a task, but future
+ * deadlines may still be met for the same task after a short delay.
+ */
+ MISSED_DEADLINE_TRANSIENT,
+
+ /**
+ * Failure because a deadline could not be met for a task, and future
+ * deadlines will likely also not be met for the same task even after a
+ * short delay.
+ */
+ MISSED_DEADLINE_PERSISTENT,
+
+ /**
+ * Failure because of a resource limitation within the driver, but future
+ * calls for the same task may still succeed after a short delay.
+ */
+ RESOURCE_EXHAUSTED_TRANSIENT,
+
+ /**
+ * Failure because of a resource limitation within the driver, and future
+ * calls for the same task will likely also fail even after a short
+ * delay.
+ */
+ RESOURCE_EXHAUSTED_PERSISTENT,
+};