NNAPI Burst -- HAL interface
FastMessageQueue is a Treble-compliant data structure that enables fast
communication between two processes. The FMQ object itself is an atomic
circular buffer that is optionally synchronized with a futex. However,
FMQ has no notion of ownership or lifetime across processes, so it must
be paired with higher-level constructs to manage the lifetime and
ownership.
The NNAPI is introducing the notion of an "Execution Burst" object (or
more simply a "Burst" object), which is similar to an
ANeuralNetworksExecution, but is intended to be reused across multiple
executions and has lower IPC overheads. It achieves this low IPC
overhead by replacing HIDL HwBinder calls with FMQ messages.
Specifically, it replaces IPreparedModel::executeSynchronously's call
from the client into the service with fmq_sync<FmqRequestDatum> (an FMQ
channel used to pass a serialized Request object) and it replaces
the return from the service into the client with
fmq_sync<FmqResultDatum> (an FMQ channel used to return serialized
result status and OutputShapes information).
Each channel is a unidirectional flow of information with exactly one
producer and exactly one consumer. The channels are created by the NN
runtime and passed to the service via
IPreparedModel::configureExecutionBurst.
This CL defines the FmqRequestDatum and FmqResultDatum types in
types.hal. IBurstContext.hal defines IBurstContext, a HIDL object used
by the service to manage the resources of a Burst. IBurstCallback.hal
defines IBurstCallback, a HIDL callback object that can be used to
retrieve the handle to a resource the service has either not yet seen or
has evicted from its cache. Finally, IPreparedModel.hal is extended with
IPreparedModel::configureExecutionBurst to create the burst object.
Bug: 119570067
Test: mma
Change-Id: I333da70201531b1396efc714d096c277e8e1d47b
Merged-In: I333da70201531b1396efc714d096c277e8e1d47b
(cherry picked from commit 7e91e24fe155de15c677c48ca5a2c141ba2246dc)
diff --git a/neuralnetworks/1.2/Android.bp b/neuralnetworks/1.2/Android.bp
index 7d13104..9057b94 100644
--- a/neuralnetworks/1.2/Android.bp
+++ b/neuralnetworks/1.2/Android.bp
@@ -8,6 +8,8 @@
},
srcs: [
"types.hal",
+ "IBurstCallback.hal",
+ "IBurstContext.hal",
"IDevice.hal",
"IExecutionCallback.hal",
"IPreparedModel.hal",
@@ -20,6 +22,9 @@
"android.hidl.safe_union@1.0",
],
types: [
+ "DeviceType",
+ "FmqRequestDatum",
+ "FmqResultDatum",
"Model",
"Operand",
"OperandType",
@@ -27,6 +32,8 @@
"Operation",
"OperationType",
"OperationTypeRange",
+ "OutputShape",
+ "SymmPerChannelQuantParams",
],
gen_java: false,
}
diff --git a/neuralnetworks/1.2/IBurstCallback.hal b/neuralnetworks/1.2/IBurstCallback.hal
new file mode 100644
index 0000000..3f82e31
--- /dev/null
+++ b/neuralnetworks/1.2/IBurstCallback.hal
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.2;
+
+import @1.0::ErrorStatus;
+
+/**
+ * Callback object used by a service to retrieve memory objects based on unique
+ * identifiers ("slots").
+ */
+interface IBurstCallback {
+ /**
+ * Get the memory regions that correspond to slot ids. The slot ids are
+ * unique to the burst object.
+ *
+ * @param slots Values uniquely identifying memory regions within a Burst.
+ * @return status Indicates whether the memories were successfully returned;
+ * must be:
+ * - NONE if the memory regions were successfully retrieved
+ * - GENERAL_FAILURE if there is an unspecified error
+ * - INVALID_ARGUMENT if a slot number is invalid
+ * @return buffers Memory buffers corresponding to the slot numbers. If an
+ * error occurs, an empty vector must be returned for
+ * buffers, otherwise slots.size() == buffers.size().
+ */
+ getMemories(vec<int32_t> slots) generates (ErrorStatus status, vec<memory> buffers);
+};
diff --git a/neuralnetworks/1.2/IBurstContext.hal b/neuralnetworks/1.2/IBurstContext.hal
new file mode 100644
index 0000000..60bf53b
--- /dev/null
+++ b/neuralnetworks/1.2/IBurstContext.hal
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.2;
+
+/**
+ * Context object to manage the resources of a burst.
+ */
+interface IBurstContext {
+ /**
+ * freeMemory is used by the client to signal to the service that a memory
+ * buffer corresponding to a slot number is no longer needed by the client.
+ *
+ * The slot ids are unique to the burst object.
+ *
+ * @param slot Value uniquely identifying a memory region.
+ */
+ freeMemory(int32_t slot);
+};
diff --git a/neuralnetworks/1.2/IPreparedModel.hal b/neuralnetworks/1.2/IPreparedModel.hal
index 4e91c67..2d4e572 100644
--- a/neuralnetworks/1.2/IPreparedModel.hal
+++ b/neuralnetworks/1.2/IPreparedModel.hal
@@ -19,6 +19,8 @@
import @1.0::ErrorStatus;
import @1.0::IPreparedModel;
import @1.0::Request;
+import IBurstCallback;
+import IBurstContext;
import IExecutionCallback;
/**
@@ -100,11 +102,47 @@
* - NONE if execution is performed successfully
* - DEVICE_UNAVAILABLE if driver is offline or busy
* - GENERAL_FAILURE if there is an unspecified error
- * - OUTPUT_INSUFFICIENT_SIZE if provided output buffer is
- * not large enough to store the resultant values
+ * - OUTPUT_INSUFFICIENT_SIZE if at least one output
+ * operand buffer is not large enough to store the
+ * corresponding output
* - INVALID_ARGUMENT if one of the input arguments is
* invalid
+ * @return outputShapes A list of shape information of model output operands.
+ * The index into "outputShapes" corresponds to the index
+ * of the output operand in the Request outputs vector.
+ * outputShapes must be empty unless the status is either
+ * NONE or OUTPUT_INSUFFICIENT_SIZE.
*/
executeSynchronously(Request request)
- generates (ErrorStatus status);
+ generates (ErrorStatus status, vec<OutputShape> outputShapes);
+
+ /**
+ * Configure a Burst object used to execute multiple inferences on a
+ * prepared model in rapid succession.
+ *
+ * @param callback A callback object used to retrieve memory resources
+ * corresponding to unique identifiers ("slots").
+ * @param requestChannel Used by the client to send a serialized Request to
+ * the Burst for execution. requestChannel must not be
+ * used to pass a second Request object until a result
+ * has been received from resultChannel.
+ * @param resultChannel Used by the service to return the results of an
+ * execution to the client: the status of the execution
+ * and OutputShape of all output tensors. resultChannel
+ * must be used to return the results if a Request was
+ * sent through the requestChannel.
+ * @return status Error status of configuring the execution burst, must be:
+ * - NONE if the burst is successfully configured
+ * - DEVICE_UNAVAILABLE if driver is offline or busy
+ * - GENERAL_FAILURE if there is an unspecified error
+ * - INVALID_ARGUMENT if one of the input arguments is
+ * invalid
+ * @return context Object containing all resources (such as cached
+ * hidl_memory) related to a Burst if successful, otherwise
+ * nullptr.
+ */
+ configureExecutionBurst(IBurstCallback callback,
+ fmq_sync<FmqRequestDatum> requestChannel,
+ fmq_sync<FmqResultDatum> resultChannel)
+ generates (ErrorStatus status, IBurstContext context);
};
diff --git a/neuralnetworks/1.2/types.hal b/neuralnetworks/1.2/types.hal
index 2bdfb09..ce993d7 100644
--- a/neuralnetworks/1.2/types.hal
+++ b/neuralnetworks/1.2/types.hal
@@ -17,6 +17,7 @@
package android.hardware.neuralnetworks@1.2;
import @1.0::DataLocation;
+import @1.0::ErrorStatus;
import @1.0::OperandLifeTime;
import @1.0::OperandType;
import @1.0::PerformanceInfo;
@@ -76,6 +77,18 @@
* where C is an index in the Channel dimension.
*/
TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
+ /**
+ * A tensor of 16 bit unsigned integers that represent real numbers.
+ *
+ * Attached to this tensor are two numbers that can be used to convert the
+ * 16 bit integer to the real value and vice versa. These two numbers are:
+ * - scale: a 32 bit floating point value greater than zero.
+ * - zeroPoint: a 32 bit integer, in range [0, 65535].
+ *
+ * The formula is:
+ * real_value = (integer_value - zeroPoint) * scale.
+ */
+ TENSOR_QUANT16_ASYMM = 12,
/* ADDING A NEW FUNDAMENTAL TYPE REQUIRES UPDATING THE VALUE OF
* OperandTypeRange::OPERAND_FUNDAMENTAL_MAX.
*/
@@ -89,7 +102,7 @@
*/
enum OperandTypeRange : uint32_t {
OPERAND_FUNDAMENTAL_MIN = 0,
- OPERAND_FUNDAMENTAL_MAX = 11,
+ OPERAND_FUNDAMENTAL_MAX = 12,
OPERAND_OEM_MIN = 10000,
OPERAND_OEM_MAX = 10001,
};
@@ -150,7 +163,7 @@
TRANSPOSE_CONV_2D = 84,
UNIDIRECTIONAL_SEQUENCE_LSTM = 85,
UNIDIRECTIONAL_SEQUENCE_RNN = 86,
- ROTATED_BBOX_TRANSFORM = 87,
+ DETECTION_POSTPROCESSING = 87,
ABS = 88,
ROI_POOLING = 89,
EQUAL = 90,
@@ -254,9 +267,6 @@
*
* For a scalar operand, dimensions.size() must be 0.
*
- * For a tensor operand, dimensions.size() must be at least 1;
- * however, any of the dimensions may be unspecified.
- *
* A tensor operand with all dimensions specified has "fully
* specified" dimensions. Whenever possible (i.e., whenever the
* dimensions are known at model construction time), a tensor
@@ -275,17 +285,20 @@
* . The operand has lifetime CONSTANT_COPY or
* CONSTANT_REFERENCE.
*
- * . The operand has lifetime MODEL_INPUT or MODEL_OUTPUT. Fully
+ * . The operand has lifetime MODEL_INPUT. Fully
* specified dimensions must either be present in the
* Operand or they must be provided in the corresponding
* RequestArgument.
- * EXCEPTION: If the input or output is optional and omitted
+ * EXCEPTION: If the input is optional and omitted
* (by setting the hasNoValue field of the corresponding
* RequestArgument to true) then it need not have fully
* specified dimensions.
*
* A tensor operand with some number of unspecified dimensions is
* represented by setting each unspecified dimension to 0.
+ *
+ * A tensor operand with unspecified rank is represented by providing
+ * an empty dimensions vector.
*/
vec<uint32_t> dimensions;
@@ -417,3 +430,210 @@
*/
bool relaxComputationFloat32toFloat16;
};
+
+/**
+ * Describes the shape information of an output operand after execution.
+ */
+struct OutputShape {
+ /**
+ * Dimensions of the operand.
+ */
+ vec<uint32_t> dimensions;
+
+ /**
+ * Whether the provided buffer size is sufficient for the output.
+ */
+ bool isSufficient;
+};
+
+/**
+ * FmqRequestDatum is a single element of a serialized representation of a
+ * {@link @1.0::Request} object which is sent across FastMessageQueue.
+ *
+ * The serialized representation for a particular execution is referred to later
+ * in these descriptions as a 'packet'.
+ *
+ * FastMessageQueue can only pass HIDL-defined types that do not involve nested
+ * buffers, handles, or interfaces.
+ *
+ * The {@link @1.0::Request} is serialized as follows:
+ * 1) 'packetInformation'
+ * 2) For each input operand:
+ * 2.1) 'inputOperandInformation'
+ * 2.2) For each dimension element of the operand:
+ * 2.2.1) 'inputOperandDimensionValue'
+ * 3) For each output operand:
+ * 3.1) 'outputOperandInformation'
+ * 3.2) For each dimension element of the operand:
+ * 3.2.1) 'outputOperandDimensionValue'
+ * 4) For each pool:
+ * 4.1) 'poolIdentifier'
+ */
+safe_union FmqRequestDatum {
+ /**
+ * Type to describe the high-level layout of the packet.
+ */
+ struct PacketInformation {
+ /**
+ * How many elements the packet contains, including the
+ * "packetInformation" datum.
+ */
+ uint32_t packetSize;
+
+ /**
+ * Number of input operands.
+ */
+ uint32_t numberOfInputOperands;
+
+ /**
+ * Number of output operands.
+ */
+ uint32_t numberOfOutputOperands;
+
+ /**
+ * Number of pool identifiers.
+ */
+ uint32_t numberOfPools;
+ };
+
+ /**
+ * Type representing the information for each operand.
+ */
+ struct OperandInformation {
+ /**
+ * If true, the argument does not have a value. This can be used for
+ * operations that take optional arguments. If true, the fields of
+ * 'location' are set to 0, 'numberOfDimensions' is set to 0, and the
+ * dimensions information is omitted from the serialization.
+ */
+ bool hasNoValue;
+
+ /**
+ * The location within one of the memory pools passed in the Request.
+ */
+ DataLocation location;
+
+ /**
+ * Number of subsequent elements that belong to the dimensions vector.
+ */
+ uint32_t numberOfDimensions;
+ };
+
+ /**
+ * packetInformation is the first element of the packet and describes the
+ * remainder of the packet.
+ */
+ PacketInformation packetInformation;
+
+ /**
+ * Information for each input operand.
+ */
+ OperandInformation inputOperandInformation;
+
+ /**
+ * Element of the dimensions vector.
+ */
+ uint32_t inputOperandDimensionValue;
+
+ /**
+ * Information for each output operand.
+ */
+ OperandInformation outputOperandInformation;
+
+ /**
+ * Element of the dimensions vector.
+ */
+ uint32_t outputOperandDimensionValue;
+
+ /**
+ * Unique identifier for a pool.
+ *
+ * A {@link @1.0::Request} passes across one or more pools of shared memory
+ * for the inputs and outputs of an execution. However, these memory pools
+ * are not able to be sent across FastMessageQueue directly. Instead, the
+ * producing side of the FMQ represents each different pool with a unique
+ * identifier, and sends this identifier across the FMQ. Whenever the
+ * consuming side of the FMQ needs the memory corresponding to this unique
+ * identifier, it can pass the identifier to
+ * {@link IBurstCallback::getMemories} to retrieve the memory. Although this
+ * HIDL Binder call is expensive compared to communication across FMQ, it is
+ * only needed in the cases when the consumer does not recognize the unique
+ * identifier.
+ */
+ int32_t poolIdentifier;
+};
+
+/**
+ * FmqResultDatum is a single element of a serialized representation of the
+ * values returned from an execution ({@link @1.0::ErrorStatus} and
+ * vec<{@link OutputShape}>) which is returned via FastMessageQueue.
+ *
+ * The serialized representation for a particular execution is referred to later
+ * in these descriptions as a 'packet'.
+ *
+ * FastMessageQueue can only pass HIDL-defined types that do not involve nested
+ * buffers, handles, or interfaces.
+ *
+ * The execution return values ({@link @1.0::ErrorStatus} and
+ * vec<{@link OutputShape}>) are serialized as follows:
+ * 1) 'packetInformation'
+ * 2) For each returned operand:
+ * 2.1) 'operandInformation'
+ * 2.2) For each dimension element of the operand:
+ * 2.2.1) 'operandDimensionValue'
+ */
+safe_union FmqResultDatum {
+ /**
+ * Type to describe the high-level layout of the packet.
+ */
+ struct PacketInformation {
+ /**
+ * How many elements the packet contains, including the
+ * "packetInformation" datum.
+ */
+ uint32_t packetSize;
+
+ /**
+ * Status of the execution.
+ */
+ ErrorStatus errorStatus;
+
+ /**
+ * Number of returned operands.
+ */
+ uint32_t numberOfOperands;
+ };
+
+ /**
+ * Type representing the information for each operand.
+ */
+ struct OperandInformation {
+ /**
+ * Indicates whether the operand's output buffer is large enough to
+ * store the operand's result data.
+ */
+ bool isSufficient;
+
+ /**
+ * Number of subsequent elements that belong to the dimensions vector.
+ */
+ uint32_t numberOfDimensions;
+ };
+
+ /**
+ * packetInformation is the first element of the packet and describes the
+ * remainder of the packet. It additionally includes the status of the
+ * execution.
+ */
+ PacketInformation packetInformation;
+
+ /**
+ * Information for each returned operand.
+ */
+ OperandInformation operandInformation;
+
+ /**
+ * Element of the dimensions vector.
+ */
+ uint32_t operandDimensionValue;
+};