Create NNAPI HAL v1.3 and add TENSOR_QUANT8_ASYMM_SIGNED OperandType
Bug: 137828494
Bug: 139120468
Bug: 136735770
Test: mma
Change-Id: I28f74e4b364fec1d7431a96cf5687256b3106069
Merged-In: I28f74e4b364fec1d7431a96cf5687256b3106069
(cherry picked from commit 5a7b67ab8fafd171f07d0ba99338f85c42993e0f)
diff --git a/current.txt b/current.txt
index 21db232..4c5f155 100644
--- a/current.txt
+++ b/current.txt
@@ -586,6 +586,8 @@
# HALs released in Android R
07d0a252b2d8fa35887908a996ba395cf392968395fc30afab791f46e0c22a52 android.hardware.boot@1.1::IBootControl
74049a402be913963edfdd80828a53736570e9d8124a1bf18166b6ed46a6b0ab android.hardware.boot@1.1::types
+34515afa2bb792d3c6d8495a5f5d907d179c8507ca5e55c10050d02ae1d516ef android.hardware.neuralnetworks@1.3::IDevice
+e2d20d4eb24f40b44a3766d05f77052581cb3f4df35fb48c0cc5d9cdcf5c872e android.hardware.neuralnetworks@1.3::types
544049dcda3f943ad67d83d5277f06681a3782982a9af5a78b5d4e8d295d061a android.hardware.vibrator@1.4::IVibrator
5e1c12efbbba89c9143d10b1b90eceff8bc79aa079f5106215b528e104fef101 android.hardware.vibrator@1.4::IVibratorCallback
033eae03c09ebc75e82db37bc39995dfaa9086745577b44d9e14e9ccb48bd8cc android.hardware.vibrator@1.4::types
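
The two hashes added above are the frozen-interface checksums for the new
1.3 package. As a sketch of the usual workflow (assumed here, not part of
this change), they can be regenerated with hidl-gen's hash output mode:

    hidl-gen -L hash -r android.hardware:hardware/interfaces \
        -r android.hidl:system/libhidl/transport \
        android.hardware.neuralnetworks@1.3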
diff --git a/neuralnetworks/1.2/vts/functional/Android.bp b/neuralnetworks/1.2/vts/functional/Android.bp
index 89ab62a..9c50d36 100644
--- a/neuralnetworks/1.2/vts/functional/Android.bp
+++ b/neuralnetworks/1.2/vts/functional/Android.bp
@@ -37,6 +37,7 @@
"android.hardware.neuralnetworks@1.0",
"android.hardware.neuralnetworks@1.1",
"android.hardware.neuralnetworks@1.2",
+ "android.hardware.neuralnetworks@1.3",
"android.hidl.allocator@1.0",
"android.hidl.memory@1.0",
"libgmock",
diff --git a/neuralnetworks/1.3/Android.bp b/neuralnetworks/1.3/Android.bp
new file mode 100644
index 0000000..0615ec6
--- /dev/null
+++ b/neuralnetworks/1.3/Android.bp
@@ -0,0 +1,21 @@
+// This file is autogenerated by hidl-gen -Landroidbp.
+
+hidl_interface {
+ name: "android.hardware.neuralnetworks@1.3",
+ root: "android.hardware",
+ vndk: {
+ enabled: true,
+ },
+ srcs: [
+ "types.hal",
+ "IDevice.hal",
+ ],
+ interfaces: [
+ "android.hardware.neuralnetworks@1.0",
+ "android.hardware.neuralnetworks@1.1",
+ "android.hardware.neuralnetworks@1.2",
+ "android.hidl.base@1.0",
+ "android.hidl.safe_union@1.0",
+ ],
+ gen_java: false,
+}
diff --git a/neuralnetworks/1.3/IDevice.hal b/neuralnetworks/1.3/IDevice.hal
new file mode 100644
index 0000000..ee36fb4
--- /dev/null
+++ b/neuralnetworks/1.3/IDevice.hal
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.3;
+
+import @1.0::ErrorStatus;
+import @1.1::ExecutionPreference;
+import @1.2::Constant;
+import @1.2::DeviceType;
+import @1.2::Extension;
+import @1.2::IDevice;
+import @1.2::IPreparedModelCallback;
+
+/**
+ * This interface represents a device driver.
+ */
+interface IDevice extends @1.2::IDevice {
+ /**
+ * Gets the capabilities of a driver.
+ *
+ * @return status Error status of the call, must be:
+ * - NONE if successful
+ * - DEVICE_UNAVAILABLE if driver is offline or busy
+ * - GENERAL_FAILURE if there is an unspecified error
+ * @return capabilities Capabilities of the driver.
+ */
+ getCapabilities_1_3() generates (ErrorStatus status, Capabilities capabilities);
+
+ /**
+ * Gets the supported operations in a model.
+ *
+ * getSupportedOperations indicates which operations of a model are fully
+ * supported by the vendor driver. If an operation might not be supported for
+ * any reason, getSupportedOperations must return false for that operation.
+ *
+ * @param model A model whose operations, and their corresponding operands,
+ * are to be verified by the driver.
+ * @return status Error status of the call, must be:
+ * - NONE if successful
+ * - DEVICE_UNAVAILABLE if driver is offline or busy
+ * - GENERAL_FAILURE if there is an unspecified error
+ * - INVALID_ARGUMENT if provided model is invalid
+ * @return supportedOperations A list of supported operations, where true
+ * indicates the operation is supported and false indicates the
+ * operation is not supported. The index of "supported" corresponds with
+ * the index of the operation it is describing.
+ */
+ getSupportedOperations_1_3(Model model)
+ generates (ErrorStatus status, vec<bool> supportedOperations);
+
+ /**
+ * Asynchronously creates a prepared model for execution and optionally
+ * saves it into cache files.
+ *
+ * prepareModel is used to make any necessary transformations to, or
+ * alternative representations of, a model for execution, possibly including
+ * transformations on the constant data, optimization on the model's graph,
+ * or compilation into the device's native binary format. The model itself
+ * is not changed.
+ *
+ * Optionally, caching information may be provided for the driver to save
+ * the prepared model to cache files for faster model compilation time when
+ * the same model preparation is requested in the future. There are two
+ * types of cache file handles provided to the driver: model cache and data
+ * cache. For more information on the two types of cache handles, refer to
+ * getNumberOfCacheFilesNeeded.
+ *
+ * The file descriptors must be opened with read and write permission. A
+ * file may have any size, and the corresponding file descriptor may have
+ * any offset. The driver must truncate a file to zero size before writing
+ * to that file. The file descriptors may be closed by the client once the
+ * asynchronous preparation has finished. The driver must dup a file
+ * descriptor if it wants to get access to the cache file later.
+ *
+ * The model is prepared asynchronously with respect to the caller. The
+ * prepareModel function must verify that the inputs to the prepareModel
+ * function related to preparing the model (as opposed to saving the
+ * prepared model to cache) are correct. If there is an error, prepareModel
+ * must immediately invoke the callback with the appropriate ErrorStatus
+ * value and nullptr for the IPreparedModel, then return with the same
+ * ErrorStatus. If the inputs to the prepareModel function that are related
+ * to preparing the model are valid and there is no error, prepareModel must
+ * launch an asynchronous task to prepare the model in the background, and
+ * immediately return from prepareModel with ErrorStatus::NONE. If the
+ * asynchronous task fails to launch, prepareModel must immediately invoke
+ * the callback with ErrorStatus::GENERAL_FAILURE and nullptr for the
+ * IPreparedModel, then return with ErrorStatus::GENERAL_FAILURE.
+ *
+ * When the asynchronous task has finished preparing the model, it must
+ * immediately invoke the callback function provided as an input to
+ * prepareModel. If the model was prepared successfully, the callback object
+ * must be invoked with an error status of ErrorStatus::NONE and the
+ * produced IPreparedModel object. If an error occurred preparing the model,
+ * the callback object must be invoked with the appropriate ErrorStatus
+ * value and nullptr for the IPreparedModel.
+ *
+ * Optionally, the driver may save the prepared model to cache during the
+ * asynchronous preparation. Any error that occurs when saving to cache must
+ * not affect the status of preparing the model. Even if the input arguments
+ * related to the cache are invalid, or the driver fails to save to
+ * cache, the prepareModel function must finish preparing the model. The
+ * driver may choose not to save to cache even if the caching information is
+ * provided and valid.
+ *
+ * The only information that may be unknown to the model at this stage is
+ * the shape of the tensors, which may only be known at execution time. As
+ * such, some driver services may return partially prepared models, where
+ * the prepared model may only be finished when it is paired with a set of
+ * inputs to the model. Note that the same prepared model object may be used
+ * with different shapes of inputs on different (possibly concurrent)
+ * executions.
+ *
+ * Multiple threads may call prepareModel on the same model concurrently.
+ *
+ * @param model The model to be prepared for execution.
+ * @param preference Indicates the intended execution behavior of a prepared
+ * model.
+ * @param modelCache A vector of handles with each entry holding exactly one
+ * cache file descriptor for the security-sensitive cache. The length of
+ * the vector must either be 0 indicating that caching information is
+ * not provided, or match the numModelCache returned from
+ * getNumberOfCacheFilesNeeded. The cache handles will be provided in
+ * the same order when retrieving the preparedModel from cache files
+ * with prepareModelFromCache.
+ * @param dataCache A vector of handles with each entry holding exactly one
+ * cache file descriptor for the constants' cache. The length of the
+ * vector must either be 0 indicating that caching information is not
+ * provided, or match the numDataCache returned from
+ * getNumberOfCacheFilesNeeded. The cache handles will be provided in
+ * the same order when retrieving the preparedModel from cache files
+ * with prepareModelFromCache.
+ * @param token A caching token of length Constant::BYTE_SIZE_OF_CACHE_TOKEN
+ * identifying the prepared model. The same token will be provided when
+ * retrieving the prepared model from the cache files with
+ * prepareModelFromCache. Tokens should be chosen to have a low rate of
+ * collision for a particular application. The driver cannot detect a
+ * collision; a collision will result in a failed execution or in a
+ * successful execution that produces incorrect output values. If both
+ * modelCache and dataCache are empty indicating that caching
+ * information is not provided, this token must be ignored.
+ * @param callback A callback object used to return the error status of
+ * preparing the model for execution and the prepared model if
+ * successful, nullptr otherwise. The callback object's notify function
+ * must be called exactly once, even if the model could not be prepared.
+ * @return status Error status of launching a task which prepares the model
+ * in the background; must be:
+ * - NONE if preparation task is successfully launched
+ * - DEVICE_UNAVAILABLE if driver is offline or busy
+ * - GENERAL_FAILURE if there is an unspecified error
+ * - INVALID_ARGUMENT if one of the input arguments related to preparing
+ * the model is invalid
+ */
+ prepareModel_1_3(Model model, ExecutionPreference preference,
+ vec<handle> modelCache, vec<handle> dataCache,
+ uint8_t[Constant:BYTE_SIZE_OF_CACHE_TOKEN] token,
+ IPreparedModelCallback callback)
+ generates (ErrorStatus status);
+};
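
A minimal driver-side sketch of the asynchronous contract documented for
prepareModel_1_3 above. It assumes the generated HIDL C++ bindings (with the
V1_0..V1_3 namespace aliases in scope); validateModel() and compile() are
hypothetical stand-ins for a driver's own logic, so treat this as an
illustration, not a reference implementation:

    #include <thread>

    // Sketch of the documented error/launch protocol (illustrative only).
    Return<V1_0::ErrorStatus> Device::prepareModel_1_3(
            const V1_3::Model& model, V1_1::ExecutionPreference /*preference*/,
            const hidl_vec<hidl_handle>& /*modelCache*/,
            const hidl_vec<hidl_handle>& /*dataCache*/,
            const hidl_array<uint8_t, 32>& /*token*/,
            const sp<V1_2::IPreparedModelCallback>& callback) {
        if (callback == nullptr) {
            return V1_0::ErrorStatus::INVALID_ARGUMENT;
        }
        if (!validateModel(model)) {  // hypothetical validation helper
            // Invalid input: invoke the callback with the error and nullptr,
            // then return the same status.
            callback->notify_1_2(V1_0::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_0::ErrorStatus::INVALID_ARGUMENT;
        }
        // Valid input: launch the background preparation and return NONE
        // immediately, as required above.
        std::thread([model, callback] {
            sp<V1_2::IPreparedModel> prepared = compile(model);  // hypothetical
            callback->notify_1_2(prepared == nullptr
                                         ? V1_0::ErrorStatus::GENERAL_FAILURE
                                         : V1_0::ErrorStatus::NONE,
                                 prepared);
        }).detach();
        return V1_0::ErrorStatus::NONE;
    }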
diff --git a/neuralnetworks/1.3/types.hal b/neuralnetworks/1.3/types.hal
new file mode 100644
index 0000000..db5dd51
--- /dev/null
+++ b/neuralnetworks/1.3/types.hal
@@ -0,0 +1,361 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks@1.3;
+
+import @1.0::DataLocation;
+import @1.0::OperandLifeTime;
+import @1.0::PerformanceInfo;
+import @1.2::OperandType;
+import @1.2::OperationType;
+import @1.2::SymmPerChannelQuantParams;
+
+import android.hidl.safe_union@1.0::Monostate;
+
+/**
+ * NOTE: Since NNAPI 1.2, OEM operation and data type are deprecated. Extensions
+ * are the preferred alternative.
+ *
+ * NOTE: Adding a new fundamental type requires updating the value of
+ * OperandTypeRange::FUNDAMENTAL_MAX.
+ */
+enum OperandType : @1.2::OperandType {
+ /**
+ * A tensor of 8-bit signed integers that represent real numbers.
+ *
+ * Attached to this tensor are two numbers that can be used to convert the
+ * 8-bit integer to the real value and vice versa. These two numbers are:
+ * - scale: a 32-bit floating-point value greater than zero.
+ * - zeroPoint: a 32-bit integer, in range [-128, 127].
+ *
+ * The formula is:
+ * real_value = (integer_value - zeroPoint) * scale.
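+ *
+ * For example, with scale = 0.5 and zeroPoint = -128 (illustrative
+ * values, not mandated by this type), the stored value -128 maps to
+ * real_value = (-128 - (-128)) * 0.5 = 0.0, and the stored value 127
+ * maps to (127 - (-128)) * 0.5 = 127.5.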
+ *
+ * Available since API level 30.
+ */
+ TENSOR_QUANT8_ASYMM_SIGNED = 14,
+};
+
+/**
+ * The range of operand values in the OperandType enum.
+ */
+enum OperandTypeRange : uint32_t {
+ BASE_MIN = 0,
+ FUNDAMENTAL_MIN = 0,
+ FUNDAMENTAL_MAX = 14,
+ OEM_MIN = 10000,
+ OEM_MAX = 10001,
+ BASE_MAX = 0xFFFF,
+};
+
+/**
+ * The capabilities of a driver.
+ *
+ * The performance of an operation is determined by the type of its first
+ * operand. This struct only describes performance for non-extension
+ * operand types.
+ */
+struct Capabilities {
+ /**
+ * Driver performance when operating on float32 data but performing
+ * calculations with range and/or precision as low as that of the IEEE
+ * 754 16-bit floating-point format.
+ */
+ PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
+ PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;
+
+ /**
+ * Driver performance when operating on a particular data type.
+ * In the case of float32 data, this is used when the calculations
+ * are not relaxed.
+ */
+ struct OperandPerformance {
+ OperandType type;
+ PerformanceInfo info;
+ };
+
+ /**
+ * Performance by operand type. Must be sorted by OperandType.
+ * If a particular OperandType is not present in operandPerformance,
+ * its performance is treated as
+ * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
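+ *
+ * (Keeping the entries sorted allows a type's performance to be looked
+ * up with a binary search instead of a linear scan.)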
+ */
+ vec<OperandPerformance> operandPerformance;
+};
+
+/**
+ * Describes one operand of the model's graph.
+ */
+struct Operand {
+ /**
+ * The data type.
+ *
+ * Besides the values listed in {@link OperandType}, any value above
+ * {@link OperandTypeRange::BASE_MAX} is possible and should be interpreted
+ * as an extension type according to {@link Model::extensionNameToPrefix}.
+ */
+ OperandType type;
+
+ /**
+ * Dimensions of the operand.
+ *
+ * For a scalar operand, dimensions.size() must be 0.
+ *
+ * A tensor operand with all dimensions specified has "fully
+ * specified" dimensions. Whenever possible (i.e., whenever the
+ * dimensions are known at model construction time), a tensor
+ * operand should have (but is not required to have) fully
+ * specified dimensions, in order to enable the best possible
+ * performance.
+ *
+ * If a tensor operand's dimensions are not fully specified, the
+ * dimensions of the operand are deduced from the operand
+ * dimensions and values of the operation for which that operand
+ * is an output.
+ *
+ * In the following situations, a tensor operand's dimensions must
+ * be fully specified:
+ *
+ * . The operand has lifetime CONSTANT_COPY or
+ * CONSTANT_REFERENCE.
+ *
+ * . The operand has lifetime MODEL_INPUT. Fully
+ * specified dimensions must either be present in the
+ * Operand or they must be provided in the corresponding
+ * RequestArgument.
+ * EXCEPTION: If the input is optional and omitted
+ * (by setting the hasNoValue field of the corresponding
+ * RequestArgument to true) then it need not have fully
+ * specified dimensions.
+ *
+ * A tensor operand with some number of unspecified dimensions is
+ * represented by setting each unspecified dimension to 0.
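+ *
+ * For example (an illustrative shape, not taken from this spec), a
+ * rank-3 tensor whose middle dimension is unknown at model construction
+ * time has dimensions = {2, 0, 3}.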
+ *
+ * A tensor operand with unspecified rank is represented by providing
+ * an empty dimensions vector.
+ */
+ vec<uint32_t> dimensions;
+
+ /**
+ * The number of times this operand appears as an operation input.
+ *
+ * (For example, if this operand appears once in one operation's
+ * input list, and three times in another operation's input list,
+ * then numberOfConsumers = 4.)
+ */
+ uint32_t numberOfConsumers;
+
+ /**
+ * Quantized scale of the operand.
+ *
+ * Only applicable if the operand is of type TENSOR_QUANT8_ASYMM,
+ * TENSOR_QUANT8_ASYMM_SIGNED, or TENSOR_INT32.
+ */
+ float scale;
+
+ /**
+ * Quantized zero-point offset of the operand.
+ *
+ * Only applicable if the operand is of type TENSOR_QUANT8_ASYMM or
+ * TENSOR_QUANT8_ASYMM_SIGNED.
+ */
+ int32_t zeroPoint;
+
+ /**
+ * How the operand is used.
+ */
+ OperandLifeTime lifetime;
+
+ /**
+ * Where to find the data for this operand.
+ * If the lifetime is TEMPORARY_VARIABLE, MODEL_INPUT, MODEL_OUTPUT, or
+ * NO_VALUE:
+ * - All the fields must be 0.
+ * If the lifetime is CONSTANT_COPY:
+ * - location.poolIndex is 0.
+ * - location.offset is the offset in bytes into Model.operandValues.
+ * - location.length is set.
+ * If the lifetime is CONSTANT_REFERENCE:
+ * - location.poolIndex is set.
+ * - location.offset is the offset in bytes into the specified pool.
+ * - location.length is set.
+ */
+ DataLocation location;
+
+ /**
+ * Additional parameters specific to a particular operand type.
+ */
+ safe_union ExtraParams {
+ /**
+ * No additional parameters.
+ */
+ Monostate none;
+
+ /**
+ * Symmetric per-channel quantization parameters.
+ *
+ * Only applicable to operands of type TENSOR_QUANT8_SYMM_PER_CHANNEL.
+ */
+ SymmPerChannelQuantParams channelQuant;
+
+ /**
+ * Extension operand parameters.
+ *
+ * The framework treats this as an opaque data blob.
+ * The format is up to individual extensions.
+ */
+ vec<uint8_t> extension;
+ } extraParams;
+};
+
+/**
+ * Describes one operation of the model's graph.
+ */
+struct Operation {
+ /**
+ * The operation type.
+ */
+ OperationType type;
+
+ /**
+ * The indexes of the operands that are inputs to the operation. Each
+ * value corresponds to the index of the operand in "operands".
+ */
+ vec<uint32_t> inputs;
+
+ /**
+ * The indexes of the operands that are outputs of the operation. Each
+ * value corresponds to the index of the operand in "operands".
+ */
+ vec<uint32_t> outputs;
+};
+
+/**
+ * A Neural Network Model.
+ *
+ * This includes not only the execution graph, but also constant data such as
+ * weights or scalars added at construction time. The only information that
+ * may not be known is the shape of the input tensors.
+ */
+struct Model {
+ /**
+ * All operands included in the model.
+ */
+ vec<Operand> operands;
+
+ /**
+ * All operations included in the model.
+ *
+ * The operations are sorted into execution order. Every operand
+ * with lifetime MODEL_OUTPUT or TEMPORARY_VARIABLE must be
+ * written before it is read.
+ */
+ vec<Operation> operations;
+
+ /**
+ * Input indexes of the model. There must be at least one.
+ *
+ * Each value corresponds to the index of the operand in "operands".
+ */
+ vec<uint32_t> inputIndexes;
+
+ /**
+ * Output indexes of the model. There must be at least one.
+ *
+ * Each value corresponds to the index of the operand in "operands".
+ */
+ vec<uint32_t> outputIndexes;
+
+ /**
+ * A byte buffer containing operand data that were copied into the model.
+ *
+ * An operand's value must be located here if and only if Operand::lifetime
+ * equals OperandLifeTime::CONSTANT_COPY.
+ */
+ vec<uint8_t> operandValues;
+
+ /**
+ * A collection of shared memory pools containing operand values.
+ *
+ * An operand's value must be located here if and only if Operand::lifetime
+ * equals OperandLifeTime::CONSTANT_REFERENCE.
+ */
+ vec<memory> pools;
+
+ /**
+ * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
+ * precision as low as that of the IEEE 754 16-bit floating-point format.
+ * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
+ * range and precision of the IEEE 754 32-bit floating-point format.
+ */
+ bool relaxComputationFloat32toFloat16;
+
+ /**
+ * The mapping between extension names and prefixes of operand and
+ * operation type values.
+ *
+ * An operand or operation whose numeric type value is above
+ * {@link OperandTypeRange::BASE_MAX} or
+ * {@link OperationTypeRange::BASE_MAX} respectively should be interpreted
+ * as an extension operand. The low
+ * {@link Model::ExtensionTypeEncoding::LOW_BITS_TYPE} bits of the value
+ * correspond to the type ID within the extension and the high
+ * {@link Model::ExtensionTypeEncoding::HIGH_BITS_PREFIX} bits encode
+ * the "prefix", which maps uniquely to the extension name.
+ *
+ * For example, if a model contains an operation whose value is
+ * 0xAAAABBBB and extensionNameToPrefix contains an entry with
+ * prefix=0xAAAA and name="vendor.test.test_extension", then
+ * the operation should be interpreted as the operation 0xBBBB
+ * of the extension named vendor.test.test_extension.
+ *
+ * This is a one-to-one correspondence. That is, there must be at most one
+ * prefix corresponding to each extension name and at most one extension
+ * name corresponding to each prefix.
+ */
+ vec<ExtensionNameAndPrefix> extensionNameToPrefix;
+
+ /**
+ * A correspondence between an extension name and a prefix of operand and
+ * operation type values.
+ */
+ struct ExtensionNameAndPrefix {
+ /**
+ * The extension name.
+ *
+ * See {@link Extension::name} for the format specification.
+ */
+ string name;
+
+ /**
+ * The unique extension identifier within the model.
+ *
+ * See {@link Model::extensionNameToPrefix}.
+ */
+ uint16_t prefix;
+ };
+
+ /**
+ * Numeric values of extension operand and operation types have the
+ * following structure:
+ * - 16 high bits represent the "prefix", which corresponds uniquely to the
+ * extension name.
+ * - 16 low bits represent the type ID within the extension.
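+ *
+ * For example, the value 0xAAAABBBB from the example in
+ * {@link Model::extensionNameToPrefix} decomposes as
+ * prefix = 0xAAAABBBB >> LOW_BITS_TYPE = 0xAAAA and
+ * type ID = 0xAAAABBBB & 0xFFFF = 0xBBBB.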
+ */
+ enum ExtensionTypeEncoding : uint8_t {
+ HIGH_BITS_PREFIX = 16,
+ LOW_BITS_TYPE = 16,
+ };
+};
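
To make the TENSOR_QUANT8_ASYMM_SIGNED conversion concrete, here is a small
self-contained C++ sketch of the quantization formula above and its inverse.
The helper names and the round-to-nearest-then-clamp policy are assumptions
made for illustration, not part of this HAL:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // real_value = (integer_value - zeroPoint) * scale
    float dequantizeSigned(int8_t q, float scale, int32_t zeroPoint) {
        return static_cast<float>(static_cast<int32_t>(q) - zeroPoint) * scale;
    }

    // Inverse mapping (assumed policy): round to nearest, then clamp to
    // the representable range [-128, 127].
    int8_t quantizeSigned(float r, float scale, int32_t zeroPoint) {
        const int32_t q =
                static_cast<int32_t>(std::lround(r / scale)) + zeroPoint;
        return static_cast<int8_t>(std::min(127, std::max(-128, q)));
    }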