Implement VTS tests for NNAPI AIDL interface

The tests are copied from the HIDL 1.0-1.3 VTS tests and updated to use AIDL.

Bug: 172922059
Test: VtsHalNeuralnetworksTargetTest
Change-Id: Ife08409e9b46420685a1ccb0b3256286c973dbf5
Merged-In: Ife08409e9b46420685a1ccb0b3256286c973dbf5
(cherry picked from commit b38bb4f12a1ceb33ebd0dd798650a74a8ef9d20e)
diff --git a/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.cpp
new file mode 100644
index 0000000..86d5f3f
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.cpp
@@ -0,0 +1,925 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GeneratedTestHarness.h"
+
+#include <aidl/android/hardware/neuralnetworks/ErrorStatus.h>
+#include <android-base/logging.h>
+#include <android/binder_auto_utils.h>
+#include <android/sync.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <chrono>
+#include <iostream>
+#include <iterator>
+#include <numeric>
+#include <vector>
+
+#include <MemoryUtils.h>
+#include <android/binder_status.h>
+#include <nnapi/Result.h>
+#include <nnapi/SharedMemory.h>
+#include <nnapi/Types.h>
+#include <nnapi/hal/aidl/Conversions.h>
+#include <nnapi/hal/aidl/Utils.h>
+
+#include "Callbacks.h"
+#include "TestHarness.h"
+#include "Utils.h"
+#include "VtsHalNeuralnetworks.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+namespace nn = ::android::nn;
+using namespace test_helper;
+using implementation::PreparedModelCallback;
+
+namespace {
+
+enum class OutputType { FULLY_SPECIFIED, UNSPECIFIED, INSUFFICIENT, MISSED_DEADLINE };
+
+struct TestConfig {
+    Executor executor;
+    bool measureTiming;
+    OutputType outputType;
+    MemoryType memoryType;
+    // `reportSkipping` indicates whether a test should print an info message
+    // when it is skipped. It defaults to true and is set to false in
+    // quantization coupling tests, which report skipping themselves.
+    bool reportSkipping;
+    TestConfig(Executor executor, bool measureTiming, OutputType outputType, MemoryType memoryType)
+        : executor(executor),
+          measureTiming(measureTiming),
+          outputType(outputType),
+          memoryType(memoryType),
+          reportSkipping(true) {}
+    TestConfig(Executor executor, bool measureTiming, OutputType outputType, MemoryType memoryType,
+               bool reportSkipping)
+        : executor(executor),
+          measureTiming(measureTiming),
+          outputType(outputType),
+          memoryType(memoryType),
+          reportSkipping(reportSkipping) {}
+};
+
+enum class IOType { INPUT, OUTPUT };
+
+class DeviceMemoryAllocator {
+  public:
+    DeviceMemoryAllocator(const std::shared_ptr<IDevice>& device,
+                          const std::shared_ptr<IPreparedModel>& preparedModel,
+                          const TestModel& testModel)
+        : kDevice(device), kPreparedModel(preparedModel), kTestModel(testModel) {}
+
+    // Allocate device memory for a target input/output operand.
+    // Return {IBuffer object, token} if successful.
+    // Return {nullptr, 0} if device memory is not supported.
+    template <IOType ioType>
+    std::pair<std::shared_ptr<IBuffer>, int32_t> allocate(uint32_t index) {
+        std::pair<std::shared_ptr<IBuffer>, int32_t> buffer;
+        allocateInternal<ioType>(index, &buffer);
+        return buffer;
+    }
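+
+    // Typical usage (an illustrative sketch; `allocator` is an instance of
+    // this class and index 0 refers to the first model input):
+    //
+    //   auto [buffer, token] = allocator.allocate<IOType::INPUT>(0);
+    //   if (buffer != nullptr) {
+    //       // Device memory is supported; reference this buffer via `token`.
+    //   }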
+
+  private:
+    template <IOType ioType>
+    void allocateInternal(int32_t index, std::pair<std::shared_ptr<IBuffer>, int32_t>* result) {
+        ASSERT_NE(result, nullptr);
+
+        // Prepare arguments.
+        BufferRole role = {.modelIndex = 0, .ioIndex = index, .frequency = 1.0f};
+        std::vector<BufferRole> inputRoles, outputRoles;
+        if constexpr (ioType == IOType::INPUT) {
+            inputRoles = {role};
+        } else {
+            outputRoles = {role};
+        }
+
+        // Allocate device memory.
+        DeviceBuffer buffer;
+        IPreparedModelParcel parcel;
+        parcel.preparedModel = kPreparedModel;
+        const auto ret = kDevice->allocate({}, {parcel}, inputRoles, outputRoles, &buffer);
+
+        // Check allocation results.
+        if (ret.isOk()) {
+            ASSERT_NE(buffer.buffer, nullptr);
+            ASSERT_GT(buffer.token, 0);
+        } else {
+            ASSERT_EQ(ret.getExceptionCode(), EX_SERVICE_SPECIFIC);
+            ASSERT_EQ(static_cast<ErrorStatus>(ret.getServiceSpecificError()),
+                      ErrorStatus::GENERAL_FAILURE);
+            buffer.buffer = nullptr;
+            buffer.token = 0;
+        }
+
+        // Initialize input data from TestBuffer.
+        if constexpr (ioType == IOType::INPUT) {
+            if (buffer.buffer != nullptr) {
+                // TestBuffer -> Shared memory.
+                const auto& testBuffer =
+                        kTestModel.main.operands[kTestModel.main.inputIndexes[index]].data;
+                ASSERT_GT(testBuffer.size(), 0);
+                const auto sharedMemory = nn::createSharedMemory(testBuffer.size()).value();
+                const auto memory = utils::convert(sharedMemory).value();
+                const auto mapping = nn::map(sharedMemory).value();
+                uint8_t* inputPtr = static_cast<uint8_t*>(std::get<void*>(mapping.pointer));
+                ASSERT_NE(inputPtr, nullptr);
+                const uint8_t* begin = testBuffer.get<uint8_t>();
+                const uint8_t* end = begin + testBuffer.size();
+                std::copy(begin, end, inputPtr);
+
+                // Shared memory -> IBuffer.
+                auto ret = buffer.buffer->copyFrom(memory, {});
+                ASSERT_TRUE(ret.isOk());
+            }
+        }
+        *result = {std::move(buffer.buffer), buffer.token};
+    }
+
+    const std::shared_ptr<IDevice> kDevice;
+    const std::shared_ptr<IPreparedModel> kPreparedModel;
+    const TestModel& kTestModel;
+};
+
+Subgraph createSubgraph(const TestSubgraph& testSubgraph, uint32_t* constCopySize,
+                        std::vector<const TestBuffer*>* constCopies, uint32_t* constRefSize,
+                        std::vector<const TestBuffer*>* constReferences) {
+    CHECK(constCopySize != nullptr);
+    CHECK(constCopies != nullptr);
+    CHECK(constRefSize != nullptr);
+    CHECK(constReferences != nullptr);
+
+    // Operands.
+    std::vector<Operand> operands(testSubgraph.operands.size());
+    for (uint32_t i = 0; i < testSubgraph.operands.size(); i++) {
+        const auto& op = testSubgraph.operands[i];
+
+        DataLocation loc = {};
+        if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *constCopySize,
+                    .length = static_cast<int64_t>(op.data.size()),
+            };
+            constCopies->push_back(&op.data);
+            *constCopySize += op.data.alignedSize();
+        } else if (op.lifetime == TestOperandLifeTime::CONSTANT_REFERENCE) {
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *constRefSize,
+                    .length = static_cast<int64_t>(op.data.size()),
+            };
+            constReferences->push_back(&op.data);
+            *constRefSize += op.data.alignedSize();
+        } else if (op.lifetime == TestOperandLifeTime::SUBGRAPH) {
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *op.data.get<uint32_t>(),
+                    .length = 0,
+            };
+        }
+
+        std::optional<OperandExtraParams> extraParams;
+        if (op.type == TestOperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
+            using Tag = OperandExtraParams::Tag;
+            extraParams = OperandExtraParams::make<Tag::channelQuant>(SymmPerChannelQuantParams{
+                    .scales = op.channelQuant.scales,
+                    .channelDim = static_cast<int32_t>(op.channelQuant.channelDim)});
+        }
+
+        operands[i] = {.type = static_cast<OperandType>(op.type),
+                       .dimensions = utils::toSigned(op.dimensions).value(),
+                       .scale = op.scale,
+                       .zeroPoint = op.zeroPoint,
+                       .lifetime = static_cast<OperandLifeTime>(op.lifetime),
+                       .location = loc,
+                       .extraParams = std::move(extraParams)};
+    }
+
+    // Operations.
+    std::vector<Operation> operations(testSubgraph.operations.size());
+    std::transform(testSubgraph.operations.begin(), testSubgraph.operations.end(),
+                   operations.begin(), [](const TestOperation& op) -> Operation {
+                       return {.type = static_cast<OperationType>(op.type),
+                               .inputs = utils::toSigned(op.inputs).value(),
+                               .outputs = utils::toSigned(op.outputs).value()};
+                   });
+
+    return {.operands = std::move(operands),
+            .operations = std::move(operations),
+            .inputIndexes = utils::toSigned(testSubgraph.inputIndexes).value(),
+            .outputIndexes = utils::toSigned(testSubgraph.outputIndexes).value()};
+}
+
+void copyTestBuffers(const std::vector<const TestBuffer*>& buffers, uint8_t* output) {
+    uint32_t offset = 0;
+    for (const TestBuffer* buffer : buffers) {
+        const uint8_t* begin = buffer->get<uint8_t>();
+        const uint8_t* end = begin + buffer->size();
+        std::copy(begin, end, output + offset);
+        offset += buffer->alignedSize();
+    }
+}
+
+}  // namespace
+
+void waitForSyncFence(int syncFd) {
+    constexpr int kInfiniteTimeout = -1;
+    ASSERT_GT(syncFd, 0);
+    int r = sync_wait(syncFd, kInfiniteTimeout);
+    ASSERT_GE(r, 0);
+}
+
+Model createModel(const TestModel& testModel) {
+    uint32_t constCopySize = 0;
+    uint32_t constRefSize = 0;
+    std::vector<const TestBuffer*> constCopies;
+    std::vector<const TestBuffer*> constReferences;
+
+    Subgraph mainSubgraph = createSubgraph(testModel.main, &constCopySize, &constCopies,
+                                           &constRefSize, &constReferences);
+    std::vector<Subgraph> refSubgraphs(testModel.referenced.size());
+    std::transform(testModel.referenced.begin(), testModel.referenced.end(), refSubgraphs.begin(),
+                   [&constCopySize, &constCopies, &constRefSize,
+                    &constReferences](const TestSubgraph& testSubgraph) {
+                       return createSubgraph(testSubgraph, &constCopySize, &constCopies,
+                                             &constRefSize, &constReferences);
+                   });
+
+    // Constant copies.
+    std::vector<uint8_t> operandValues(constCopySize);
+    copyTestBuffers(constCopies, operandValues.data());
+
+    // Shared memory.
+    std::vector<nn::Memory> pools = {};
+    if (constRefSize > 0) {
+        const auto pool = nn::createSharedMemory(constRefSize).value();
+        pools.push_back(pool);
+
+        // Load the constant reference data into the shared memory pool.
+        const auto mappedMemory = nn::map(pool).value();
+        uint8_t* mappedPtr = static_cast<uint8_t*>(std::get<void*>(mappedMemory.pointer));
+        CHECK(mappedPtr != nullptr);
+
+        copyTestBuffers(constReferences, mappedPtr);
+    }
+
+    std::vector<Memory> aidlPools;
+    aidlPools.reserve(pools.size());
+    for (auto& pool : pools) {
+        auto aidlPool = utils::convert(pool).value();
+        aidlPools.push_back(std::move(aidlPool));
+    }
+
+    return {.main = std::move(mainSubgraph),
+            .referenced = std::move(refSubgraphs),
+            .operandValues = std::move(operandValues),
+            .pools = std::move(aidlPools),
+            .relaxComputationFloat32toFloat16 = testModel.isRelaxed};
+}
+
+static bool isOutputSizeGreaterThanOne(const TestModel& testModel, uint32_t index) {
+    const auto byteSize = testModel.main.operands[testModel.main.outputIndexes[index]].data.size();
+    return byteSize > 1u;
+}
+
+static void makeOutputInsufficientSize(uint32_t outputIndex, Request* request) {
+    auto& length = request->outputs[outputIndex].location.length;
+    ASSERT_GT(length, 1u);
+    length -= 1u;
+}
+
+static void makeOutputDimensionsUnspecified(Model* model) {
+    for (auto i : model->main.outputIndexes) {
+        auto& dims = model->main.operands[i].dimensions;
+        std::fill(dims.begin(), dims.end(), 0);
+    }
+}
+
+// Manages the lifetime of memory resources used in an execution.
+class ExecutionContext {
+  public:
+    ExecutionContext(std::shared_ptr<IDevice> device, std::shared_ptr<IPreparedModel> preparedModel)
+        : kDevice(std::move(device)), kPreparedModel(std::move(preparedModel)) {}
+
+    std::optional<Request> createRequest(const TestModel& testModel, MemoryType memoryType);
+    std::vector<TestBuffer> getOutputBuffers(const TestModel& testModel,
+                                             const Request& request) const;
+
+  private:
+    // Get a TestBuffer with data copied from an IBuffer object.
+    void getBuffer(const std::shared_ptr<IBuffer>& buffer, size_t size,
+                   TestBuffer* testBuffer) const;
+
+    static constexpr uint32_t kInputPoolIndex = 0;
+    static constexpr uint32_t kOutputPoolIndex = 1;
+    static constexpr uint32_t kDeviceMemoryBeginIndex = 2;
+
+    const std::shared_ptr<IDevice> kDevice;
+    const std::shared_ptr<IPreparedModel> kPreparedModel;
+    std::unique_ptr<TestMemoryBase> mInputMemory, mOutputMemory;
+    std::vector<std::shared_ptr<IBuffer>> mBuffers;
+};
+
+std::optional<Request> ExecutionContext::createRequest(const TestModel& testModel,
+                                                       MemoryType memoryType) {
+    // Memory pools are organized as:
+    // - 0: Input shared memory pool
+    // - 1: Output shared memory pool
+    // - [2, 2+i): Input device memories
+    // - [2+i, 2+i+o): Output device memories
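+    // For example, with two device-memory inputs and one device-memory output,
+    // the pools are [input pool, output pool, devIn0, devIn1, devOut0],
+    // i.e. i = 2 and o = 1 in the layout above.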
+    DeviceMemoryAllocator allocator(kDevice, kPreparedModel, testModel);
+    std::vector<int32_t> tokens;
+    mBuffers.clear();
+
+    // Model inputs.
+    std::vector<RequestArgument> inputs(testModel.main.inputIndexes.size());
+    size_t inputSize = 0;
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
+        if (op.data.size() == 0) {
+            // Omitted input.
+            inputs[i] = {.hasNoValue = true};
+            continue;
+        } else if (memoryType == MemoryType::DEVICE) {
+            SCOPED_TRACE("Input index = " + std::to_string(i));
+            auto [buffer, token] = allocator.allocate<IOType::INPUT>(i);
+            if (buffer != nullptr) {
+                DataLocation loc = {.poolIndex = static_cast<int32_t>(mBuffers.size() +
+                                                                      kDeviceMemoryBeginIndex)};
+                mBuffers.push_back(std::move(buffer));
+                tokens.push_back(token);
+                inputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+                continue;
+            }
+        }
+
+        // Reserve shared memory for input.
+        DataLocation loc = {.poolIndex = kInputPoolIndex,
+                            .offset = static_cast<int64_t>(inputSize),
+                            .length = static_cast<int64_t>(op.data.size())};
+        inputSize += op.data.alignedSize();
+        inputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+    }
+
+    // Model outputs.
+    std::vector<RequestArgument> outputs(testModel.main.outputIndexes.size());
+    size_t outputSize = 0;
+    for (uint32_t i = 0; i < testModel.main.outputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
+        if (memoryType == MemoryType::DEVICE) {
+            SCOPED_TRACE("Output index = " + std::to_string(i));
+            auto [buffer, token] = allocator.allocate<IOType::OUTPUT>(i);
+            if (buffer != nullptr) {
+                DataLocation loc = {.poolIndex = static_cast<int32_t>(mBuffers.size() +
+                                                                      kDeviceMemoryBeginIndex)};
+                mBuffers.push_back(std::move(buffer));
+                tokens.push_back(token);
+                outputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+                continue;
+            }
+        }
+
+        // In the case of zero-sized output, we should at least provide a one-byte buffer.
+        // This is because zero-sized tensors are only supported internally to the driver, or
+        // reported in output shapes. It is illegal for the client to pre-specify a zero-sized
+        // tensor as model output. Otherwise, we will have two semantic conflicts:
+        // - "Zero dimension" conflicts with "unspecified dimension".
+        // - "Omitted operand buffer" conflicts with "zero-sized operand buffer".
+        size_t bufferSize = std::max<size_t>(op.data.size(), 1);
+
+        // Reserve shared memory for output.
+        DataLocation loc = {.poolIndex = kOutputPoolIndex,
+                            .offset = static_cast<int64_t>(outputSize),
+                            .length = static_cast<int64_t>(bufferSize)};
+        outputSize += op.data.size() == 0 ? TestBuffer::kAlignment : op.data.alignedSize();
+        outputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+    }
+
+    if (memoryType == MemoryType::DEVICE && mBuffers.empty()) {
+        return std::nullopt;
+    }
+
+    // Memory pools.
+    if (memoryType == MemoryType::BLOB_AHWB) {
+        mInputMemory = TestBlobAHWB::create(std::max<size_t>(inputSize, 1));
+        mOutputMemory = TestBlobAHWB::create(std::max<size_t>(outputSize, 1));
+    } else {
+        mInputMemory = TestAshmem::create(std::max<size_t>(inputSize, 1));
+        mOutputMemory = TestAshmem::create(std::max<size_t>(outputSize, 1));
+    }
+    CHECK_NE(mInputMemory, nullptr);
+    CHECK_NE(mOutputMemory, nullptr);
+    std::vector<RequestMemoryPool> pools;
+    pools.reserve(kDeviceMemoryBeginIndex + mBuffers.size());
+
+    auto copiedInputMemory = utils::clone(*mInputMemory->getAidlMemory());
+    CHECK(copiedInputMemory.has_value()) << copiedInputMemory.error().message;
+    auto copiedOutputMemory = utils::clone(*mOutputMemory->getAidlMemory());
+    CHECK(copiedOutputMemory.has_value()) << copiedOutputMemory.error().message;
+
+    pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::pool>(
+            std::move(copiedInputMemory).value()));
+    pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::pool>(
+            std::move(copiedOutputMemory).value()));
+    for (const auto& token : tokens) {
+        pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::token>(token));
+    }
+
+    // Copy input data to the input shared memory pool.
+    uint8_t* inputPtr = mInputMemory->getPointer();
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        if (!inputs[i].hasNoValue && inputs[i].location.poolIndex == kInputPoolIndex) {
+            const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
+            const uint8_t* begin = op.data.get<uint8_t>();
+            const uint8_t* end = begin + op.data.size();
+            std::copy(begin, end, inputPtr + inputs[i].location.offset);
+        }
+    }
+    return Request{
+            .inputs = std::move(inputs), .outputs = std::move(outputs), .pools = std::move(pools)};
+}
+
+std::vector<TestBuffer> ExecutionContext::getOutputBuffers(const TestModel& testModel,
+                                                           const Request& request) const {
+    // Copy out output results.
+    uint8_t* outputPtr = mOutputMemory->getPointer();
+    std::vector<TestBuffer> outputBuffers;
+    for (uint32_t i = 0; i < request.outputs.size(); i++) {
+        const auto& outputLoc = request.outputs[i].location;
+        if (outputLoc.poolIndex == kOutputPoolIndex) {
+            outputBuffers.emplace_back(outputLoc.length, outputPtr + outputLoc.offset);
+        } else {
+            const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
+            if (op.data.size() == 0) {
+                outputBuffers.emplace_back(0, nullptr);
+            } else {
+                SCOPED_TRACE("Output index = " + std::to_string(i));
+                const uint32_t bufferIndex = outputLoc.poolIndex - kDeviceMemoryBeginIndex;
+                TestBuffer buffer;
+                getBuffer(mBuffers[bufferIndex], op.data.size(), &buffer);
+                outputBuffers.push_back(std::move(buffer));
+            }
+        }
+    }
+    return outputBuffers;
+}
+
+// Get a TestBuffer with data copied from an IBuffer object.
+void ExecutionContext::getBuffer(const std::shared_ptr<IBuffer>& buffer, size_t size,
+                                 TestBuffer* testBuffer) const {
+    // IBuffer -> Shared memory.
+    auto sharedMemory = nn::createSharedMemory(size).value();
+    auto aidlMemory = utils::convert(sharedMemory).value();
+    const auto ret = buffer->copyTo(aidlMemory);
+    ASSERT_TRUE(ret.isOk());
+
+    // Shared memory -> TestBuffer.
+    const auto outputMemory = nn::map(sharedMemory).value();
+    const uint8_t* outputPtr = std::visit(
+            [](auto* ptr) { return static_cast<const uint8_t*>(ptr); }, outputMemory.pointer);
+    ASSERT_NE(outputPtr, nullptr);
+    ASSERT_NE(testBuffer, nullptr);
+    *testBuffer = TestBuffer(size, outputPtr);
+}
+
+static bool hasZeroSizedOutput(const TestModel& testModel) {
+    return std::any_of(testModel.main.outputIndexes.begin(), testModel.main.outputIndexes.end(),
+                       [&testModel](uint32_t index) {
+                           return testModel.main.operands[index].data.size() == 0;
+                       });
+}
+
+void EvaluatePreparedModel(const std::shared_ptr<IDevice>& device,
+                           const std::shared_ptr<IPreparedModel>& preparedModel,
+                           const TestModel& testModel, const TestConfig& testConfig,
+                           bool* skipped = nullptr) {
+    if (skipped != nullptr) {
+        *skipped = false;
+    }
+    // If output0 is not larger than one byte, we cannot test with an insufficient buffer.
+    if (testConfig.outputType == OutputType::INSUFFICIENT &&
+        !isOutputSizeGreaterThanOne(testModel, 0)) {
+        return;
+    }
+
+    ExecutionContext context(device, preparedModel);
+    auto maybeRequest = context.createRequest(testModel, testConfig.memoryType);
+    // Skip if testing memory domain but no device memory has been allocated.
+    if (!maybeRequest.has_value()) {
+        return;
+    }
+
+    Request request = std::move(maybeRequest).value();
+
+    constexpr uint32_t kInsufficientOutputIndex = 0;
+    if (testConfig.outputType == OutputType::INSUFFICIENT) {
+        makeOutputInsufficientSize(kInsufficientOutputIndex, &request);
+    }
+
+    int64_t loopTimeoutDuration = kOmittedTimeoutDuration;
+    // OutputType::MISSED_DEADLINE is only used by
+    // TestKind::INTINITE_LOOP_TIMEOUT tests to verify that an infinite loop is
+    // aborted after a timeout.
+    if (testConfig.outputType == OutputType::MISSED_DEADLINE) {
+        // Override the default loop timeout duration with a small value to
+        // speed up test execution.
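+        // (Loop timeout durations in the AIDL interface are expressed in
+        // nanoseconds, so 1'000'000 ns below is 1 ms.)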
+        constexpr int64_t kMillisecond = 1'000'000;
+        loopTimeoutDuration = 1 * kMillisecond;
+    }
+
+    ErrorStatus executionStatus;
+    std::vector<OutputShape> outputShapes;
+    Timing timing = kNoTiming;
+    switch (testConfig.executor) {
+        case Executor::SYNC: {
+            SCOPED_TRACE("synchronous");
+
+            ExecutionResult executionResult;
+            // Execute the request synchronously.
+            const auto ret = preparedModel->executeSynchronously(request, testConfig.measureTiming,
+                                                                 kNoDeadline, loopTimeoutDuration,
+                                                                 &executionResult);
+            ASSERT_TRUE(ret.isOk() || ret.getExceptionCode() == EX_SERVICE_SPECIFIC)
+                    << ret.getDescription();
+            if (ret.isOk()) {
+                executionStatus = executionResult.outputSufficientSize
+                                          ? ErrorStatus::NONE
+                                          : ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
+                outputShapes = std::move(executionResult.outputShapes);
+                timing = executionResult.timing;
+            } else {
+                executionStatus = static_cast<ErrorStatus>(ret.getServiceSpecificError());
+            }
+            break;
+        }
+        case Executor::FENCED: {
+            SCOPED_TRACE("fenced");
+            ErrorStatus result = ErrorStatus::NONE;
+            ndk::ScopedFileDescriptor syncFenceFd;
+            std::shared_ptr<IFencedExecutionCallback> fencedCallback;
+            auto ret = preparedModel->executeFenced(request, {}, testConfig.measureTiming,
+                                                    kNoDeadline, loopTimeoutDuration, kNoDuration,
+                                                    &syncFenceFd, &fencedCallback);
+            ASSERT_TRUE(ret.isOk() || ret.getExceptionCode() == EX_SERVICE_SPECIFIC)
+                    << ret.getDescription();
+            if (!ret.isOk()) {
+                result = static_cast<ErrorStatus>(ret.getServiceSpecificError());
+                executionStatus = result;
+            } else if (syncFenceFd.get() != -1) {
+                std::vector<ndk::ScopedFileDescriptor> waitFor;
+                auto dupFd = dup(syncFenceFd.get());
+                ASSERT_NE(dupFd, -1);
+                waitFor.emplace_back(dupFd);
+                // If a sync fence is returned, try to start another execution that waits on it.
+                ret = preparedModel->executeFenced(request, waitFor, testConfig.measureTiming,
+                                                   kNoDeadline, loopTimeoutDuration, kNoDuration,
+                                                   &syncFenceFd, &fencedCallback);
+                ASSERT_TRUE(ret.isOk());
+                waitForSyncFence(syncFenceFd.get());
+            }
+            if (result == ErrorStatus::NONE) {
+                ASSERT_NE(fencedCallback, nullptr);
+                Timing timingFenced;
+                auto ret =
+                        fencedCallback->getExecutionInfo(&timing, &timingFenced, &executionStatus);
+                ASSERT_TRUE(ret.isOk());
+            }
+            break;
+        }
+        default: {
+            FAIL() << "Unsupported execution mode for AIDL interface.";
+        }
+    }
+
+    if (testConfig.outputType != OutputType::FULLY_SPECIFIED &&
+        executionStatus == ErrorStatus::GENERAL_FAILURE) {
+        if (skipped != nullptr) {
+            *skipped = true;
+        }
+        if (!testConfig.reportSkipping) {
+            return;
+        }
+        LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
+                     "execute model that it does not support.";
+        std::cout << "[          ]   Early termination of test because vendor service cannot "
+                     "execute model that it does not support."
+                  << std::endl;
+        GTEST_SKIP();
+    }
+    if (!testConfig.measureTiming) {
+        EXPECT_EQ(timing, kNoTiming);
+    } else {
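+        // A duration of -1 means the driver did not report that measurement;
+        // compare the two only when both are present.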
+        if (timing.timeOnDevice != -1 && timing.timeInDriver != -1) {
+            EXPECT_LE(timing.timeOnDevice, timing.timeInDriver);
+        }
+    }
+
+    switch (testConfig.outputType) {
+        case OutputType::FULLY_SPECIFIED:
+            if (testConfig.executor == Executor::FENCED && hasZeroSizedOutput(testModel)) {
+                // Executor::FENCED does not support zero-sized output.
+                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
+                return;
+            }
+            // If the model output operands are fully specified, outputShapes must be
+            // either empty or have the same number of elements as the number of outputs.
+            ASSERT_EQ(ErrorStatus::NONE, executionStatus);
+            ASSERT_TRUE(outputShapes.size() == 0 ||
+                        outputShapes.size() == testModel.main.outputIndexes.size());
+            break;
+        case OutputType::UNSPECIFIED:
+            if (testConfig.executor == Executor::FENCED) {
+                // For Executor::FENCED, the output shape must be fully specified.
+                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
+                return;
+            }
+            // If the model output operands are not fully specified, outputShapes must have
+            // the same number of elements as the number of outputs.
+            ASSERT_EQ(ErrorStatus::NONE, executionStatus);
+            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
+            break;
+        case OutputType::INSUFFICIENT:
+            if (testConfig.executor == Executor::FENCED) {
+                // For Executor::FENCED, the output shape must be fully specified.
+                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
+                return;
+            }
+            ASSERT_EQ(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, executionStatus);
+            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
+            // Check that all returned output dimensions are at least as fully specified as the
+            // union of the information about the corresponding operand in the model and in the
+            // request. In this test, all model outputs have known rank with all dimensions
+            // unspecified, and no dimensional information is provided in the request.
+            for (uint32_t i = 0; i < outputShapes.size(); i++) {
+                ASSERT_EQ(outputShapes[i].isSufficient, i != kInsufficientOutputIndex);
+                const auto& actual = outputShapes[i].dimensions;
+                const auto& golden =
+                        testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
+                ASSERT_EQ(actual.size(), golden.size());
+                for (uint32_t j = 0; j < actual.size(); j++) {
+                    if (actual[j] == 0) continue;
+                    EXPECT_EQ(actual[j], golden[j]) << "index: " << j;
+                }
+            }
+            return;
+        case OutputType::MISSED_DEADLINE:
+            ASSERT_TRUE(executionStatus == ErrorStatus::MISSED_DEADLINE_TRANSIENT ||
+                        executionStatus == ErrorStatus::MISSED_DEADLINE_PERSISTENT)
+                    << "executionStatus = " << executionStatus;
+            return;
+    }
+
+    // Go through all outputs, check returned output shapes.
+    for (uint32_t i = 0; i < outputShapes.size(); i++) {
+        EXPECT_TRUE(outputShapes[i].isSufficient);
+        const auto& expect = testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
+        const auto unsignedActual = nn::toUnsigned(outputShapes[i].dimensions);
+        ASSERT_TRUE(unsignedActual.has_value());
+        const std::vector<uint32_t>& actual = unsignedActual.value();
+        EXPECT_EQ(expect, actual);
+    }
+
+    // Retrieve execution results.
+    const std::vector<TestBuffer> outputs = context.getOutputBuffers(testModel, request);
+
+    // We want "close-enough" results.
+    checkResults(testModel, outputs);
+}
+
+void EvaluatePreparedModel(const std::shared_ptr<IDevice>& device,
+                           const std::shared_ptr<IPreparedModel>& preparedModel,
+                           const TestModel& testModel, TestKind testKind) {
+    std::vector<OutputType> outputTypesList;
+    std::vector<bool> measureTimingList;
+    std::vector<Executor> executorList;
+    std::vector<MemoryType> memoryTypeList;
+
+    switch (testKind) {
+        case TestKind::GENERAL: {
+            outputTypesList = {OutputType::FULLY_SPECIFIED};
+            measureTimingList = {false, true};
+            executorList = {Executor::SYNC};
+            memoryTypeList = {MemoryType::ASHMEM};
+        } break;
+        case TestKind::DYNAMIC_SHAPE: {
+            outputTypesList = {OutputType::UNSPECIFIED, OutputType::INSUFFICIENT};
+            measureTimingList = {false, true};
+            executorList = {Executor::SYNC, Executor::FENCED};
+            memoryTypeList = {MemoryType::ASHMEM};
+        } break;
+        case TestKind::MEMORY_DOMAIN: {
+            outputTypesList = {OutputType::FULLY_SPECIFIED};
+            measureTimingList = {false};
+            executorList = {Executor::SYNC, Executor::FENCED};
+            memoryTypeList = {MemoryType::BLOB_AHWB, MemoryType::DEVICE};
+        } break;
+        case TestKind::FENCED_COMPUTE: {
+            outputTypesList = {OutputType::FULLY_SPECIFIED};
+            measureTimingList = {false, true};
+            executorList = {Executor::FENCED};
+            memoryTypeList = {MemoryType::ASHMEM};
+        } break;
+        case TestKind::QUANTIZATION_COUPLING: {
+            LOG(FATAL) << "Wrong TestKind for EvaluatePreparedModel";
+            return;
+        } break;
+        case TestKind::INTINITE_LOOP_TIMEOUT: {
+            outputTypesList = {OutputType::MISSED_DEADLINE};
+            measureTimingList = {false, true};
+            executorList = {Executor::SYNC, Executor::FENCED};
+            memoryTypeList = {MemoryType::ASHMEM};
+        } break;
+    }
+
+    for (const OutputType outputType : outputTypesList) {
+        for (const bool measureTiming : measureTimingList) {
+            for (const Executor executor : executorList) {
+                for (const MemoryType memoryType : memoryTypeList) {
+                    const TestConfig testConfig(executor, measureTiming, outputType, memoryType);
+                    EvaluatePreparedModel(device, preparedModel, testModel, testConfig);
+                }
+            }
+        }
+    }
+}
+
+void EvaluatePreparedCoupledModels(const std::shared_ptr<IDevice>& device,
+                                   const std::shared_ptr<IPreparedModel>& preparedModel,
+                                   const TestModel& testModel,
+                                   const std::shared_ptr<IPreparedModel>& preparedCoupledModel,
+                                   const TestModel& coupledModel) {
+    const std::vector<OutputType> outputTypesList = {OutputType::FULLY_SPECIFIED};
+    const std::vector<bool> measureTimingList = {false, true};
+    const std::vector<Executor> executorList = {Executor::SYNC, Executor::FENCED};
+
+    for (const OutputType outputType : outputTypesList) {
+        for (const bool measureTiming : measureTimingList) {
+            for (const Executor executor : executorList) {
+                const TestConfig testConfig(executor, measureTiming, outputType, MemoryType::ASHMEM,
+                                            /*reportSkipping=*/false);
+                bool baseSkipped = false;
+                EvaluatePreparedModel(device, preparedModel, testModel, testConfig, &baseSkipped);
+                bool coupledSkipped = false;
+                EvaluatePreparedModel(device, preparedCoupledModel, coupledModel, testConfig,
+                                      &coupledSkipped);
+                ASSERT_EQ(baseSkipped, coupledSkipped);
+                if (baseSkipped) {
+                    LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
+                                 "execute model that it does not support.";
+                    std::cout << "[          ]   Early termination of test because vendor service "
+                                 "cannot "
+                                 "execute model that it does not support."
+                              << std::endl;
+                    GTEST_SKIP();
+                }
+            }
+        }
+    }
+}
+
+void Execute(const std::shared_ptr<IDevice>& device, const TestModel& testModel,
+             TestKind testKind) {
+    Model model = createModel(testModel);
+    if (testKind == TestKind::DYNAMIC_SHAPE) {
+        makeOutputDimensionsUnspecified(&model);
+    }
+
+    std::shared_ptr<IPreparedModel> preparedModel;
+    switch (testKind) {
+        case TestKind::GENERAL:
+        case TestKind::DYNAMIC_SHAPE:
+        case TestKind::MEMORY_DOMAIN:
+        case TestKind::FENCED_COMPUTE:
+        case TestKind::INTINITE_LOOP_TIMEOUT: {
+            createPreparedModel(device, model, &preparedModel);
+            if (preparedModel == nullptr) return;
+            EvaluatePreparedModel(device, preparedModel, testModel, testKind);
+        } break;
+        case TestKind::QUANTIZATION_COUPLING: {
+            ASSERT_TRUE(testModel.hasQuant8CoupledOperands());
+            createPreparedModel(device, model, &preparedModel,
+                                /*reportSkipping*/ false);
+            TestModel signedQuantizedModel = convertQuant8AsymmOperandsToSigned(testModel);
+            std::shared_ptr<IPreparedModel> preparedCoupledModel;
+            createPreparedModel(device, createModel(signedQuantizedModel), &preparedCoupledModel,
+                                /*reportSkipping*/ false);
+            // If we couldn't prepare a model with unsigned quantization, we must
+            // fail to prepare a model with signed quantization as well.
+            if (preparedModel == nullptr) {
+                ASSERT_EQ(preparedCoupledModel, nullptr);
+                // If we failed to prepare both of the models, we can safely skip
+                // the test.
+                LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
+                             "prepare model that it does not support.";
+                std::cout
+                        << "[          ]   Early termination of test because vendor service cannot "
+                           "prepare model that it does not support."
+                        << std::endl;
+                GTEST_SKIP();
+            }
+            ASSERT_NE(preparedCoupledModel, nullptr);
+            EvaluatePreparedCoupledModels(device, preparedModel, testModel, preparedCoupledModel,
+                                          signedQuantizedModel);
+        } break;
+    }
+}
+
+void GeneratedTestBase::SetUp() {
+    testing::TestWithParam<GeneratedTestParam>::SetUp();
+    ASSERT_NE(kDevice, nullptr);
+}
+
+std::vector<NamedModel> getNamedModels(const FilterFn& filter) {
+    return TestModelManager::get().getTestModels(filter);
+}
+
+std::vector<NamedModel> getNamedModels(const FilterNameFn& filter) {
+    return TestModelManager::get().getTestModels(filter);
+}
+
+std::string printGeneratedTest(const testing::TestParamInfo<GeneratedTestParam>& info) {
+    const auto& [namedDevice, namedModel] = info.param;
+    return gtestCompliantName(getName(namedDevice) + "_" + getName(namedModel));
+}
+
+// Tag for the generated tests
+class GeneratedTest : public GeneratedTestBase {};
+
+// Tag for the dynamic output shape tests
+class DynamicOutputShapeTest : public GeneratedTest {};
+
+// Tag for the memory domain tests
+class MemoryDomainTest : public GeneratedTest {};
+
+// Tag for the fenced compute tests
+class FencedComputeTest : public GeneratedTest {};
+
+// Tag for the quantization coupling tests
+class QuantizationCouplingTest : public GeneratedTest {};
+
+// Tag for the loop timeout tests
+class InfiniteLoopTimeoutTest : public GeneratedTest {};
+
+TEST_P(GeneratedTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::GENERAL);
+}
+
+TEST_P(DynamicOutputShapeTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::DYNAMIC_SHAPE);
+}
+
+TEST_P(MemoryDomainTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::MEMORY_DOMAIN);
+}
+
+TEST_P(FencedComputeTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::FENCED_COMPUTE);
+}
+
+TEST_P(QuantizationCouplingTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::QUANTIZATION_COUPLING);
+}
+
+TEST_P(InfiniteLoopTimeoutTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::INTINITE_LOOP_TIMEOUT);
+}
+
+INSTANTIATE_GENERATED_TEST(GeneratedTest,
+                           [](const TestModel& testModel) { return !testModel.expectFailure; });
+
+INSTANTIATE_GENERATED_TEST(DynamicOutputShapeTest, [](const TestModel& testModel) {
+    return !testModel.expectFailure && !testModel.hasScalarOutputs();
+});
+
+INSTANTIATE_GENERATED_TEST(MemoryDomainTest,
+                           [](const TestModel& testModel) { return !testModel.expectFailure; });
+
+INSTANTIATE_GENERATED_TEST(FencedComputeTest,
+                           [](const TestModel& testModel) { return !testModel.expectFailure; });
+
+INSTANTIATE_GENERATED_TEST(QuantizationCouplingTest, [](const TestModel& testModel) {
+    return !testModel.expectFailure && testModel.hasQuant8CoupledOperands() &&
+           testModel.main.operations.size() == 1;
+});
+
+INSTANTIATE_GENERATED_TEST(InfiniteLoopTimeoutTest, [](const TestModel& testModel) {
+    return testModel.isInfiniteLoopTimeoutTest();
+});
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional