Merge changes from topic "nnapi-control-flow"

* changes:
  Add NNAPI loop timeout API
  Add control flow support to NNAPI VTS tests
  Add control flow performance to NNAPI Capabilities
diff --git a/current.txt b/current.txt
index c27d5e0..3806cff 100644
--- a/current.txt
+++ b/current.txt
@@ -628,9 +628,9 @@
 9db064ee44268a876be0367ff771e618362d39ec603b6ecab17e1575725fcd87 android.hardware.neuralnetworks@1.3::IDevice
 4167dc3ad35e9cd0d2057d4868c7675ae2c3c9d05bbd614c1f5dccfa5fd68797 android.hardware.neuralnetworks@1.3::IExecutionCallback
 2fa3679ad7c94b5e88724adcd560c561041068a4ca565c63830e68101988746a android.hardware.neuralnetworks@1.3::IFencedExecutionCallback
-237b23b126a66f3432658020fed78cdd06ba6297459436fe6bae0ba753370833 android.hardware.neuralnetworks@1.3::IPreparedModel
+43088ffc71945b463a7279262cfe2e290f6ed2f15d3fd6032798a3be299fb08f android.hardware.neuralnetworks@1.3::IPreparedModel
 0439a1fbbec7f16e5e4c653d85ac685d51bfafbae15b8f8cca530acdd7d6a8ce android.hardware.neuralnetworks@1.3::IPreparedModelCallback
-2fabd246f985d94a0172dacefb0d6cf19e2aeb2d5f17752653988ef39570a52d android.hardware.neuralnetworks@1.3::types
+306fda32ac969fd51d75d066352cadcb769944ec4823be4cdd3f86fdb9e97511 android.hardware.neuralnetworks@1.3::types
 3e01d4446cd69fd1c48f8572efd97487bc179564b32bd795800b97bbe10be37b android.hardware.wifi@1.4::IWifi
 a64467bae843569f0d465c5be7f0c7a5b987985b55a3ef4794dd5afc68538650 android.hardware.wifi.supplicant@1.3::ISupplicant
 44445b8a03d7b9e68b2fbd954672c18a8fce9e32851b0692f4f4ab3407f86ecb android.hardware.wifi.supplicant@1.3::ISupplicantStaIface
diff --git a/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
index 595ad85..e28605d 100644
--- a/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
+++ b/neuralnetworks/1.0/vts/functional/GeneratedTestHarness.cpp
@@ -42,10 +42,11 @@
 
 Model createModel(const TestModel& testModel) {
     // Model operands.
-    hidl_vec<Operand> operands(testModel.operands.size());
+    CHECK_EQ(testModel.referenced.size(), 0u);  // Not supported in 1.0.
+    hidl_vec<Operand> operands(testModel.main.operands.size());
     size_t constCopySize = 0, constRefSize = 0;
-    for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-        const auto& op = testModel.operands[i];
+    for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+        const auto& op = testModel.main.operands[i];
 
         DataLocation loc = {};
         if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
@@ -70,9 +71,9 @@
     }
 
     // Model operations.
-    hidl_vec<Operation> operations(testModel.operations.size());
-    std::transform(testModel.operations.begin(), testModel.operations.end(), operations.begin(),
-                   [](const TestOperation& op) -> Operation {
+    hidl_vec<Operation> operations(testModel.main.operations.size());
+    std::transform(testModel.main.operations.begin(), testModel.main.operations.end(),
+                   operations.begin(), [](const TestOperation& op) -> Operation {
                        return {.type = static_cast<OperationType>(op.type),
                                .inputs = op.inputs,
                                .outputs = op.outputs};
@@ -80,8 +81,8 @@
 
     // Constant copies.
     hidl_vec<uint8_t> operandValues(constCopySize);
-    for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-        const auto& op = testModel.operands[i];
+    for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+        const auto& op = testModel.main.operands[i];
         if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
             const uint8_t* begin = op.data.get<uint8_t>();
             const uint8_t* end = begin + op.data.size();
@@ -102,8 +103,8 @@
                 reinterpret_cast<uint8_t*>(static_cast<void*>(mappedMemory->getPointer()));
         CHECK(mappedPtr != nullptr);
 
-        for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-            const auto& op = testModel.operands[i];
+        for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+            const auto& op = testModel.main.operands[i];
             if (op.lifetime == TestOperandLifeTime::CONSTANT_REFERENCE) {
                 const uint8_t* begin = op.data.get<uint8_t>();
                 const uint8_t* end = begin + op.data.size();
@@ -114,8 +115,8 @@
 
     return {.operands = std::move(operands),
             .operations = std::move(operations),
-            .inputIndexes = testModel.inputIndexes,
-            .outputIndexes = testModel.outputIndexes,
+            .inputIndexes = testModel.main.inputIndexes,
+            .outputIndexes = testModel.main.outputIndexes,
             .operandValues = std::move(operandValues),
             .pools = std::move(pools)};
 }
diff --git a/neuralnetworks/1.0/vts/functional/Utils.cpp b/neuralnetworks/1.0/vts/functional/Utils.cpp
index 5b630fd..0dba85a 100644
--- a/neuralnetworks/1.0/vts/functional/Utils.cpp
+++ b/neuralnetworks/1.0/vts/functional/Utils.cpp
@@ -42,10 +42,10 @@
 
 Request createRequest(const TestModel& testModel) {
     // Model inputs.
-    hidl_vec<RequestArgument> inputs(testModel.inputIndexes.size());
+    hidl_vec<RequestArgument> inputs(testModel.main.inputIndexes.size());
     size_t inputSize = 0;
-    for (uint32_t i = 0; i < testModel.inputIndexes.size(); i++) {
-        const auto& op = testModel.operands[testModel.inputIndexes[i]];
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
         if (op.data.size() == 0) {
             // Omitted input.
             inputs[i] = {.hasNoValue = true};
@@ -59,10 +59,10 @@
     }
 
     // Model outputs.
-    hidl_vec<RequestArgument> outputs(testModel.outputIndexes.size());
+    hidl_vec<RequestArgument> outputs(testModel.main.outputIndexes.size());
     size_t outputSize = 0;
-    for (uint32_t i = 0; i < testModel.outputIndexes.size(); i++) {
-        const auto& op = testModel.operands[testModel.outputIndexes[i]];
+    for (uint32_t i = 0; i < testModel.main.outputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
 
         // In the case of zero-sized output, we should at least provide a one-byte buffer.
         // This is because zero-sized tensors are only supported internally to the driver, or
@@ -90,8 +90,8 @@
     CHECK(inputPtr != nullptr);
 
     // Copy input data to the memory pool.
-    for (uint32_t i = 0; i < testModel.inputIndexes.size(); i++) {
-        const auto& op = testModel.operands[testModel.inputIndexes[i]];
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
         if (op.data.size() > 0) {
             const uint8_t* begin = op.data.get<uint8_t>();
             const uint8_t* end = begin + op.data.size();
diff --git a/neuralnetworks/1.1/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/1.1/vts/functional/GeneratedTestHarness.cpp
index 7a929d6..cee15a3 100644
--- a/neuralnetworks/1.1/vts/functional/GeneratedTestHarness.cpp
+++ b/neuralnetworks/1.1/vts/functional/GeneratedTestHarness.cpp
@@ -49,10 +49,11 @@
 
 Model createModel(const TestModel& testModel) {
     // Model operands.
-    hidl_vec<Operand> operands(testModel.operands.size());
+    CHECK_EQ(testModel.referenced.size(), 0u);  // Not supported in 1.1.
+    hidl_vec<Operand> operands(testModel.main.operands.size());
     size_t constCopySize = 0, constRefSize = 0;
-    for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-        const auto& op = testModel.operands[i];
+    for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+        const auto& op = testModel.main.operands[i];
 
         DataLocation loc = {};
         if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
@@ -77,9 +78,9 @@
     }
 
     // Model operations.
-    hidl_vec<Operation> operations(testModel.operations.size());
-    std::transform(testModel.operations.begin(), testModel.operations.end(), operations.begin(),
-                   [](const TestOperation& op) -> Operation {
+    hidl_vec<Operation> operations(testModel.main.operations.size());
+    std::transform(testModel.main.operations.begin(), testModel.main.operations.end(),
+                   operations.begin(), [](const TestOperation& op) -> Operation {
                        return {.type = static_cast<OperationType>(op.type),
                                .inputs = op.inputs,
                                .outputs = op.outputs};
@@ -87,8 +88,8 @@
 
     // Constant copies.
     hidl_vec<uint8_t> operandValues(constCopySize);
-    for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-        const auto& op = testModel.operands[i];
+    for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+        const auto& op = testModel.main.operands[i];
         if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
             const uint8_t* begin = op.data.get<uint8_t>();
             const uint8_t* end = begin + op.data.size();
@@ -109,8 +110,8 @@
                 reinterpret_cast<uint8_t*>(static_cast<void*>(mappedMemory->getPointer()));
         CHECK(mappedPtr != nullptr);
 
-        for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-            const auto& op = testModel.operands[i];
+        for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+            const auto& op = testModel.main.operands[i];
             if (op.lifetime == TestOperandLifeTime::CONSTANT_REFERENCE) {
                 const uint8_t* begin = op.data.get<uint8_t>();
                 const uint8_t* end = begin + op.data.size();
@@ -121,8 +122,8 @@
 
     return {.operands = std::move(operands),
             .operations = std::move(operations),
-            .inputIndexes = testModel.inputIndexes,
-            .outputIndexes = testModel.outputIndexes,
+            .inputIndexes = testModel.main.inputIndexes,
+            .outputIndexes = testModel.main.outputIndexes,
             .operandValues = std::move(operandValues),
             .pools = std::move(pools),
             .relaxComputationFloat32toFloat16 = testModel.isRelaxed};
diff --git a/neuralnetworks/1.2/vts/functional/CompilationCachingTests.cpp b/neuralnetworks/1.2/vts/functional/CompilationCachingTests.cpp
index 2130a76..10dec79 100644
--- a/neuralnetworks/1.2/vts/functional/CompilationCachingTests.cpp
+++ b/neuralnetworks/1.2/vts/functional/CompilationCachingTests.cpp
@@ -207,10 +207,10 @@
     };
 
     return {
-            .operands = std::move(operands),
-            .operations = std::move(operations),
-            .inputIndexes = {1},
-            .outputIndexes = {len * 2 + 1},
+            .main = {.operands = std::move(operands),
+                     .operations = std::move(operations),
+                     .inputIndexes = {1},
+                     .outputIndexes = {len * 2 + 1}},
             .isRelaxed = false,
     };
 }
diff --git a/neuralnetworks/1.2/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/1.2/vts/functional/GeneratedTestHarness.cpp
index 599fd1d..4c8fede 100644
--- a/neuralnetworks/1.2/vts/functional/GeneratedTestHarness.cpp
+++ b/neuralnetworks/1.2/vts/functional/GeneratedTestHarness.cpp
@@ -75,10 +75,11 @@
 
 Model createModel(const TestModel& testModel) {
     // Model operands.
-    hidl_vec<Operand> operands(testModel.operands.size());
+    CHECK_EQ(testModel.referenced.size(), 0u);  // Not supported in 1.2.
+    hidl_vec<Operand> operands(testModel.main.operands.size());
     size_t constCopySize = 0, constRefSize = 0;
-    for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-        const auto& op = testModel.operands[i];
+    for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+        const auto& op = testModel.main.operands[i];
 
         DataLocation loc = {};
         if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
@@ -110,9 +111,9 @@
     }
 
     // Model operations.
-    hidl_vec<Operation> operations(testModel.operations.size());
-    std::transform(testModel.operations.begin(), testModel.operations.end(), operations.begin(),
-                   [](const TestOperation& op) -> Operation {
+    hidl_vec<Operation> operations(testModel.main.operations.size());
+    std::transform(testModel.main.operations.begin(), testModel.main.operations.end(),
+                   operations.begin(), [](const TestOperation& op) -> Operation {
                        return {.type = static_cast<OperationType>(op.type),
                                .inputs = op.inputs,
                                .outputs = op.outputs};
@@ -120,8 +121,8 @@
 
     // Constant copies.
     hidl_vec<uint8_t> operandValues(constCopySize);
-    for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-        const auto& op = testModel.operands[i];
+    for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+        const auto& op = testModel.main.operands[i];
         if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
             const uint8_t* begin = op.data.get<uint8_t>();
             const uint8_t* end = begin + op.data.size();
@@ -142,8 +143,8 @@
                 reinterpret_cast<uint8_t*>(static_cast<void*>(mappedMemory->getPointer()));
         CHECK(mappedPtr != nullptr);
 
-        for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-            const auto& op = testModel.operands[i];
+        for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+            const auto& op = testModel.main.operands[i];
             if (op.lifetime == TestOperandLifeTime::CONSTANT_REFERENCE) {
                 const uint8_t* begin = op.data.get<uint8_t>();
                 const uint8_t* end = begin + op.data.size();
@@ -154,15 +155,15 @@
 
     return {.operands = std::move(operands),
             .operations = std::move(operations),
-            .inputIndexes = testModel.inputIndexes,
-            .outputIndexes = testModel.outputIndexes,
+            .inputIndexes = testModel.main.inputIndexes,
+            .outputIndexes = testModel.main.outputIndexes,
             .operandValues = std::move(operandValues),
             .pools = std::move(pools),
             .relaxComputationFloat32toFloat16 = testModel.isRelaxed};
 }
 
 static bool isOutputSizeGreaterThanOne(const TestModel& testModel, uint32_t index) {
-    const auto byteSize = testModel.operands[testModel.outputIndexes[index]].data.size();
+    const auto byteSize = testModel.main.operands[testModel.main.outputIndexes[index]].data.size();
     return byteSize > 1u;
 }
 
@@ -302,17 +303,17 @@
             // either empty, or have the same number of elements as the number of outputs.
             ASSERT_EQ(ErrorStatus::NONE, executionStatus);
             ASSERT_TRUE(outputShapes.size() == 0 ||
-                        outputShapes.size() == testModel.outputIndexes.size());
+                        outputShapes.size() == testModel.main.outputIndexes.size());
             break;
         case OutputType::UNSPECIFIED:
             // If the model output operands are not fully specified, outputShapes must have
             // the same number of elements as the number of outputs.
             ASSERT_EQ(ErrorStatus::NONE, executionStatus);
-            ASSERT_EQ(outputShapes.size(), testModel.outputIndexes.size());
+            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
             break;
         case OutputType::INSUFFICIENT:
             ASSERT_EQ(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, executionStatus);
-            ASSERT_EQ(outputShapes.size(), testModel.outputIndexes.size());
+            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
             ASSERT_FALSE(outputShapes[0].isSufficient);
             return;
     }
@@ -320,7 +321,7 @@
     // Go through all outputs, check returned output shapes.
     for (uint32_t i = 0; i < outputShapes.size(); i++) {
         EXPECT_TRUE(outputShapes[i].isSufficient);
-        const auto& expect = testModel.operands[testModel.outputIndexes[i]].dimensions;
+        const auto& expect = testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
         const std::vector<uint32_t> actual = outputShapes[i].dimensions;
         EXPECT_EQ(expect, actual);
     }
diff --git a/neuralnetworks/1.3/IPreparedModel.hal b/neuralnetworks/1.3/IPreparedModel.hal
index d645de7..4ce3691 100644
--- a/neuralnetworks/1.3/IPreparedModel.hal
+++ b/neuralnetworks/1.3/IPreparedModel.hal
@@ -92,6 +92,17 @@
      * @param deadline The time by which the execution must complete. If the
      *                 execution cannot be finished by the deadline, the
      *                 execution must be aborted.
+     * @param loopTimeoutDuration The maximum amount of time that should be spent
+     *                            executing a {@link OperationType::WHILE}
+     *                            operation. If a loop condition model does not
+     *                            output false within this duration, the
+     *                            execution must be aborted. If the model
+     *                            contains a {@link OperationType::WHILE}
+     *                            operation and no loop timeout duration is
+     *                            provided, the maximum amount of time is {@link
+     *                            LoopTimeoutDurationNs::DEFAULT}. When
+     *                            provided, the duration must not exceed {@link
+     *                            LoopTimeoutDurationNs::MAXIMUM}.
      * @param callback A callback object used to return the error status of
      *                 the execution, shape information of model output operands, and
      *                 duration of execution. The callback object's notify function must
@@ -111,7 +122,7 @@
      *                  driver
      */
     execute_1_3(Request request, MeasureTiming measure, OptionalTimePoint deadline,
-                IExecutionCallback callback)
+                OptionalTimeoutDuration loopTimeoutDuration, IExecutionCallback callback)
         generates (ErrorStatus status);
 
     /**
@@ -163,6 +174,17 @@
      * @param deadline The time by which the execution must complete. If the
      *                 execution cannot be finished by the deadline, the
      *                 execution must be aborted.
+     * @param loopTimeoutDuration The maximum amount of time that should be spent
+     *                            executing a {@link OperationType::WHILE}
+     *                            operation. If a loop condition model does not
+     *                            output false within this duration, the
+     *                            execution must be aborted. If the model
+     *                            contains a {@link OperationType::WHILE}
+     *                            operation and no loop timeout duration is
+     *                            provided, the maximum amount of time is {@link
+     *                            LoopTimeoutDurationNs::DEFAULT}. When
+     *                            provided, the duration must not exceed {@link
+     *                            LoopTimeoutDurationNs::MAXIMUM}.
      * @return status Error status of the execution, must be:
      *                - NONE if execution is performed successfully
      *                - DEVICE_UNAVAILABLE if driver is offline or busy
@@ -187,7 +209,8 @@
      *                measurement is not available.
      */
     executeSynchronously_1_3(Request request, MeasureTiming measure,
-                             OptionalTimePoint deadline)
+                             OptionalTimePoint deadline,
+                             OptionalTimeoutDuration loopTimeoutDuration)
                   generates (ErrorStatus status, vec<OutputShape> outputShapes,
                              Timing timing);
 
@@ -243,6 +266,17 @@
      * @param deadline The time by which the execution must complete. If the
      *                 execution cannot be finished by the deadline, the
      *                 execution must be aborted.
+     * @param loopTimeoutDuration The maximum amount of time that should be spent
+     *                            executing a {@link OperationType::WHILE}
+     *                            operation. If a loop condition model does not
+     *                            output false within this duration, the
+     *                            execution must be aborted. If the model
+     *                            contains a {@link OperationType::WHILE}
+     *                            operation and no loop timeout duration is
+     *                            provided, the maximum amount of time is {@link
+     *                            LoopTimeoutDurationNs::DEFAULT}. When
+     *                            provided, the duration must not exceed {@link
+     *                            LoopTimeoutDurationNs::MAXIMUM}.
      * @param duration The length of time within which the execution must
      *                 complete after all sync fences in waitFor are signaled. If the
      *                 execution cannot be finished within the duration, the execution
@@ -264,6 +298,7 @@
      *                  and error status when the execution is completed.
      */
     executeFenced(Request request, vec<handle> waitFor, MeasureTiming measure,
-                  OptionalTimePoint deadline, OptionalTimeoutDuration duration)
+                  OptionalTimePoint deadline, OptionalTimeoutDuration loopTimeoutDuration,
+                  OptionalTimeoutDuration duration)
         generates (ErrorStatus status, handle syncFence, IFencedExecutionCallback callback);
 };
diff --git a/neuralnetworks/1.3/types.hal b/neuralnetworks/1.3/types.hal
index 530f984..a808a2e 100644
--- a/neuralnetworks/1.3/types.hal
+++ b/neuralnetworks/1.3/types.hal
@@ -5176,8 +5176,10 @@
 /**
  * The capabilities of a driver.
  *
- * Performance of an operation comes from the type of its first operand.
- * This represents performance for non extension operand types.
+ * This represents performance of non-extension operations.
+ *
+ * Performance of an operation other than {@link OperationType::IF} and
+ * {@link OperationType::WHILE} comes from the type of its first operand.
  */
 struct Capabilities {
     /**
@@ -5200,11 +5202,32 @@
 
     /**
      * Performance by operand type. Must be sorted by OperandType.
-     * If a particular OperandType is not present in operandPerformance,
+     *
+     * If a particular {@link OperandType} is not present in operandPerformance,
      * its performance is treated as
      * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
+     *
+     * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver
+     * must not report operand performance for {@link OperandType::SUBGRAPH}.
      */
     vec<OperandPerformance> operandPerformance;
+
+    /**
+     * Performance of an {@link OperationType::IF} operation is the sum of
+     * {@link Capabilities::ifPerformance} and the mean of performance for the
+     * two branch subgraphs, where performance for a subgraph is the sum of the
+     * performance of all operations within the subgraph.
+     */
+    PerformanceInfo ifPerformance;
+
+    /**
+     * Performance of a {@link OperationType::WHILE} operation is the sum of
+     * {@link Capabilities::whilePerformance}, performance for the condition
+     * subgraph and performance for the body subgraph, where performance for a
+     * subgraph is the sum of the performance of all operations within the
+     * subgraph.
+     */
+    PerformanceInfo whilePerformance;
 };
 
 /**
@@ -5648,3 +5671,14 @@
      */
     RESOURCE_EXHAUSTED_PERSISTENT,
 };
+
+/**
+ * Each {@link OperationType::WHILE} operation in the model has an implicit
+ * execution timeout duration associated with it ("loop timeout duration").
+ * This duration is configurable on a per-execution basis and must not exceed
+ * 15 seconds. The default value is 2 seconds.
+ */
+enum LoopTimeoutDurationNs : uint64_t {
+    DEFAULT = 2000000000,
+    MAXIMUM = 15000000000,
+};
diff --git a/neuralnetworks/1.3/types.t b/neuralnetworks/1.3/types.t
index 3d0d02d..0a6e45e 100644
--- a/neuralnetworks/1.3/types.t
+++ b/neuralnetworks/1.3/types.t
@@ -103,8 +103,10 @@
 /**
  * The capabilities of a driver.
  *
- * Performance of an operation comes from the type of its first operand.
- * This represents performance for non extension operand types.
+ * This represents performance of non-extension operations.
+ *
+ * Performance of an operation other than {@link OperationType::IF} and
+ * {@link OperationType::WHILE} comes from the type of its first operand.
  */
 struct Capabilities {
     /**
@@ -127,11 +129,32 @@
 
     /**
      * Performance by operand type. Must be sorted by OperandType.
-     * If a particular OperandType is not present in operandPerformance,
+     *
+     * If a particular {@link OperandType} is not present in operandPerformance,
      * its performance is treated as
      * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
+     *
+     * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver
+     * must not report operand performance for {@link OperandType::SUBGRAPH}.
      */
     vec<OperandPerformance> operandPerformance;
+
+    /**
+     * Performance of an {@link OperationType::IF} operation is the sum of
+     * {@link Capabilities::ifPerformance} and the mean of performance for the
+     * two branch subgraphs, where performance for a subgraph is the sum of the
+     * performance of all operations within the subgraph.
+     */
+    PerformanceInfo ifPerformance;
+
+    /**
+     * Performance of a {@link OperationType::WHILE} operation is the sum of
+     * {@link Capabilities::whilePerformance}, performance for the condition
+     * subgraph and performance for the body subgraph, where performance for a
+     * subgraph is the sum of the performance of all operations within the
+     * subgraph.
+     */
+    PerformanceInfo whilePerformance;
 };
 
 /**
@@ -575,3 +598,14 @@
      */
     RESOURCE_EXHAUSTED_PERSISTENT,
 };
+
+/**
+ * Each {@link OperationType::WHILE} operation in the model has an implicit
+ * execution timeout duration associated with it ("loop timeout duration").
+ * This duration is configurable on a per-execution basis and must not exceed
+ * 15 seconds. The default value is 2 seconds.
+ */
+enum LoopTimeoutDurationNs : uint64_t {
+    DEFAULT = 2000000000,
+    MAXIMUM = 15000000000,
+};
diff --git a/neuralnetworks/1.3/vts/functional/BasicTests.cpp b/neuralnetworks/1.3/vts/functional/BasicTests.cpp
index 891850c..1c25369 100644
--- a/neuralnetworks/1.3/vts/functional/BasicTests.cpp
+++ b/neuralnetworks/1.3/vts/functional/BasicTests.cpp
@@ -57,6 +57,11 @@
                                    [](const OperandPerformance& a, const OperandPerformance& b) {
                                        return a.type < b.type;
                                    }));
+        EXPECT_TRUE(std::all_of(opPerf.begin(), opPerf.end(), [](const OperandPerformance& a) {
+            return a.type != OperandType::SUBGRAPH;
+        }));
+        EXPECT_TRUE(isPositive(capabilities.ifPerformance));
+        EXPECT_TRUE(isPositive(capabilities.whilePerformance));
     });
     EXPECT_TRUE(ret.isOk());
 }
diff --git a/neuralnetworks/1.3/vts/functional/CompilationCachingTests.cpp b/neuralnetworks/1.3/vts/functional/CompilationCachingTests.cpp
index 0bd24da..ac18c8f 100644
--- a/neuralnetworks/1.3/vts/functional/CompilationCachingTests.cpp
+++ b/neuralnetworks/1.3/vts/functional/CompilationCachingTests.cpp
@@ -209,10 +209,10 @@
     };
 
     return {
-            .operands = std::move(operands),
-            .operations = std::move(operations),
-            .inputIndexes = {1},
-            .outputIndexes = {len * 2 + 1},
+            .main = {.operands = std::move(operands),
+                     .operations = std::move(operations),
+                     .inputIndexes = {1},
+                     .outputIndexes = {len * 2 + 1}},
             .isRelaxed = false,
     };
 }
diff --git a/neuralnetworks/1.3/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/1.3/vts/functional/GeneratedTestHarness.cpp
index 82f34ff..89edfb7 100644
--- a/neuralnetworks/1.3/vts/functional/GeneratedTestHarness.cpp
+++ b/neuralnetworks/1.3/vts/functional/GeneratedTestHarness.cpp
@@ -169,7 +169,8 @@
         if constexpr (ioType == IOType::INPUT) {
             if (buffer != nullptr) {
                 // TestBuffer -> Shared memory.
-                const auto& testBuffer = kTestModel.operands[kTestModel.inputIndexes[index]].data;
+                const auto& testBuffer =
+                        kTestModel.main.operands[kTestModel.main.inputIndexes[index]].data;
                 ASSERT_GT(testBuffer.size(), 0);
                 hidl_memory tmp = nn::allocateSharedMemory(testBuffer.size());
                 sp<IMemory> inputMemory = mapMemory(tmp);
@@ -195,26 +196,42 @@
     const TestModel& kTestModel;
 };
 
-}  // namespace
+Subgraph createSubgraph(const TestSubgraph& testSubgraph, uint32_t* constCopySize,
+                        std::vector<const TestBuffer*>* constCopies, uint32_t* constRefSize,
+                        std::vector<const TestBuffer*>* constReferences) {
+    CHECK(constCopySize != nullptr);
+    CHECK(constCopies != nullptr);
+    CHECK(constRefSize != nullptr);
+    CHECK(constReferences != nullptr);
 
-Model createModel(const TestModel& testModel) {
-    // Model operands.
-    hidl_vec<Operand> operands(testModel.operands.size());
-    size_t constCopySize = 0, constRefSize = 0;
-    for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-        const auto& op = testModel.operands[i];
+    // Operands.
+    hidl_vec<Operand> operands(testSubgraph.operands.size());
+    for (uint32_t i = 0; i < testSubgraph.operands.size(); i++) {
+        const auto& op = testSubgraph.operands[i];
 
         DataLocation loc = {};
         if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
-            loc = {.poolIndex = 0,
-                   .offset = static_cast<uint32_t>(constCopySize),
-                   .length = static_cast<uint32_t>(op.data.size())};
-            constCopySize += op.data.alignedSize();
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *constCopySize,
+                    .length = static_cast<uint32_t>(op.data.size()),
+            };
+            constCopies->push_back(&op.data);
+            *constCopySize += op.data.alignedSize();
         } else if (op.lifetime == TestOperandLifeTime::CONSTANT_REFERENCE) {
-            loc = {.poolIndex = 0,
-                   .offset = static_cast<uint32_t>(constRefSize),
-                   .length = static_cast<uint32_t>(op.data.size())};
-            constRefSize += op.data.alignedSize();
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *constRefSize,
+                    .length = static_cast<uint32_t>(op.data.size()),
+            };
+            constReferences->push_back(&op.data);
+            *constRefSize += op.data.alignedSize();
+        } else if (op.lifetime == TestOperandLifeTime::SUBGRAPH) {
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *op.data.get<uint32_t>(),
+                    .length = 0,
+            };
         }
 
         V1_2::Operand::ExtraParams extraParams;
@@ -233,25 +250,52 @@
                        .extraParams = std::move(extraParams)};
     }
 
-    // Model operations.
-    hidl_vec<Operation> operations(testModel.operations.size());
-    std::transform(testModel.operations.begin(), testModel.operations.end(), operations.begin(),
-                   [](const TestOperation& op) -> Operation {
+    // Operations.
+    hidl_vec<Operation> operations(testSubgraph.operations.size());
+    std::transform(testSubgraph.operations.begin(), testSubgraph.operations.end(),
+                   operations.begin(), [](const TestOperation& op) -> Operation {
                        return {.type = static_cast<OperationType>(op.type),
                                .inputs = op.inputs,
                                .outputs = op.outputs};
                    });
 
+    return {.operands = std::move(operands),
+            .operations = std::move(operations),
+            .inputIndexes = testSubgraph.inputIndexes,
+            .outputIndexes = testSubgraph.outputIndexes};
+}
+
+void copyTestBuffers(const std::vector<const TestBuffer*>& buffers, uint8_t* output) {
+    uint32_t offset = 0;
+    for (const TestBuffer* buffer : buffers) {
+        const uint8_t* begin = buffer->get<uint8_t>();
+        const uint8_t* end = begin + buffer->size();
+        std::copy(begin, end, output + offset);
+        offset += buffer->alignedSize();
+    }
+}
+
+}  // namespace
+
+Model createModel(const TestModel& testModel) {
+    uint32_t constCopySize = 0;
+    uint32_t constRefSize = 0;
+    std::vector<const TestBuffer*> constCopies;
+    std::vector<const TestBuffer*> constReferences;
+
+    Subgraph mainSubgraph = createSubgraph(testModel.main, &constCopySize, &constCopies,
+                                           &constRefSize, &constReferences);
+    hidl_vec<Subgraph> refSubgraphs(testModel.referenced.size());
+    std::transform(testModel.referenced.begin(), testModel.referenced.end(), refSubgraphs.begin(),
+                   [&constCopySize, &constCopies, &constRefSize,
+                    &constReferences](const TestSubgraph& testSubgraph) {
+                       return createSubgraph(testSubgraph, &constCopySize, &constCopies,
+                                             &constRefSize, &constReferences);
+                   });
+
     // Constant copies.
     hidl_vec<uint8_t> operandValues(constCopySize);
-    for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-        const auto& op = testModel.operands[i];
-        if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
-            const uint8_t* begin = op.data.get<uint8_t>();
-            const uint8_t* end = begin + op.data.size();
-            std::copy(begin, end, operandValues.data() + operands[i].location.offset);
-        }
-    }
+    copyTestBuffers(constCopies, operandValues.data());
 
     // Shared memory.
     hidl_vec<hidl_memory> pools = {};
@@ -266,27 +310,18 @@
                 reinterpret_cast<uint8_t*>(static_cast<void*>(mappedMemory->getPointer()));
         CHECK(mappedPtr != nullptr);
 
-        for (uint32_t i = 0; i < testModel.operands.size(); i++) {
-            const auto& op = testModel.operands[i];
-            if (op.lifetime == TestOperandLifeTime::CONSTANT_REFERENCE) {
-                const uint8_t* begin = op.data.get<uint8_t>();
-                const uint8_t* end = begin + op.data.size();
-                std::copy(begin, end, mappedPtr + operands[i].location.offset);
-            }
-        }
+        copyTestBuffers(constReferences, mappedPtr);
     }
 
-    return {.main = {.operands = std::move(operands),
-                     .operations = std::move(operations),
-                     .inputIndexes = testModel.inputIndexes,
-                     .outputIndexes = testModel.outputIndexes},
+    return {.main = std::move(mainSubgraph),
+            .referenced = std::move(refSubgraphs),
             .operandValues = std::move(operandValues),
             .pools = std::move(pools),
             .relaxComputationFloat32toFloat16 = testModel.isRelaxed};
 }
 
 static bool isOutputSizeGreaterThanOne(const TestModel& testModel, uint32_t index) {
-    const auto byteSize = testModel.operands[testModel.outputIndexes[index]].data.size();
+    const auto byteSize = testModel.main.operands[testModel.main.outputIndexes[index]].data.size();
     return byteSize > 1u;
 }
 
@@ -320,10 +355,10 @@
     std::vector<uint32_t> tokens;
 
     // Model inputs.
-    hidl_vec<RequestArgument> inputs(testModel.inputIndexes.size());
+    hidl_vec<RequestArgument> inputs(testModel.main.inputIndexes.size());
     size_t inputSize = 0;
-    for (uint32_t i = 0; i < testModel.inputIndexes.size(); i++) {
-        const auto& op = testModel.operands[testModel.inputIndexes[i]];
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
         if (op.data.size() == 0) {
             // Omitted input.
             inputs[i] = {.hasNoValue = true};
@@ -350,10 +385,10 @@
     }
 
     // Model outputs.
-    hidl_vec<RequestArgument> outputs(testModel.outputIndexes.size());
+    hidl_vec<RequestArgument> outputs(testModel.main.outputIndexes.size());
     size_t outputSize = 0;
-    for (uint32_t i = 0; i < testModel.outputIndexes.size(); i++) {
-        const auto& op = testModel.operands[testModel.outputIndexes[i]];
+    for (uint32_t i = 0; i < testModel.main.outputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
         if (preferDeviceMemory) {
             SCOPED_TRACE("Output index = " + std::to_string(i));
             auto [buffer, token] = allocator.allocate<IOType::OUTPUT>(i);
@@ -398,9 +433,9 @@
     CHECK(inputMemory.get() != nullptr);
     uint8_t* inputPtr = static_cast<uint8_t*>(static_cast<void*>(inputMemory->getPointer()));
     CHECK(inputPtr != nullptr);
-    for (uint32_t i = 0; i < testModel.inputIndexes.size(); i++) {
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
         if (!inputs[i].hasNoValue && inputs[i].location.poolIndex == kInputPoolIndex) {
-            const auto& op = testModel.operands[testModel.inputIndexes[i]];
+            const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
             const uint8_t* begin = op.data.get<uint8_t>();
             const uint8_t* end = begin + op.data.size();
             std::copy(begin, end, inputPtr + inputs[i].location.offset);
@@ -443,7 +478,7 @@
         if (outputLoc.poolIndex == kOutputPoolIndex) {
             outputBuffers.emplace_back(outputLoc.length, outputPtr + outputLoc.offset);
         } else {
-            const auto& op = testModel.operands[testModel.outputIndexes[i]];
+            const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
             if (op.data.size() == 0) {
                 outputBuffers.emplace_back();
             } else {
@@ -461,7 +496,7 @@
 static Return<ErrorStatus> ExecutePreparedModel(const sp<IPreparedModel>& preparedModel,
                                                 const Request& request, MeasureTiming measure,
                                                 sp<ExecutionCallback>& callback) {
-    return preparedModel->execute_1_3(request, measure, {}, callback);
+    return preparedModel->execute_1_3(request, measure, {}, {}, callback);
 }
 static Return<ErrorStatus> ExecutePreparedModel(const sp<IPreparedModel>& preparedModel,
                                                 const Request& request, MeasureTiming measure,
@@ -469,7 +504,7 @@
                                                 Timing* timing) {
     ErrorStatus result;
     Return<void> ret = preparedModel->executeSynchronously_1_3(
-            request, measure, {},
+            request, measure, {}, {},
             [&result, outputShapes, timing](ErrorStatus error, const hidl_vec<OutputShape>& shapes,
                                             const Timing& time) {
                 result = error;
@@ -577,7 +612,7 @@
             hidl_handle syncFenceHandle;
             sp<IFencedExecutionCallback> fencedCallback;
             Return<void> ret = preparedModel->executeFenced(
-                    request, {}, testConfig.measureTiming, {}, {},
+                    request, {}, testConfig.measureTiming, {}, {}, {},
                     [&result, &syncFenceHandle, &fencedCallback](
                             ErrorStatus error, const hidl_handle& handle,
                             const sp<IFencedExecutionCallback>& callback) {
@@ -638,17 +673,17 @@
             // either empty, or have the same number of elements as the number of outputs.
             ASSERT_EQ(ErrorStatus::NONE, executionStatus);
             ASSERT_TRUE(outputShapes.size() == 0 ||
-                        outputShapes.size() == testModel.outputIndexes.size());
+                        outputShapes.size() == testModel.main.outputIndexes.size());
             break;
         case OutputType::UNSPECIFIED:
             // If the model output operands are not fully specified, outputShapes must have
             // the same number of elements as the number of outputs.
             ASSERT_EQ(ErrorStatus::NONE, executionStatus);
-            ASSERT_EQ(outputShapes.size(), testModel.outputIndexes.size());
+            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
             break;
         case OutputType::INSUFFICIENT:
             ASSERT_EQ(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, executionStatus);
-            ASSERT_EQ(outputShapes.size(), testModel.outputIndexes.size());
+            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
             ASSERT_FALSE(outputShapes[0].isSufficient);
             return;
     }
@@ -656,7 +691,7 @@
     // Go through all outputs, check returned output shapes.
     for (uint32_t i = 0; i < outputShapes.size(); i++) {
         EXPECT_TRUE(outputShapes[i].isSufficient);
-        const auto& expect = testModel.operands[testModel.outputIndexes[i]].dimensions;
+        const auto& expect = testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
         const std::vector<uint32_t> actual = outputShapes[i].dimensions;
         EXPECT_EQ(expect, actual);
     }
@@ -862,7 +897,7 @@
                            [](const TestModel& testModel) { return !testModel.expectFailure; });
 
 INSTANTIATE_GENERATED_TEST(QuantizationCouplingTest, [](const TestModel& testModel) {
-    return testModel.hasQuant8CoupledOperands() && testModel.operations.size() == 1;
+    return testModel.hasQuant8CoupledOperands() && testModel.main.operations.size() == 1;
 });
 
 }  // namespace android::hardware::neuralnetworks::V1_3::vts::functional
diff --git a/neuralnetworks/1.3/vts/functional/QualityOfServiceTests.cpp b/neuralnetworks/1.3/vts/functional/QualityOfServiceTests.cpp
index 76d133a..fccc612 100644
--- a/neuralnetworks/1.3/vts/functional/QualityOfServiceTests.cpp
+++ b/neuralnetworks/1.3/vts/functional/QualityOfServiceTests.cpp
@@ -171,7 +171,7 @@
 
     // launch execution
     const sp<ExecutionCallback> callback = new ExecutionCallback();
-    Return<ErrorStatus> ret = preparedModel->execute_1_3(request, measure, deadline, callback);
+    Return<ErrorStatus> ret = preparedModel->execute_1_3(request, measure, deadline, {}, callback);
     EXPECT_TRUE(ret.isOk());
     EXPECT_EQ(ErrorStatus::NONE, ret.withDefault(ErrorStatus::GENERAL_FAILURE));
     if (!ret.isOk() || ret != ErrorStatus::NONE) return std::nullopt;
@@ -198,7 +198,7 @@
 
     // run execution
     const Return<void> ret =
-            preparedModel->executeSynchronously_1_3(request, measure, deadline, cb);
+            preparedModel->executeSynchronously_1_3(request, measure, deadline, {}, cb);
     EXPECT_TRUE(ret.isOk());
     if (!ret.isOk()) return std::nullopt;
 
@@ -239,12 +239,13 @@
 
     // If the model output operands are fully specified, outputShapes must be either
     // either empty, or have the same number of elements as the number of outputs.
-    ASSERT_TRUE(outputShapes.size() == 0 || outputShapes.size() == testModel.outputIndexes.size());
+    ASSERT_TRUE(outputShapes.size() == 0 ||
+                outputShapes.size() == testModel.main.outputIndexes.size());
 
     // Go through all outputs, check returned output shapes.
     for (uint32_t i = 0; i < outputShapes.size(); i++) {
         EXPECT_TRUE(outputShapes[i].isSufficient);
-        const auto& expect = testModel.operands[testModel.outputIndexes[i]].dimensions;
+        const auto& expect = testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
         const std::vector<uint32_t> actual = outputShapes[i].dimensions;
         EXPECT_EQ(expect, actual);
     }
diff --git a/neuralnetworks/1.3/vts/functional/ValidateModel.cpp b/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
index b9ea430..09e9922 100644
--- a/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
+++ b/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
@@ -182,6 +182,7 @@
         case OperandType::TENSOR_FLOAT16:
         case OperandType::TENSOR_FLOAT32:
         case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+        case OperandType::SUBGRAPH:
             return 1.0f;
         case OperandType::TENSOR_INT32:
             return -1.0f;
@@ -220,6 +221,7 @@
         case OperandType::TENSOR_FLOAT32:
         case OperandType::TENSOR_INT32:
         case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+        case OperandType::SUBGRAPH:
             return {1};
         case OperandType::TENSOR_QUANT8_ASYMM:
             return {-1, 256};
diff --git a/neuralnetworks/1.3/vts/functional/ValidateRequest.cpp b/neuralnetworks/1.3/vts/functional/ValidateRequest.cpp
index 2a4269f..20f4fe2 100644
--- a/neuralnetworks/1.3/vts/functional/ValidateRequest.cpp
+++ b/neuralnetworks/1.3/vts/functional/ValidateRequest.cpp
@@ -70,7 +70,7 @@
 
         sp<ExecutionCallback> executionCallback = new ExecutionCallback();
         Return<ErrorStatus> executeLaunchStatus =
-                preparedModel->execute_1_3(request, measure, deadline, executionCallback);
+                preparedModel->execute_1_3(request, measure, deadline, {}, executionCallback);
         ASSERT_TRUE(executeLaunchStatus.isOk());
         ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, static_cast<ErrorStatus>(executeLaunchStatus));
 
@@ -88,7 +88,7 @@
         SCOPED_TRACE(message + " [executeSynchronously_1_3]");
 
         Return<void> executeStatus = preparedModel->executeSynchronously_1_3(
-                request, measure, deadline,
+                request, measure, deadline, {},
                 [](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes,
                    const Timing& timing) {
                     ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, error);
@@ -143,7 +143,7 @@
     {
         SCOPED_TRACE(message + " [executeFenced]");
         Return<void> ret =
-                preparedModel->executeFenced(request, {}, MeasureTiming::NO, deadline, {},
+                preparedModel->executeFenced(request, {}, MeasureTiming::NO, deadline, {}, {},
                                              [](ErrorStatus error, const hidl_handle& handle,
                                                 const sp<IFencedExecutionCallback>& callback) {
                                                  ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, error);
@@ -196,7 +196,7 @@
 void validateRequestFailure(const sp<IPreparedModel>& preparedModel, const Request& request) {
     SCOPED_TRACE("Expecting request to fail [executeSynchronously_1_3]");
     Return<void> executeStatus = preparedModel->executeSynchronously_1_3(
-            request, MeasureTiming::NO, {},
+            request, MeasureTiming::NO, {}, {},
             [](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes, const Timing& timing) {
                 ASSERT_NE(ErrorStatus::NONE, error);
                 EXPECT_EQ(outputShapes.size(), 0);
diff --git a/neuralnetworks/1.3/vts/functional/VtsHalNeuralnetworks.cpp b/neuralnetworks/1.3/vts/functional/VtsHalNeuralnetworks.cpp
index 9a87569..16341da 100644
--- a/neuralnetworks/1.3/vts/functional/VtsHalNeuralnetworks.cpp
+++ b/neuralnetworks/1.3/vts/functional/VtsHalNeuralnetworks.cpp
@@ -137,7 +137,7 @@
 void validateExecuteFenced(const sp<IPreparedModel>& preparedModel, const Request& request) {
     SCOPED_TRACE("Expecting request to fail [executeFenced]");
     Return<void> ret_null = preparedModel->executeFenced(
-            request, {hidl_handle(nullptr)}, V1_2::MeasureTiming::NO, {}, {},
+            request, {hidl_handle(nullptr)}, V1_2::MeasureTiming::NO, {}, {}, {},
             [](ErrorStatus error, const hidl_handle& handle,
                const sp<IFencedExecutionCallback>& callback) {
                 ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, error);