Re-organize NNAPI Burst utility classes This change: * Renames ExecutionBurstController to Burst in 1.2/utils * Renames ExecutionBurstUtils to BurstUtils in 1.2/utils * Renames ExecutionBurstServer to Burst in common/adapter Bug: N/A Test: mma Change-Id: Ibd460229887c8c9cd23ebc6ee61da37c7c820288

commit: 137ee99a165e15689d2842aa8899c6fead82cfdf [log] [tgz]
author: Michael Butler <butlermichael@google.com> Mon Nov 01 16:40:31 2021 -0700
committer: Michael Butler <butlermichael@google.com> Mon Nov 01 16:40:31 2021 -0700
tree: 6070de6de5a47b7cbdb3b0f9648b5c295f784783
parent: cfc16f94b200da9fb3946ac08b554a4ffa9d792b [diff]
diff --git a/neuralnetworks/utils/adapter/include/nnapi/hal/Burst.h b/neuralnetworks/utils/adapter/include/nnapi/hal/Burst.h
new file mode 100644
index 0000000..a3aa706
--- /dev/null
+++ b/neuralnetworks/utils/adapter/include/nnapi/hal/Burst.h

@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_UTILS_ADAPTER_BURST_H
+#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_UTILS_ADAPTER_BURST_H
+
+#include <android-base/thread_annotations.h>
+#include <android/hardware/neuralnetworks/1.0/types.h>
+#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
+#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
+#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
+#include <android/hardware/neuralnetworks/1.2/types.h>
+#include <fmq/MessageQueue.h>
+#include <hidl/MQDescriptor.h>
+#include <nnapi/IBurst.h>
+#include <nnapi/Result.h>
+#include <nnapi/Types.h>
+#include <nnapi/hal/1.0/ProtectCallback.h>
+#include <nnapi/hal/1.2/BurstUtils.h>
+
+#include <atomic>
+#include <chrono>
+#include <memory>
+#include <optional>
+#include <thread>
+#include <tuple>
+#include <vector>
+
+namespace android::hardware::neuralnetworks::adapter {
+
+/**
+ * The Burst class is responsible for waiting for and deserializing a request object from a FMQ,
+ * performing the inference, and serializing the result back across another FMQ.
+ */
+class Burst : public V1_2::IBurstContext {
+    struct PrivateConstructorTag {};
+
+  public:
+    /**
+     * Class to cache the memory objects for a burst object.
+     *
+     * This class is thread-safe.
+     */
+    class MemoryCache {
+      public:
+        // Precondition: burstExecutor != nullptr
+        // Precondition: burstCallback != nullptr
+        MemoryCache(nn::SharedBurst burstExecutor, sp<V1_2::IBurstCallback> burstCallback);
+
+        /**
+         * Get the cached memory objects corresponding to provided slot identifiers.
+         *
+         * If the slot entry is not present in the cache, this class will use V1_2::IBurstCallback
+         * to retrieve those entries that are not present in the cache, then cache them.
+         *
+         * @param slots Identifiers of memory objects to be retrieved.
+         * @return A vector where each element is the memory object and a ref-counted cache "hold"
+         *     object to preserve the cache entry of the IBurst object as long as the "hold" object
+         *     is alive, otherwise GeneralError. Each element of the vector corresponds to the
+         *     element of slot.
+         */
+        nn::GeneralResult<std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>>
+        getCacheEntries(const std::vector<int32_t>& slots);
+
+        /**
+         * Remove an entry from the cache.
+         *
+         * @param slot Identifier of the memory object to be removed from the cache.
+         */
+        void removeCacheEntry(int32_t slot);
+
+      private:
+        nn::GeneralResult<void> ensureCacheEntriesArePresentLocked(
+                const std::vector<int32_t>& slots) REQUIRES(mMutex);
+        nn::GeneralResult<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>
+        getCacheEntryLocked(int32_t slot) REQUIRES(mMutex);
+        void addCacheEntryLocked(int32_t slot, nn::SharedMemory memory) REQUIRES(mMutex);
+
+        std::mutex mMutex;
+        std::map<int32_t, std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>> mCache
+                GUARDED_BY(mMutex);
+        nn::SharedBurst kBurstExecutor;
+        const sp<V1_2::IBurstCallback> kBurstCallback;
+    };
+
+    /**
+     * Create automated context to manage FMQ-based executions.
+     *
+     * This function is intended to be used by a service to automatically:
+     * 1) Receive data from a provided FMQ
+     * 2) Execute a model with the given information
+     * 3) Send the result to the created FMQ
+     *
+     * @param callback Callback used to retrieve memories corresponding to unrecognized slots.
+     * @param requestChannel Input FMQ channel through which the client passes the request to the
+     *     service.
+     * @param resultChannel Output FMQ channel from which the client can retrieve the result of the
+     *     execution.
+     * @param burstExecutor Object which maintains a local cache of the memory pools and executes
+     *     using the cached memory pools.
+     * @param pollingTimeWindow How much time (in microseconds) the Burst is allowed to poll the FMQ
+     *     before waiting on the blocking futex. Polling may result in lower latencies at the
+     *     potential cost of more power usage.
+     * @return V1_2::IBurstContext Handle to the burst context.
+     */
+    static nn::GeneralResult<sp<Burst>> create(
+            const sp<V1_2::IBurstCallback>& callback,
+            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
+            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
+            nn::SharedBurst burstExecutor,
+            std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0});
+
+    Burst(PrivateConstructorTag tag, const sp<V1_2::IBurstCallback>& callback,
+          std::unique_ptr<V1_2::utils::RequestChannelReceiver> requestChannel,
+          std::unique_ptr<V1_2::utils::ResultChannelSender> resultChannel,
+          nn::SharedBurst burstExecutor);
+    ~Burst();
+
+    // Used by the NN runtime to preemptively remove any stored memory. See
+    // V1_2::IBurstContext::freeMemory for more information.
+    Return<void> freeMemory(int32_t slot) override;
+
+  private:
+    // Work loop that will continue processing execution requests until the Burst object is freed.
+    void task();
+
+    nn::ExecutionResult<std::pair<hidl_vec<V1_2::OutputShape>, V1_2::Timing>> execute(
+            const V1_0::Request& requestWithoutPools, const std::vector<int32_t>& slotsOfPools,
+            V1_2::MeasureTiming measure);
+
+    std::thread mWorker;
+    std::atomic<bool> mTeardown{false};
+    const sp<V1_2::IBurstCallback> mCallback;
+    const std::unique_ptr<V1_2::utils::RequestChannelReceiver> mRequestChannelReceiver;
+    const std::unique_ptr<V1_2::utils::ResultChannelSender> mResultChannelSender;
+    const nn::SharedBurst mBurstExecutor;
+    MemoryCache mMemoryCache;
+};
+
+}  // namespace android::hardware::neuralnetworks::adapter
+
+#endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_UTILS_ADAPTER_BURST_H

diff --git a/neuralnetworks/utils/adapter/src/Burst.cpp b/neuralnetworks/utils/adapter/src/Burst.cpp
new file mode 100644
index 0000000..8b2e1dd
--- /dev/null
+++ b/neuralnetworks/utils/adapter/src/Burst.cpp

@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Burst.h"
+
+#include <android-base/logging.h>
+#include <nnapi/IBurst.h>
+#include <nnapi/Result.h>
+#include <nnapi/TypeUtils.h>
+#include <nnapi/Types.h>
+#include <nnapi/Validation.h>
+#include <nnapi/hal/1.0/Conversions.h>
+#include <nnapi/hal/1.0/HandleError.h>
+#include <nnapi/hal/1.0/ProtectCallback.h>
+#include <nnapi/hal/1.2/BurstUtils.h>
+#include <nnapi/hal/1.2/Conversions.h>
+#include <nnapi/hal/TransferValue.h>
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+#include <map>
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "Tracing.h"
+
+namespace android::hardware::neuralnetworks::adapter {
+namespace {
+
+constexpr V1_2::Timing kTiming = {std::numeric_limits<uint64_t>::max(),
+                                  std::numeric_limits<uint64_t>::max()};
+
+nn::GeneralResult<std::vector<nn::SharedMemory>> getMemoriesCallback(
+        V1_0::ErrorStatus status, const hidl_vec<hidl_memory>& memories) {
+    HANDLE_STATUS_HIDL(status) << "getting burst memories failed with " << toString(status);
+    std::vector<nn::SharedMemory> canonicalMemories;
+    canonicalMemories.reserve(memories.size());
+    for (const auto& memory : memories) {
+        canonicalMemories.push_back(NN_TRY(nn::convert(memory)));
+    }
+    return canonicalMemories;
+}
+
+}  // anonymous namespace
+
+Burst::MemoryCache::MemoryCache(nn::SharedBurst burstExecutor,
+                                sp<V1_2::IBurstCallback> burstCallback)
+    : kBurstExecutor(std::move(burstExecutor)), kBurstCallback(std::move(burstCallback)) {
+    CHECK(kBurstExecutor != nullptr);
+    CHECK(kBurstCallback != nullptr);
+}
+
+nn::GeneralResult<std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>>
+Burst::MemoryCache::getCacheEntries(const std::vector<int32_t>& slots) {
+    std::lock_guard guard(mMutex);
+    NN_TRY(ensureCacheEntriesArePresentLocked(slots));
+
+    std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>> results;
+    results.reserve(slots.size());
+    for (int32_t slot : slots) {
+        results.push_back(NN_TRY(getCacheEntryLocked(slot)));
+    }
+
+    return results;
+}
+
+nn::GeneralResult<void> Burst::MemoryCache::ensureCacheEntriesArePresentLocked(
+        const std::vector<int32_t>& slots) {
+    const auto slotIsKnown = [this](int32_t slot)
+                                     REQUIRES(mMutex) { return mCache.count(slot) > 0; };
+
+    // find unique unknown slots
+    std::vector<int32_t> unknownSlots = slots;
+    std::sort(unknownSlots.begin(), unknownSlots.end());
+    auto unknownSlotsEnd = std::unique(unknownSlots.begin(), unknownSlots.end());
+    unknownSlotsEnd = std::remove_if(unknownSlots.begin(), unknownSlotsEnd, slotIsKnown);
+    unknownSlots.erase(unknownSlotsEnd, unknownSlots.end());
+
+    // quick-exit if all slots are known
+    if (unknownSlots.empty()) {
+        return {};
+    }
+
+    auto cb = neuralnetworks::utils::CallbackValue(getMemoriesCallback);
+
+    const auto ret = kBurstCallback->getMemories(unknownSlots, cb);
+    HANDLE_TRANSPORT_FAILURE(ret);
+
+    auto returnedMemories = NN_TRY(cb.take());
+
+    if (returnedMemories.size() != unknownSlots.size()) {
+        return NN_ERROR() << "Burst::MemoryCache::ensureCacheEntriesArePresentLocked: Error "
+                             "retrieving memories -- count mismatch between requested memories ("
+                          << unknownSlots.size() << ") and returned memories ("
+                          << returnedMemories.size() << ")";
+    }
+
+    // add memories to unknown slots
+    for (size_t i = 0; i < unknownSlots.size(); ++i) {
+        addCacheEntryLocked(unknownSlots[i], std::move(returnedMemories[i]));
+    }
+
+    return {};
+}
+
+nn::GeneralResult<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>
+Burst::MemoryCache::getCacheEntryLocked(int32_t slot) {
+    if (const auto iter = mCache.find(slot); iter != mCache.end()) {
+        return iter->second;
+    }
+    return NN_ERROR() << "Burst::MemoryCache::getCacheEntryLocked failed because slot " << slot
+                      << " is not present in the cache";
+}
+
+void Burst::MemoryCache::addCacheEntryLocked(int32_t slot, nn::SharedMemory memory) {
+    auto hold = kBurstExecutor->cacheMemory(memory);
+    mCache.emplace(slot, std::make_pair(std::move(memory), std::move(hold)));
+}
+
+void Burst::MemoryCache::removeCacheEntry(int32_t slot) {
+    std::lock_guard guard(mMutex);
+    mCache.erase(slot);
+}
+
+// Burst methods
+
+nn::GeneralResult<sp<Burst>> Burst::create(
+        const sp<V1_2::IBurstCallback>& callback,
+        const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
+        const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel, nn::SharedBurst burstExecutor,
+        std::chrono::microseconds pollingTimeWindow) {
+    // check inputs
+    if (callback == nullptr || burstExecutor == nullptr) {
+        return NN_ERROR() << "Burst::create passed a nullptr";
+    }
+
+    // create FMQ objects
+    auto requestChannelReceiver =
+            NN_TRY(V1_2::utils::RequestChannelReceiver::create(requestChannel, pollingTimeWindow));
+    auto resultChannelSender = NN_TRY(V1_2::utils::ResultChannelSender::create(resultChannel));
+
+    // check FMQ objects
+    CHECK(requestChannelReceiver != nullptr);
+    CHECK(resultChannelSender != nullptr);
+
+    // make and return context
+    return sp<Burst>::make(PrivateConstructorTag{}, callback, std::move(requestChannelReceiver),
+                           std::move(resultChannelSender), std::move(burstExecutor));
+}
+
+Burst::Burst(PrivateConstructorTag /*tag*/, const sp<V1_2::IBurstCallback>& callback,
+             std::unique_ptr<V1_2::utils::RequestChannelReceiver> requestChannel,
+             std::unique_ptr<V1_2::utils::ResultChannelSender> resultChannel,
+             nn::SharedBurst burstExecutor)
+    : mCallback(callback),
+      mRequestChannelReceiver(std::move(requestChannel)),
+      mResultChannelSender(std::move(resultChannel)),
+      mBurstExecutor(std::move(burstExecutor)),
+      mMemoryCache(mBurstExecutor, mCallback) {
+    // TODO: highly document the threading behavior of this class
+    mWorker = std::thread([this] { task(); });
+}
+
+Burst::~Burst() {
+    // set teardown flag
+    mTeardown = true;
+    mRequestChannelReceiver->invalidate();
+
+    // wait for task thread to end
+    mWorker.join();
+}
+
+Return<void> Burst::freeMemory(int32_t slot) {
+    mMemoryCache.removeCacheEntry(slot);
+    return Void();
+}
+
+void Burst::task() {
+    // loop until the burst object is being destroyed
+    while (!mTeardown) {
+        // receive request
+        auto arguments = mRequestChannelReceiver->getBlocking();
+
+        // if the request packet was not properly received, return a generic error and skip the
+        // execution
+        //
+        // if the burst is being torn down, skip the execution so the "task" function can end
+        if (!arguments.has_value()) {
+            if (!mTeardown) {
+                mResultChannelSender->send(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kTiming);
+            }
+            continue;
+        }
+
+        // unpack the arguments; types are Request, std::vector<int32_t>, and V1_2::MeasureTiming,
+        // respectively
+        const auto [requestWithoutPools, slotsOfPools, measure] = std::move(arguments).value();
+
+        auto result = execute(requestWithoutPools, slotsOfPools, measure);
+
+        // return result
+        if (result.has_value()) {
+            const auto& [outputShapes, timing] = result.value();
+            mResultChannelSender->send(V1_0::ErrorStatus::NONE, outputShapes, timing);
+        } else {
+            const auto& [message, code, outputShapes] = result.error();
+            LOG(ERROR) << "IBurst::execute failed with " << code << ": " << message;
+            mResultChannelSender->send(V1_2::utils::convert(code).value(),
+                                       V1_2::utils::convert(outputShapes).value(), kTiming);
+        }
+    }
+}
+
+nn::ExecutionResult<std::pair<hidl_vec<V1_2::OutputShape>, V1_2::Timing>> Burst::execute(
+        const V1_0::Request& requestWithoutPools, const std::vector<int32_t>& slotsOfPools,
+        V1_2::MeasureTiming measure) {
+    NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
+                 "Burst getting memory, executing, and returning results");
+
+    // ensure executor with cache has required memory
+    const auto cacheEntries = NN_TRY(mMemoryCache.getCacheEntries(slotsOfPools));
+
+    // convert request, populating its pools
+    // This code performs an unvalidated convert because the request object without its pools is
+    // invalid because it is incomplete. Instead, the validation is performed after the memory pools
+    // have been added to the request.
+    auto canonicalRequest = NN_TRY(nn::unvalidatedConvert(requestWithoutPools));
+    CHECK(canonicalRequest.pools.empty());
+    std::transform(cacheEntries.begin(), cacheEntries.end(),
+                   std::back_inserter(canonicalRequest.pools),
+                   [](const auto& cacheEntry) { return cacheEntry.first; });
+    NN_TRY(validate(canonicalRequest));
+
+    nn::MeasureTiming canonicalMeasure = NN_TRY(nn::convert(measure));
+
+    const auto [outputShapes, timing] =
+            NN_TRY(mBurstExecutor->execute(canonicalRequest, canonicalMeasure, {}, {}));
+
+    return std::make_pair(NN_TRY(V1_2::utils::convert(outputShapes)),
+                          NN_TRY(V1_2::utils::convert(timing)));
+}
+
+}  // namespace android::hardware::neuralnetworks::adapter

diff --git a/neuralnetworks/utils/adapter/src/PreparedModel.cpp b/neuralnetworks/utils/adapter/src/PreparedModel.cpp
index b03a131..a14e782 100644
--- a/neuralnetworks/utils/adapter/src/PreparedModel.cpp
+++ b/neuralnetworks/utils/adapter/src/PreparedModel.cpp

@@ -16,6 +16,8 @@
 
 #include "PreparedModel.h"
 
+#include "Burst.h"
+
 #include <android-base/logging.h>
 #include <android/hardware/neuralnetworks/1.0/IExecutionCallback.h>
 #include <android/hardware/neuralnetworks/1.0/types.h>
@@ -32,7 +34,6 @@
 #include <nnapi/Types.h>
 #include <nnapi/Validation.h>
 #include <nnapi/hal/1.0/Utils.h>
-#include <nnapi/hal/1.2/ExecutionBurstServer.h>
 #include <nnapi/hal/1.2/Utils.h>
 #include <nnapi/hal/1.3/Conversions.h>
 #include <nnapi/hal/1.3/Utils.h>
@@ -277,9 +278,8 @@
         const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
         const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel) {
     auto burstExecutor = NN_TRY(preparedModel->configureExecutionBurst());
-    return V1_2::utils::ExecutionBurstServer::create(
-            callback, requestChannel, resultChannel, std::move(burstExecutor),
-            V1_2::utils::getBurstServerPollingTimeWindow());
+    return Burst::create(callback, requestChannel, resultChannel, std::move(burstExecutor),
+                         V1_2::utils::getBurstServerPollingTimeWindow());
 }
 
 nn::GeneralResult<std::pair<hidl_handle, sp<V1_3::IFencedExecutionCallback>>> executeFenced(
commit	137ee99a165e15689d2842aa8899c6fead82cfdf	[log] [tgz]
author	Michael Butler <butlermichael@google.com>	Mon Nov 01 16:40:31 2021 -0700
committer	Michael Butler <butlermichael@google.com>	Mon Nov 01 16:40:31 2021 -0700
tree	6070de6de5a47b7cbdb3b0f9648b5c295f784783
parent	cfc16f94b200da9fb3946ac08b554a4ffa9d792b [diff]