Implement full canonical Burst in NN util code
Bug: 180492058
Bug: 177267324
Test: mma
Test: presubmit
Change-Id: I5018f6cf2dbaf705f74f4f46318142c64433e19d
Merged-In: I5018f6cf2dbaf705f74f4f46318142c64433e19d
(cherry picked from commit acff4063b63c04cbb28af87eab61e9a1fa70980a)
diff --git a/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp b/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp
index 2265861..eedf591 100644
--- a/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp
+++ b/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp
@@ -17,283 +17,321 @@
#define LOG_TAG "ExecutionBurstController"
#include "ExecutionBurstController.h"
+#include "ExecutionBurstUtils.h"
#include <android-base/logging.h>
+#include <android-base/thread_annotations.h>
+#include <nnapi/IBurst.h>
+#include <nnapi/IPreparedModel.h>
+#include <nnapi/Result.h>
+#include <nnapi/TypeUtils.h>
+#include <nnapi/Types.h>
+#include <nnapi/Validation.h>
+#include <nnapi/hal/1.0/Conversions.h>
+#include <nnapi/hal/HandleError.h>
+#include <nnapi/hal/ProtectCallback.h>
+#include <nnapi/hal/TransferValue.h>
#include <algorithm>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
+#include <thread>
#include <tuple>
#include <utility>
#include <vector>
-#include "ExecutionBurstUtils.h"
-#include "HalInterfaces.h"
+#include "Callbacks.h"
+#include "Conversions.h"
#include "Tracing.h"
#include "Utils.h"
-namespace android::nn {
+namespace android::hardware::neuralnetworks::V1_2::utils {
namespace {
-class BurstContextDeathHandler : public hardware::hidl_death_recipient {
- public:
- using Callback = std::function<void()>;
-
- BurstContextDeathHandler(const Callback& onDeathCallback) : mOnDeathCallback(onDeathCallback) {
- CHECK(onDeathCallback != nullptr);
+nn::GeneralResult<sp<IBurstContext>> executionBurstResultCallback(
+ V1_0::ErrorStatus status, const sp<IBurstContext>& burstContext) {
+ HANDLE_HAL_STATUS(status) << "IPreparedModel::configureExecutionBurst failed with status "
+ << toString(status);
+ if (burstContext == nullptr) {
+ return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
+ << "IPreparedModel::configureExecutionBurst returned nullptr for burst";
}
-
- void serviceDied(uint64_t /*cookie*/, const wp<hidl::base::V1_0::IBase>& /*who*/) override {
- LOG(ERROR) << "BurstContextDeathHandler::serviceDied -- service unexpectedly died!";
- mOnDeathCallback();
- }
-
- private:
- const Callback mOnDeathCallback;
-};
-
-} // anonymous namespace
-
-hardware::Return<void> ExecutionBurstController::ExecutionBurstCallback::getMemories(
- const hardware::hidl_vec<int32_t>& slots, getMemories_cb cb) {
- std::lock_guard<std::mutex> guard(mMutex);
-
- // get all memories
- hardware::hidl_vec<hardware::hidl_memory> memories(slots.size());
- std::transform(slots.begin(), slots.end(), memories.begin(), [this](int32_t slot) {
- return slot < mMemoryCache.size() ? mMemoryCache[slot] : hardware::hidl_memory{};
- });
-
- // ensure all memories are valid
- if (!std::all_of(memories.begin(), memories.end(),
- [](const hardware::hidl_memory& memory) { return memory.valid(); })) {
- cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {});
- return hardware::Void();
- }
-
- // return successful
- cb(V1_0::ErrorStatus::NONE, std::move(memories));
- return hardware::Void();
+ return burstContext;
}
-std::vector<int32_t> ExecutionBurstController::ExecutionBurstCallback::getSlots(
- const hardware::hidl_vec<hardware::hidl_memory>& memories,
- const std::vector<intptr_t>& keys) {
- std::lock_guard<std::mutex> guard(mMutex);
-
- // retrieve (or bind) all slots corresponding to memories
- std::vector<int32_t> slots;
- slots.reserve(memories.size());
- for (size_t i = 0; i < memories.size(); ++i) {
- slots.push_back(getSlotLocked(memories[i], keys[i]));
+nn::GeneralResult<hidl_vec<hidl_memory>> getMemoriesHelper(
+ const hidl_vec<int32_t>& slots,
+ const std::shared_ptr<ExecutionBurstController::MemoryCache>& memoryCache) {
+ hidl_vec<hidl_memory> memories(slots.size());
+ for (size_t i = 0; i < slots.size(); ++i) {
+ const int32_t slot = slots[i];
+ const auto memory = NN_TRY(memoryCache->getMemory(slot));
+ memories[i] = NN_TRY(V1_0::utils::unvalidatedConvert(memory));
+ if (!memories[i].valid()) {
+ return NN_ERROR() << "memory at slot " << slot << " is invalid";
+ }
}
- return slots;
+ return memories;
}
-std::pair<bool, int32_t> ExecutionBurstController::ExecutionBurstCallback::freeMemory(
- intptr_t key) {
- std::lock_guard<std::mutex> guard(mMutex);
+} // namespace
- auto iter = mMemoryIdToSlot.find(key);
- if (iter == mMemoryIdToSlot.end()) {
- return {false, 0};
- }
- const int32_t slot = iter->second;
- mMemoryIdToSlot.erase(key);
- mMemoryCache[slot] = {};
- mFreeSlots.push(slot);
- return {true, slot};
+// MemoryCache methods
+
+ExecutionBurstController::MemoryCache::MemoryCache() {
+ constexpr size_t kPreallocatedCount = 1024;
+ std::vector<int32_t> freeSlotsSpace;
+ freeSlotsSpace.reserve(kPreallocatedCount);
+ mFreeSlots = std::stack<int32_t, std::vector<int32_t>>(std::move(freeSlotsSpace));
+ mMemoryCache.reserve(kPreallocatedCount);
+ mCacheCleaner.reserve(kPreallocatedCount);
}
-int32_t ExecutionBurstController::ExecutionBurstCallback::getSlotLocked(
- const hardware::hidl_memory& memory, intptr_t key) {
- auto iter = mMemoryIdToSlot.find(key);
- if (iter == mMemoryIdToSlot.end()) {
- const int32_t slot = allocateSlotLocked();
- mMemoryIdToSlot[key] = slot;
- mMemoryCache[slot] = memory;
- return slot;
- } else {
+void ExecutionBurstController::MemoryCache::setBurstContext(sp<IBurstContext> burstContext) {
+ std::lock_guard guard(mMutex);
+ mBurstContext = std::move(burstContext);
+}
+
+std::pair<int32_t, ExecutionBurstController::MemoryCache::SharedCleanup>
+ExecutionBurstController::MemoryCache::cacheMemory(const nn::SharedMemory& memory) {
+ std::unique_lock lock(mMutex);
+ base::ScopedLockAssertion lockAssert(mMutex);
+
+ // Use existing cache entry if (1) the Memory object is in the cache and (2) the cache entry is
+ // not currently being freed.
+ auto iter = mMemoryIdToSlot.find(memory);
+ while (iter != mMemoryIdToSlot.end()) {
const int32_t slot = iter->second;
- return slot;
+ if (auto cleaner = mCacheCleaner.at(slot).lock()) {
+ return std::make_pair(slot, std::move(cleaner));
+ }
+
+        // If the code reaches this point, the Memory object was in the cache, but it is
+        // currently being destroyed. Wait until the cache entry has been freed, then loop to
+        // check whether the entry is now absent or has been re-added by another thread.
+ mCond.wait(lock);
+ iter = mMemoryIdToSlot.find(memory);
}
+
+ // Allocate a new cache entry.
+ const int32_t slot = allocateSlotLocked();
+ mMemoryIdToSlot[memory] = slot;
+ mMemoryCache[slot] = memory;
+
+ // Create reference-counted self-cleaning cache object.
+ auto self = weak_from_this();
+ Task cleanup = [memory, memoryCache = std::move(self)] {
+ if (const auto lock = memoryCache.lock()) {
+ lock->freeMemory(memory);
+ }
+ };
+ auto cleaner = std::make_shared<const Cleanup>(std::move(cleanup));
+ mCacheCleaner[slot] = cleaner;
+
+ return std::make_pair(slot, std::move(cleaner));
}
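
MemoryCache::cacheMemory above returns a slot plus a reference-counted hold: the entry stays
cached while any hold is alive, and releasing the last hold runs a cleanup task that evicts the
entry and frees the slot. The following standalone sketch (not part of the patch) illustrates that
self-cleaning pattern with hypothetical names (Cache, acquire, evict); the Cleanup type here is a
minimal stand-in, and the mutex/condition-variable synchronization used by MemoryCache is omitted.

// --- sketch (not part of the patch) ---
#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <utility>

// Runs a task when destroyed, mirroring the role of the Cleanup object held by MemoryCache.
class Cleanup {
  public:
    explicit Cleanup(std::function<void()> task) : mTask(std::move(task)) {}
    Cleanup(const Cleanup&) = delete;
    Cleanup& operator=(const Cleanup&) = delete;
    ~Cleanup() { mTask(); }

  private:
    std::function<void()> mTask;
};

class Cache : public std::enable_shared_from_this<Cache> {
  public:
    using Hold = std::shared_ptr<const Cleanup>;

    // Returns the entry's slot and a hold; the entry is evicted when the last hold is released.
    std::pair<int32_t, Hold> acquire(const std::string& key) {
        // Reuse the existing entry if it is present and not currently being destroyed.
        // (The real MemoryCache waits on a condition variable when the hold has expired
        // but the eviction has not finished yet; that wait is omitted here.)
        if (const auto iter = mHolds.find(key); iter != mHolds.end()) {
            if (auto hold = iter->second.lock()) {
                return std::make_pair(mSlots.at(key), std::move(hold));
            }
        }

        // Allocate a new entry with a self-cleaning hold that evicts it on release.
        const int32_t slot = mNextSlot++;
        mSlots[key] = slot;
        auto hold = std::make_shared<const Cleanup>([key, self = weak_from_this()] {
            if (const auto cache = self.lock()) {
                cache->evict(key);
            }
        });
        mHolds[key] = hold;
        return std::make_pair(slot, std::move(hold));
    }

  private:
    void evict(const std::string& key) {
        std::cout << "evicting \"" << key << "\" from slot " << mSlots.at(key) << std::endl;
        mHolds.erase(key);
        mSlots.erase(key);
    }

    int32_t mNextSlot = 0;
    std::map<std::string, int32_t> mSlots;
    std::map<std::string, std::weak_ptr<const Cleanup>> mHolds;
};

int main() {
    const auto cache = std::make_shared<Cache>();
    auto [slotA, holdA] = cache->acquire("pool0");
    auto [slotB, holdB] = cache->acquire("pool0");  // same entry, same slot, same hold
    std::cout << (slotA == slotB) << std::endl;     // prints 1
    holdA.reset();                                  // entry still held by holdB
    holdB.reset();                                  // last hold released: prints the eviction line
    return 0;
}
// --- end sketch ---
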
-int32_t ExecutionBurstController::ExecutionBurstCallback::allocateSlotLocked() {
+nn::GeneralResult<nn::SharedMemory> ExecutionBurstController::MemoryCache::getMemory(int32_t slot) {
+ std::lock_guard guard(mMutex);
+ if (slot < 0 || static_cast<size_t>(slot) >= mMemoryCache.size()) {
+ return NN_ERROR() << "Invalid slot: " << slot << " vs " << mMemoryCache.size();
+ }
+ return mMemoryCache[slot];
+}
+
+void ExecutionBurstController::MemoryCache::freeMemory(const nn::SharedMemory& memory) {
+ {
+ std::lock_guard guard(mMutex);
+ const int32_t slot = mMemoryIdToSlot.at(memory);
+ if (mBurstContext) {
+ mBurstContext->freeMemory(slot);
+ }
+ mMemoryIdToSlot.erase(memory);
+ mMemoryCache[slot] = {};
+ mCacheCleaner[slot].reset();
+ mFreeSlots.push(slot);
+ }
+ mCond.notify_all();
+}
+
+int32_t ExecutionBurstController::MemoryCache::allocateSlotLocked() {
constexpr size_t kMaxNumberOfSlots = std::numeric_limits<int32_t>::max();
- // if there is a free slot, use it
- if (mFreeSlots.size() > 0) {
+ // If there is a free slot, use it.
+ if (!mFreeSlots.empty()) {
const int32_t slot = mFreeSlots.top();
mFreeSlots.pop();
return slot;
}
- // otherwise use a slot for the first time
- CHECK(mMemoryCache.size() < kMaxNumberOfSlots) << "Exceeded maximum number of slots!";
+ // Use a slot for the first time.
+ CHECK_LT(mMemoryCache.size(), kMaxNumberOfSlots) << "Exceeded maximum number of slots!";
const int32_t slot = static_cast<int32_t>(mMemoryCache.size());
mMemoryCache.emplace_back();
+ mCacheCleaner.emplace_back();
return slot;
}
-std::unique_ptr<ExecutionBurstController> ExecutionBurstController::create(
- const sp<V1_2::IPreparedModel>& preparedModel,
+// ExecutionBurstCallback methods
+
+ExecutionBurstController::ExecutionBurstCallback::ExecutionBurstCallback(
+ const std::shared_ptr<MemoryCache>& memoryCache)
+ : kMemoryCache(memoryCache) {
+ CHECK(memoryCache != nullptr);
+}
+
+Return<void> ExecutionBurstController::ExecutionBurstCallback::getMemories(
+ const hidl_vec<int32_t>& slots, getMemories_cb cb) {
+ const auto memoryCache = kMemoryCache.lock();
+ if (memoryCache == nullptr) {
+ LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories called after "
+ "the MemoryCache has been freed";
+ cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
+ return Void();
+ }
+
+ const auto maybeMemories = getMemoriesHelper(slots, memoryCache);
+ if (!maybeMemories.has_value()) {
+ const auto& [message, code] = maybeMemories.error();
+ LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories failed with "
+ << code << ": " << message;
+ cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {});
+ return Void();
+ }
+
+ cb(V1_0::ErrorStatus::NONE, maybeMemories.value());
+ return Void();
+}
+
+// ExecutionBurstController methods
+
+nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> ExecutionBurstController::create(
+ const sp<V1_2::IPreparedModel>& preparedModel, FallbackFunction fallback,
std::chrono::microseconds pollingTimeWindow) {
// check inputs
if (preparedModel == nullptr) {
- LOG(ERROR) << "ExecutionBurstController::create passed a nullptr";
- return nullptr;
+ return NN_ERROR() << "ExecutionBurstController::create passed a nullptr";
}
- // create callback object
- sp<ExecutionBurstCallback> callback = new ExecutionBurstCallback();
-
// create FMQ objects
- auto [requestChannelSenderTemp, requestChannelDescriptor] =
- RequestChannelSender::create(kExecutionBurstChannelLength);
- auto [resultChannelReceiverTemp, resultChannelDescriptor] =
- ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow);
- std::shared_ptr<RequestChannelSender> requestChannelSender =
- std::move(requestChannelSenderTemp);
- std::shared_ptr<ResultChannelReceiver> resultChannelReceiver =
- std::move(resultChannelReceiverTemp);
+ auto [requestChannelSender, requestChannelDescriptor] =
+ NN_TRY(RequestChannelSender::create(kExecutionBurstChannelLength));
+ auto [resultChannelReceiver, resultChannelDescriptor] =
+ NN_TRY(ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow));
// check FMQ objects
- if (!requestChannelSender || !resultChannelReceiver || !requestChannelDescriptor ||
- !resultChannelDescriptor) {
- LOG(ERROR) << "ExecutionBurstController::create failed to create FastMessageQueue";
- return nullptr;
- }
+ CHECK(requestChannelSender != nullptr);
+ CHECK(requestChannelDescriptor != nullptr);
+ CHECK(resultChannelReceiver != nullptr);
+ CHECK(resultChannelDescriptor != nullptr);
+
+ // create memory cache
+ auto memoryCache = std::make_shared<MemoryCache>();
+
+ // create callback object
+ auto burstCallback = sp<ExecutionBurstCallback>::make(memoryCache);
+ auto cb = hal::utils::CallbackValue(executionBurstResultCallback);
// configure burst
- V1_0::ErrorStatus errorStatus;
- sp<IBurstContext> burstContext;
- const hardware::Return<void> ret = preparedModel->configureExecutionBurst(
- callback, *requestChannelDescriptor, *resultChannelDescriptor,
- [&errorStatus, &burstContext](V1_0::ErrorStatus status,
- const sp<IBurstContext>& context) {
- errorStatus = status;
- burstContext = context;
- });
+ const Return<void> ret = preparedModel->configureExecutionBurst(
+ burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb);
+ HANDLE_TRANSPORT_FAILURE(ret);
- // check burst
- if (!ret.isOk()) {
- LOG(ERROR) << "IPreparedModel::configureExecutionBurst failed with description "
- << ret.description();
- return nullptr;
- }
- if (errorStatus != V1_0::ErrorStatus::NONE) {
- LOG(ERROR) << "IPreparedModel::configureExecutionBurst failed with status "
- << toString(errorStatus);
- return nullptr;
- }
- if (burstContext == nullptr) {
- LOG(ERROR) << "IPreparedModel::configureExecutionBurst returned nullptr for burst";
- return nullptr;
- }
+ auto burstContext = NN_TRY(cb.take());
+ memoryCache->setBurstContext(burstContext);
// create death handler object
- BurstContextDeathHandler::Callback onDeathCallback = [requestChannelSender,
- resultChannelReceiver] {
- requestChannelSender->invalidate();
- resultChannelReceiver->invalidate();
- };
- const sp<BurstContextDeathHandler> deathHandler = new BurstContextDeathHandler(onDeathCallback);
-
- // linkToDeath registers a callback that will be invoked on service death to
- // proactively handle service crashes. If the linkToDeath call fails,
- // asynchronous calls are susceptible to hangs if the service crashes before
- // providing the response.
- const hardware::Return<bool> deathHandlerRet = burstContext->linkToDeath(deathHandler, 0);
- if (!deathHandlerRet.isOk() || deathHandlerRet != true) {
- LOG(ERROR) << "ExecutionBurstController::create -- Failed to register a death recipient "
- "for the IBurstContext object.";
- return nullptr;
- }
+ auto deathHandler = NN_TRY(neuralnetworks::utils::DeathHandler::create(burstContext));
+ deathHandler.protectCallbackForLifetimeOfDeathHandler(requestChannelSender.get());
+ deathHandler.protectCallbackForLifetimeOfDeathHandler(resultChannelReceiver.get());
// make and return controller
- return std::make_unique<ExecutionBurstController>(requestChannelSender, resultChannelReceiver,
- burstContext, callback, deathHandler);
+ return std::make_shared<const ExecutionBurstController>(
+ PrivateConstructorTag{}, std::move(fallback), std::move(requestChannelSender),
+ std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext),
+ std::move(memoryCache), std::move(deathHandler));
}
ExecutionBurstController::ExecutionBurstController(
- const std::shared_ptr<RequestChannelSender>& requestChannelSender,
- const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,
- const sp<IBurstContext>& burstContext, const sp<ExecutionBurstCallback>& callback,
- const sp<hardware::hidl_death_recipient>& deathHandler)
- : mRequestChannelSender(requestChannelSender),
- mResultChannelReceiver(resultChannelReceiver),
- mBurstContext(burstContext),
- mMemoryCache(callback),
- mDeathHandler(deathHandler) {}
+ PrivateConstructorTag /*tag*/, FallbackFunction fallback,
+ std::unique_ptr<RequestChannelSender> requestChannelSender,
+ std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
+ sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
+ std::shared_ptr<MemoryCache> memoryCache, neuralnetworks::utils::DeathHandler deathHandler)
+ : kFallback(std::move(fallback)),
+ mRequestChannelSender(std::move(requestChannelSender)),
+ mResultChannelReceiver(std::move(resultChannelReceiver)),
+ mBurstCallback(std::move(callback)),
+ mBurstContext(std::move(burstContext)),
+ mMemoryCache(std::move(memoryCache)),
+ kDeathHandler(std::move(deathHandler)) {}
-ExecutionBurstController::~ExecutionBurstController() {
- // It is safe to ignore any errors resulting from this unlinkToDeath call
- // because the ExecutionBurstController object is already being destroyed
- // and its underlying IBurstContext object is no longer being used by the NN
- // runtime.
- if (mDeathHandler) {
- mBurstContext->unlinkToDeath(mDeathHandler).isOk();
+ExecutionBurstController::OptionalCacheHold ExecutionBurstController::cacheMemory(
+ const nn::SharedMemory& memory) const {
+ auto [slot, hold] = mMemoryCache->cacheMemory(memory);
+ return hold;
+}
+
+nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>
+ExecutionBurstController::execute(const nn::Request& request, nn::MeasureTiming measure) const {
+ // This is the first point when we know an execution is occurring, so begin to collect
+ // systraces. Note that the first point we can begin collecting systraces in
+ // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so
+ // ExecutionBurstServer collects systraces at different points in the code.
+ NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::execute");
+
+ // if the request is valid but of a higher version than what's supported in burst execution,
+ // fall back to another execution path
+ if (const auto version = NN_TRY(hal::utils::makeExecutionFailure(nn::validate(request)));
+ version > nn::Version::ANDROID_Q) {
+        // fall back to another execution path if a fallback is available
+ if (kFallback) {
+ return kFallback(request, measure);
+ }
+ return NN_ERROR() << "Request object has features not supported by IBurst::execute";
}
-}
-static std::tuple<int, std::vector<V1_2::OutputShape>, V1_2::Timing, bool> getExecutionResult(
- V1_0::ErrorStatus status, std::vector<V1_2::OutputShape> outputShapes, V1_2::Timing timing,
- bool fallback) {
- auto [n, checkedOutputShapes, checkedTiming] =
- getExecutionResult(convertToV1_3(status), std::move(outputShapes), timing);
- return {n, convertToV1_2(checkedOutputShapes), convertToV1_2(checkedTiming), fallback};
-}
+ // clear pools field of request, as they will be provided via slots
+ const auto requestWithoutPools =
+ nn::Request{.inputs = request.inputs, .outputs = request.outputs, .pools = {}};
+ auto hidlRequest = NN_TRY(
+ hal::utils::makeExecutionFailure(V1_0::utils::unvalidatedConvert(requestWithoutPools)));
+ const auto hidlMeasure = NN_TRY(hal::utils::makeExecutionFailure(convert(measure)));
-std::tuple<int, std::vector<V1_2::OutputShape>, V1_2::Timing, bool>
-ExecutionBurstController::compute(const V1_0::Request& request, V1_2::MeasureTiming measure,
- const std::vector<intptr_t>& memoryIds) {
- // This is the first point when we know an execution is occurring, so begin
- // to collect systraces. Note that the first point we can begin collecting
- // systraces in ExecutionBurstServer is when the RequestChannelReceiver
- // realizes there is data in the FMQ, so ExecutionBurstServer collects
- // systraces at different points in the code.
- NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::compute");
+ // Ensure that at most one execution is in flight at any given time.
+ const bool alreadyInFlight = mExecutionInFlight.test_and_set();
+ if (alreadyInFlight) {
+ return NN_ERROR() << "IBurst already has an execution in flight";
+ }
+ const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); });
- std::lock_guard<std::mutex> guard(mMutex);
+ std::vector<int32_t> slots;
+ std::vector<OptionalCacheHold> holds;
+ slots.reserve(request.pools.size());
+ holds.reserve(request.pools.size());
+ for (const auto& memoryPool : request.pools) {
+ auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
+ slots.push_back(slot);
+ holds.push_back(std::move(hold));
+ }
// send request packet
- const std::vector<int32_t> slots = mMemoryCache->getSlots(request.pools, memoryIds);
- const bool success = mRequestChannelSender->send(request, measure, slots);
- if (!success) {
- LOG(ERROR) << "Error sending FMQ packet";
- // only use fallback execution path if the packet could not be sent
- return getExecutionResult(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming12,
- /*fallback=*/true);
+ const auto sendStatus = mRequestChannelSender->send(hidlRequest, hidlMeasure, slots);
+ if (!sendStatus.ok()) {
+ // fallback to another execution path if the packet could not be sent
+ if (kFallback) {
+ return kFallback(request, measure);
+ }
+ return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error();
}
// get result packet
- const auto result = mResultChannelReceiver->getBlocking();
- if (!result) {
- LOG(ERROR) << "Error retrieving FMQ packet";
- // only use fallback execution path if the packet could not be sent
- return getExecutionResult(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming12,
- /*fallback=*/false);
- }
-
- // unpack results and return (only use fallback execution path if the
- // packet could not be sent)
- auto [status, outputShapes, timing] = std::move(*result);
- return getExecutionResult(status, std::move(outputShapes), timing, /*fallback=*/false);
+ const auto [status, outputShapes, timing] =
+ NN_TRY(hal::utils::makeExecutionFailure(mResultChannelReceiver->getBlocking()));
+ return executionCallback(status, outputShapes, timing);
}
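
execute() above serializes executions on a single burst object by claiming an atomic_flag with
test_and_set and clearing it through a scope guard on every return path. A standalone sketch of
that guard pattern follows (not part of the patch; android::base::make_scope_guard is replaced by
a small hypothetical RAII helper so the example stands alone).

// --- sketch (not part of the patch) ---
#include <atomic>
#include <iostream>
#include <utility>

// Minimal stand-in for a scope guard: runs a callable when the scope is exited.
template <typename Fn>
class ScopeGuard {
  public:
    explicit ScopeGuard(Fn fn) : mFn(std::move(fn)) {}
    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;
    ~ScopeGuard() { mFn(); }

  private:
    Fn mFn;
};

std::atomic_flag gExecutionInFlight = ATOMIC_FLAG_INIT;

bool tryExecute() {
    // Claim the in-flight flag; fail if another execution already holds it.
    if (gExecutionInFlight.test_and_set()) {
        return false;
    }
    // Clear the flag on every return path, mirroring the scope guard in execute().
    const ScopeGuard guard([] { gExecutionInFlight.clear(); });

    // ... send the FMQ request packet and wait for the result packet ...
    return true;
}

int main() {
    std::cout << tryExecute() << std::endl;  // 1: the flag was free
    std::cout << tryExecute() << std::endl;  // 1 again: the guard cleared the flag on return
    return 0;
}
// --- end sketch ---
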
-void ExecutionBurstController::freeMemory(intptr_t key) {
- std::lock_guard<std::mutex> guard(mMutex);
-
- bool valid;
- int32_t slot;
- std::tie(valid, slot) = mMemoryCache->freeMemory(key);
- if (valid) {
- mBurstContext->freeMemory(slot).isOk();
- }
-}
-
-} // namespace android::nn
+} // namespace android::hardware::neuralnetworks::V1_2::utils
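
For reference, a sketch of how a caller might drive the refactored interface end to end (not part
of the patch). It assumes the headers included above, a live IPreparedModel, placement in the
android::hardware::neuralnetworks::V1_2::utils namespace, and that FallbackFunction accepts
nullptr, as the null check in execute() suggests; results are unpacked by hand rather than with
NN_TRY.

// --- sketch (not part of the patch) ---
void runBurstExecution(const sp<V1_2::IPreparedModel>& preparedModel, const nn::Request& request,
                       const nn::SharedMemory& pool) {
    // Create the burst object with no fallback and a zero polling window, so the result
    // receiver blocks on the FMQ futex instead of spinning.
    const auto maybeBurst = ExecutionBurstController::create(preparedModel, /*fallback=*/nullptr,
                                                             std::chrono::microseconds{0});
    if (!maybeBurst.has_value()) {
        LOG(ERROR) << "create failed: " << maybeBurst.error().message;
        return;
    }
    const std::shared_ptr<const ExecutionBurstController>& burst = maybeBurst.value();

    // Keep the request's memory pool cached across executions: while 'hold' is alive the slot
    // is not recycled, so repeated executions reuse the cached memory instead of re-sending it.
    const auto hold = burst->cacheMemory(pool);

    // Execute over the FMQ; on success the result carries the output shapes and timing.
    const auto result = burst->execute(request, nn::MeasureTiming::NO);
    if (!result.has_value()) {
        LOG(ERROR) << "execute failed: " << result.error().message;
        return;
    }
    const auto& [outputShapes, timing] = result.value();
    // ... consume outputShapes and timing ...
}
// --- end sketch ---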