Clean up getService().
The logic is now easier to read. We only sleep
in cases where we would otherwise end up in a tight
loop:
1) Waiter failure
2) Binder call into retrieved service fails
Bug: 67425500
Test: boot, hidl_test, hidl_test java
Change-Id: I068e03e1bcd885ddf3ced25c83f239058df94302
diff --git a/transport/ServiceManagement.cpp b/transport/ServiceManagement.cpp
index c9ee29a..71129bf 100644
--- a/transport/ServiceManagement.cpp
+++ b/transport/ServiceManagement.cpp
@@ -467,6 +467,37 @@
}
struct Waiter : IServiceNotification {
+ Waiter(const std::string& interface, const std::string& instanceName,
+ const sp<IServiceManager1_1>& sm) : mInterfaceName(interface),
+ mInstanceName(instanceName), mSm(sm) {
+ Return<bool> ret = mSm->registerForNotifications(interface, instanceName, this);
+
+ if (!ret.isOk()) {
+ LOG(ERROR) << "Transport error, " << ret.description()
+ << ", during notification registration for " << interface << "/"
+ << instanceName << ".";
+ return;
+ }
+
+ if (!ret) {
+ LOG(ERROR) << "Could not register for notifications for " << interface << "/"
+ << instanceName << ".";
+ return;
+ }
+
+ mRegisteredForNotifications = true;
+ }
+
+ ~Waiter() {
+ if (mRegisteredForNotifications) {
+ if (!mSm->unregisterForNotifications(mInterfaceName, mInstanceName, this).
+ withDefault(false)) {
+ LOG(ERROR) << "Could not unregister service notification for "
+ << mInterfaceName << "/" << mInstanceName << ".";
+ }
+ }
+ }
+
Return<void> onRegistration(const hidl_string& /* fqName */,
const hidl_string& /* name */,
bool /* preexisting */) override {
@@ -481,9 +512,16 @@
return Void();
}
- void wait(const std::string &interface, const std::string &instanceName) {
+ void wait() {
using std::literals::chrono_literals::operator""s;
+ if (!mRegisteredForNotifications) {
+ // As an alternative, just sleep for a second and return
+ LOG(WARNING) << "Waiting one second for " << mInterfaceName << "/" << mInstanceName;
+ sleep(1);
+ return;
+ }
+
std::unique_lock<std::mutex> lock(mMutex);
while(true) {
mCondition.wait_for(lock, 1s, [this]{
@@ -494,49 +532,64 @@
break;
}
- LOG(WARNING) << "Waited one second for "
- << interface << "/" << instanceName
+ LOG(WARNING) << "Waited one second for " << mInterfaceName << "/" << mInstanceName
<< ". Waiting another...";
}
}
+ // Be careful when using this; after calling reset(), you must always try to retrieve
+ // the corresponding service before blocking on the waiter; otherwise, you might run
+ // into a race-condition where the service has just (re-)registered, you clear the state
+ // here, and subsequently calling waiter->wait() will block forever.
+ void reset() {
+ std::unique_lock<std::mutex> lock(mMutex);
+ mRegistered = false;
+ }
private:
+ const std::string mInterfaceName;
+ const std::string mInstanceName;
+ const sp<IServiceManager1_1>& mSm;
std::mutex mMutex;
std::condition_variable mCondition;
bool mRegistered = false;
+ bool mRegisteredForNotifications = false;
};
void waitForHwService(
const std::string &interface, const std::string &instanceName) {
- const sp<IServiceManager1_1> manager = defaultServiceManager1_1();
+ sp<Waiter> waiter = new Waiter(interface, instanceName, defaultServiceManager1_1());
+ waiter->wait();
+}
- if (manager == nullptr) {
- LOG(ERROR) << "Could not get default service manager.";
- return;
+// Prints relevant error/warning messages for error return values from
+// details::canCastInterface(), both transaction errors (!castReturn.isOk())
+// as well as actual cast failures (castReturn.isOk() && castReturn = false).
+// Returns 'true' if the error is non-fatal and it's useful to retry
+bool handleCastError(const Return<bool>& castReturn, const std::string& descriptor,
+ const std::string& instance) {
+ if (castReturn.isOk()) {
+ if (castReturn) {
+ details::logAlwaysFatal("Successful cast value passed into handleCastError.");
+ }
+ // This should never happen, and there's not really a point in retrying.
+ ALOGE("getService: received incompatible service (bug in hwservicemanager?) for "
+ "%s/%s.", descriptor.c_str(), instance.c_str());
+ return false;
}
-
- sp<Waiter> waiter = new Waiter();
- Return<bool> ret = manager->registerForNotifications(interface, instanceName, waiter);
-
- if (!ret.isOk()) {
- LOG(ERROR) << "Transport error, " << ret.description()
- << ", during notification registration for "
- << interface << "/" << instanceName << ".";
- return;
+ if (castReturn.isDeadObject()) {
+ ALOGW("getService: found dead hwbinder service for %s/%s.", descriptor.c_str(),
+ instance.c_str());
+ return true;
}
-
- if (!ret) {
- LOG(ERROR) << "Could not register for notifications for "
- << interface << "/" << instanceName << ".";
- return;
- }
-
- waiter->wait(interface, instanceName);
-
- if (!manager->unregisterForNotifications(interface, instanceName, waiter).withDefault(false)) {
- LOG(ERROR) << "Could not unregister service notification for "
- << interface << "/" << instanceName << ".";
- }
+ // This can happen due to:
+ // 1) No SELinux permissions
+ // 2) Other transaction failure (no buffer space, kernel error)
+ // The first isn't recoverable, but the second is.
+ // Since we can't yet differentiate between the two, and clients depend
+ // on us not blocking in case 1), treat this as a fatal error for now.
+ ALOGW("getService: unable to call into hwbinder service for %s/%s.",
+ descriptor.c_str(), instance.c_str());
+ return false;
}
sp<::android::hidl::base::V1_0::IBase> getRawServiceInternal(const std::string& descriptor,
@@ -546,7 +599,7 @@
using ::android::hidl::base::V1_0::IBase;
using ::android::hidl::manager::V1_0::IServiceManager;
- const sp<IServiceManager> sm = defaultServiceManager();
+ const sp<IServiceManager1_1> sm = defaultServiceManager1_1();
if (sm == nullptr) {
ALOGE("getService: defaultServiceManager() is null");
return nullptr;
@@ -580,17 +633,9 @@
const bool vintfLegacy = (transport == Transport::EMPTY);
#endif // LIBHIDL_TARGET_TREBLE
- for (int tries = 0;
- !getStub && (vintfHwbinder || (vintfLegacy && tries == 0)) && (retry || tries < 1);
- tries++) {
- if (tries > 1) {
- ALOGI("getService: Will do try %d for %s/%s in 1s...", tries, descriptor.c_str(),
- instance.c_str());
- sleep(1); // TODO(b/67425500): remove and update waitForHwService function
- }
- if (vintfHwbinder && tries > 0) {
- waitForHwService(descriptor, instance);
- }
+ sp<Waiter> waiter = new Waiter(descriptor, instance, sm);
+ while (!getStub && (vintfHwbinder || vintfLegacy)) {
+ waiter->reset(); // don't reorder this -- see comments on reset()
Return<sp<IBase>> ret = sm->get(descriptor, instance);
if (!ret.isOk()) {
ALOGE("getService: defaultServiceManager()->get returns %s for %s/%s.",
@@ -598,39 +643,22 @@
break;
}
sp<IBase> base = ret;
- if (base == nullptr) {
- if (tries > 0) {
- ALOGW("getService: found unexpected null hwbinder interface for %s/%s.",
- descriptor.c_str(), instance.c_str());
+ if (base != nullptr) {
+ Return<bool> canCastRet =
+ details::canCastInterface(base.get(), descriptor.c_str(), true /* emitError */);
+
+ if (canCastRet.isOk() && canCastRet) {
+ return base; // still needs to be wrapped by Bp class.
}
- continue;
+
+ if (!handleCastError(canCastRet, descriptor, instance)) break;
}
- Return<bool> canCastRet =
- details::canCastInterface(base.get(), descriptor.c_str(), true /* emitError */);
+ // In case of legacy or we were not asked to retry, don't.
+ if (vintfLegacy || !retry) break;
- if (!canCastRet.isOk()) {
- if (canCastRet.isDeadObject()) {
- ALOGW("getService: found dead hwbinder service for %s/%s.", descriptor.c_str(),
- instance.c_str());
- continue;
- }
- // TODO(b/67425500): breaks getService == nullptr => hal available assumptions if the
- // service has a transaction failure (one example of this is if the service's binder
- // buffer is full). If this isn't here, you get an infinite loop when you don't have
- // permission to talk to a service.
- ALOGW("getService: unable to call into hwbinder service for %s/%s.", descriptor.c_str(),
- instance.c_str());
- break;
- }
-
- if (!canCastRet) {
- ALOGW("getService: received incompatible service (bug in hwservicemanager?) for %s/%s.",
- descriptor.c_str(), instance.c_str());
- break;
- }
-
- return base; // still needs to be wrapped by Bp class.
+ ALOGI("getService: Trying again for %s/%s...", descriptor.c_str(), instance.c_str());
+ waiter->wait();
}
if (getStub || vintfPassthru || vintfLegacy) {