getService: re-request HALs after timeout
Lazy HALs are started by hwservicemanager, but there is a possible
race condition:
- HAL is requested to be started and starts
- client queries HAL
- HAL executes query and exits (but init doesn't know this yet)
- client queries HAL
- HAL is requested to be started, but init thinks it is already
running, so it isn't started again
- init reaps HAL process
- client is waiting for HAL indefinitely
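This will only happen when a lazy HAL exits at the same moment a client
is requesting it again.
To recover, getService now stops waiting after one second and tries again,
while waitForHwService keeps waiting until the HAL is registered.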
Fixes: 79427800
Test: $ANDROID_BUILD_TOP/system/tools/hidl/test/run_all_device_tests.sh
Change-Id: I723a182a709f2291debad69fc4141ba9a064645e
diff --git a/transport/ServiceManagement.cpp b/transport/ServiceManagement.cpp
index f11acab..8c46506 100644
--- a/transport/ServiceManagement.cpp
+++ b/transport/ServiceManagement.cpp
@@ -537,7 +537,7 @@
return Void();
}
- void wait() {
+ void wait(bool timeout) {
using std::literals::chrono_literals::operator""s;
if (!mRegisteredForNotifications) {
@@ -548,7 +548,7 @@
}
std::unique_lock<std::mutex> lock(mMutex);
- while(true) {
+ do {
mCondition.wait_for(lock, 1s, [this]{
return mRegistered;
});
@@ -557,9 +557,8 @@
break;
}
- LOG(WARNING) << "Waited one second for " << mInterfaceName << "/" << mInstanceName
- << ". Waiting another...";
- }
+ LOG(WARNING) << "Waited one second for " << mInterfaceName << "/" << mInstanceName;
+ } while (!timeout);
}
// Be careful when using this; after calling reset(), you must always try to retrieve
@@ -600,7 +599,7 @@
void waitForHwService(
const std::string &interface, const std::string &instanceName) {
sp<Waiter> waiter = new Waiter(interface, instanceName, defaultServiceManager1_1());
- waiter->wait();
+ waiter->wait(false /* timeout */);
waiter->done();
}
@@ -710,7 +709,7 @@
if (waiter != nullptr) {
ALOGI("getService: Trying again for %s/%s...", descriptor.c_str(), instance.c_str());
- waiter->wait();
+ waiter->wait(true /* timeout */);
}
}