update_engine: call res_init and retry one extra time on unresolved host
libcurl error
Based on https://curl.haxx.se/docs/todo.html#updated_DNS_server_while_running:
"If /etc/resolv.conf gets updated while a program using libcurl is running, it
may cause name resolves to fail unless res_init() is called. We should
consider calling res_init() + retry once unconditionally on all name resolve
failures to mitigate against this."
This CL adds the following behavior:
When libcurl returns the CURLE_COULDNT_RESOLVE_HOST error code:
1. we increase the max retry count by 1 the first time it happens in the
lifetime of a LibcurlHttpFetcher object.
2. we call res_init unconditionally.
We also add UMA metrics to measure whether calling res_init helps
mitigate the unresolved host problem. WIP CL: https://chromium-review.googlesource.com/c/chromium/src/+/1698722
BUG=chromium:982813
TEST=FEATURES="test" emerge-kefka update_engine, tested on a device
Change-Id: Ia894eae93b3a0adbac1a831e657b75cba835dfa0
diff --git a/libcurl_http_fetcher.cc b/libcurl_http_fetcher.cc
index 06722fd..247327a 100644
--- a/libcurl_http_fetcher.cc
+++ b/libcurl_http_fetcher.cc
@@ -16,6 +16,8 @@
#include "update_engine/libcurl_http_fetcher.h"
+#include <netinet/in.h>
+#include <resolv.h>
#include <sys/types.h>
#include <unistd.h>
@@ -480,14 +482,45 @@
if (http_response_code_) {
LOG(INFO) << "HTTP response code: " << http_response_code_;
no_network_retry_count_ = 0;
+ unresolved_host_state_machine_.UpdateState(false);
} else {
LOG(ERROR) << "Unable to get http response code.";
- LogCurlHandleInfo();
+ CURLcode curl_code = GetCurlCode();
+ LOG(ERROR) << "Return code for the transfer: " << curl_code;
+ if (curl_code == CURLE_COULDNT_RESOLVE_HOST) {
+ LOG(ERROR) << "libcurl can not resolve host.";
+ unresolved_host_state_machine_.UpdateState(true);
+ if (delegate_) {
+ delegate_->ReportUpdateCheckMetrics(
+ metrics::CheckResult::kUnset,
+ metrics::CheckReaction::kUnset,
+ metrics::DownloadErrorCode::kUnresolvedHost);
+ }
+ }
}
// we're done!
CleanUp();
+ if (unresolved_host_state_machine_.getState() ==
+ UnresolvedHostStateMachine::State::kRetry) {
+ // Based on
+ // https://curl.haxx.se/docs/todo.html#updated_DNS_server_while_running,
+ // update_engine process should call res_init() and unconditionally retry.
+ res_init();
+ no_network_max_retries_++;
+ LOG(INFO) << "Will retry after reloading resolv.conf because last attempt "
+ "failed to resolve host.";
+ } else if (unresolved_host_state_machine_.getState() ==
+ UnresolvedHostStateMachine::State::kRetriedSuccess) {
+ if (delegate_) {
+ delegate_->ReportUpdateCheckMetrics(
+ metrics::CheckResult::kUnset,
+ metrics::CheckReaction::kUnset,
+ metrics::DownloadErrorCode::kUnresolvedHostRecovered);
+ }
+ }
+
// TODO(petkov): This temporary code tries to deal with the case where the
// update engine performs an update check while the network is not ready
// (e.g., right after resume). Longer term, we should check if the network
@@ -813,7 +846,8 @@
}
}
-void LibcurlHttpFetcher::LogCurlHandleInfo() {
+CURLcode LibcurlHttpFetcher::GetCurlCode() {
+ CURLcode curl_code = CURLE_OK;
while (true) {
// Repeated calls to |curl_multi_info_read| will return a new struct each
// time, until a NULL is returned as a signal that there is no more to get
@@ -831,7 +865,7 @@
CHECK_EQ(curl_handle_, curl_msg->easy_handle);
// Transfer return code reference:
// https://curl.haxx.se/libcurl/c/libcurl-errors.html
- LOG(ERROR) << "Return code for the transfer: " << curl_msg->data.result;
+ curl_code = curl_msg->data.result;
}
}
@@ -842,6 +876,32 @@
if (res == CURLE_OK && connect_error) {
LOG(ERROR) << "Connect error code from the OS: " << connect_error;
}
+
+ return curl_code;
+}
+
+void UnresolvedHostStateMachine::UpdateState(bool failed_to_resolve_host) {
+ switch (state_) {  // Advance state_ based on the latest attempt's outcome.
+ case State::kInit:  // No resolve failure recorded so far.
+ if (failed_to_resolve_host) {
+ state_ = State::kRetry;  // First failure: move to kRetry.
+ }
+ break;
+ case State::kRetry:  // A failure was recorded; this call settles the outcome.
+ if (failed_to_resolve_host) {
+ state_ = State::kNotRetry;  // Failed to resolve again.
+ } else {
+ state_ = State::kRetriedSuccess;  // Latest attempt did not fail to resolve.
+ }
+ break;
+ case State::kNotRetry:  // No transition out of this state.
+ break;
+ case State::kRetriedSuccess:  // No transition out of this state.
+ break;
+ default:
+ NOTREACHED();
+ break;
+ }
}
} // namespace chromeos_update_engine