update_engine: add logs and metrics for libcurl errors
curl_multi_perform may return code that is neither CURLM_OK or
CURLM_CALL_MULTI_PERFORM. When error returns we log them in
update_engine.log and send UMA metrics.
When update_engine does not get http response code from libcurl, we log
internal error code of the libcurl transfer for diagnosis.
Chrome CL to add the metrics enum is here: https://chromium-review.googlesource.com/c/chromium/src/+/1566150
BUG=chromium:927039
TEST=unittest
Change-Id: Ie8ce9dc0a6ce5ff6ffc2ff9425b652d125518558
Reviewed-on: https://chromium-review.googlesource.com/1562172
Commit-Ready: Sean Abraham <seanabraham@chromium.org>
Tested-by: Xiaochu Liu <xiaochu@chromium.org>
Reviewed-by: Xiaochu Liu <xiaochu@chromium.org>
Reviewed-by: Amin Hassani <ahassani@chromium.org>
diff --git a/common/http_fetcher.h b/common/http_fetcher.h
index 2b4fc83..93b0e24 100644
--- a/common/http_fetcher.h
+++ b/common/http_fetcher.h
@@ -29,6 +29,7 @@
#include "update_engine/common/http_common.h"
#include "update_engine/common/proxy_resolver.h"
+#include "update_engine/metrics_constants.h"
// This class is a simple wrapper around an HTTP library (libcurl). We can
// easily mock out this interface for testing.
@@ -200,6 +201,13 @@
// situations. It's OK to destroy the |fetcher| object in this callback.
virtual void TransferComplete(HttpFetcher* fetcher, bool successful) = 0;
virtual void TransferTerminated(HttpFetcher* fetcher) {}
+
+ // This allows |HttpFetcher| to send UMA metrics for its internal states
+ // (unrecoverable libcurl internal error, etc.).
+ virtual void ReportUpdateCheckMetrics(
+ metrics::CheckResult result,
+ metrics::CheckReaction reaction,
+ metrics::DownloadErrorCode download_error_code) {}
};
} // namespace chromeos_update_engine
diff --git a/libcurl_http_fetcher.cc b/libcurl_http_fetcher.cc
index 4e33671..1aa7e8b 100644
--- a/libcurl_http_fetcher.cc
+++ b/libcurl_http_fetcher.cc
@@ -407,6 +407,21 @@
}
}
+ // When retcode is not |CURLM_OK| at this point, libcurl has an internal error
+ // that it is less likely to recover from (libcurl bug, out-of-memory, etc.).
+ // In case of an update check, we send UMA metrics and log the error.
+ if (is_update_check_ &&
+ (retcode == CURLM_OUT_OF_MEMORY || retcode == CURLM_INTERNAL_ERROR)) {
+ delegate_->ReportUpdateCheckMetrics(
+ metrics::CheckResult::kUnset,
+ metrics::CheckReaction::kUnset,
+ metrics::DownloadErrorCode::kInternalError);
+ LOG(ERROR) << "curl_multi_perform is in an unrecoverable error condition: "
+ << retcode;
+ } else if (retcode != CURLM_OK) {
+ LOG(ERROR) << "curl_multi_perform returns error: " << retcode;
+ }
+
// If the transfer completes while paused, we should ignore the failure once
// the fetcher is unpaused.
if (running_handles == 0 && transfer_paused_ && !ignore_failure_) {
@@ -431,6 +446,7 @@
no_network_retry_count_ = 0;
} else {
LOG(ERROR) << "Unable to get http response code.";
+ LogCurlHandleInfo();
}
// we're done!
@@ -756,6 +772,39 @@
CURLINFO_RESPONSE_CODE,
&http_response_code) == CURLE_OK) {
http_response_code_ = static_cast<int>(http_response_code);
+ } else {
+ LOG(ERROR) << "Unable to get http response code from curl_easy_getinfo";
+ }
+}
+
+void LibcurlHttpFetcher::LogCurlHandleInfo() {
+ while (true) {
+ // Repeated calls to |curl_multi_info_read| will return a new struct each
+ // time, until a NULL is returned as a signal that there is no more to get
+ // at this point.
+ int msgs_in_queue;
+ CURLMsg* curl_msg =
+ curl_multi_info_read(curl_multi_handle_, &msgs_in_queue);
+ if (curl_msg == nullptr)
+ break;
+ // When |curl_msg| is |CURLMSG_DONE|, a transfer of an easy handle is done,
+ // and then data contains the return code for this transfer.
+ if (curl_msg->msg == CURLMSG_DONE) {
+ // Make sure |curl_multi_handle_| has one and only one easy handle
+ // |curl_handle_|.
+ CHECK_EQ(curl_handle_, curl_msg->easy_handle);
+ // Transfer return code reference:
+ // https://curl.haxx.se/libcurl/c/libcurl-errors.html
+ LOG(ERROR) << "Return code for the transfer: " << curl_msg->data.result;
+ }
+ }
+
+ // Gets connection error if exists.
+ long connect_error = 0; // NOLINT(runtime/int) - curl needs long.
+ CURLcode res =
+ curl_easy_getinfo(curl_handle_, CURLINFO_OS_ERRNO, &connect_error);
+ if (res == CURLE_OK && connect_error) {
+ LOG(ERROR) << "Connect error code from the OS: " << connect_error;
}
}
diff --git a/libcurl_http_fetcher.h b/libcurl_http_fetcher.h
index 25a2df3..24103de 100644
--- a/libcurl_http_fetcher.h
+++ b/libcurl_http_fetcher.h
@@ -106,6 +106,10 @@
max_retry_count_ = max_retry_count;
}
+ void set_is_update_check(bool is_update_check) {
+ is_update_check_ = is_update_check;
+ }
+
private:
// libcurl's CURLOPT_CLOSESOCKETFUNCTION callback function. Called when
// closing a socket created with the CURLOPT_OPENSOCKETFUNCTION callback.
@@ -118,6 +122,11 @@
// Asks libcurl for the http response code and stores it in the object.
void GetHttpResponseCode();
+ // Logs curl handle info.
+ // This can be called only when an http request failed to avoid spamming the
+ // logs. This must be called after |ResumeTransfer| and before |CleanUp|.
+ void LogCurlHandleInfo();
+
// Checks whether stored HTTP response is within the success range.
inline bool IsHttpResponseSuccess() {
return (http_response_code_ >= 200 && http_response_code_ < 300);
@@ -265,6 +274,9 @@
// ServerToCheck::kNone.
ServerToCheck server_to_check_{ServerToCheck::kNone};
+ // True if this object is for update check.
+ bool is_update_check_{false};
+
int low_speed_limit_bps_{kDownloadLowSpeedLimitBps};
int low_speed_time_seconds_{kDownloadLowSpeedTimeSeconds};
int connect_timeout_seconds_{kDownloadConnectTimeoutSeconds};
diff --git a/metrics_constants.h b/metrics_constants.h
index eabb8fb..b3833a3 100644
--- a/metrics_constants.h
+++ b/metrics_constants.h
@@ -60,6 +60,10 @@
// above block and before the kInputMalformed field. This
// is to ensure that error codes are not reordered.
+ // This error is reported when libcurl has an internal error that
+ // update_engine can't recover from.
+ kInternalError = 99,
+
// This error code is used to convey that malformed input was given
// to the utils::GetDownloadErrorCode() function. This should never
// happen but if it does it's because of an internal update_engine
diff --git a/omaha_request_action.cc b/omaha_request_action.cc
index 5b69ec8..c4adec7 100644
--- a/omaha_request_action.cc
+++ b/omaha_request_action.cc
@@ -981,6 +981,14 @@
}
}
+void OmahaRequestAction::ReportUpdateCheckMetrics(
+ metrics::CheckResult result,
+ metrics::CheckReaction reaction,
+ metrics::DownloadErrorCode download_error_code) {
+ system_state_->metrics_reporter()->ReportUpdateCheckMetrics(
+ system_state_, result, reaction, download_error_code);
+}
+
void OmahaRequestAction::CompleteProcessing() {
ScopedActionCompleter completer(processor_, this);
OmahaResponse& output_object = const_cast<OmahaResponse&>(GetOutputObject());
@@ -1376,8 +1384,7 @@
break;
}
- system_state_->metrics_reporter()->ReportUpdateCheckMetrics(
- system_state_, result, reaction, download_error_code);
+ ReportUpdateCheckMetrics(result, reaction, download_error_code);
}
bool OmahaRequestAction::ShouldIgnoreUpdate(const OmahaResponse& response,
diff --git a/omaha_request_action.h b/omaha_request_action.h
index 8e81af9..f006d69 100644
--- a/omaha_request_action.h
+++ b/omaha_request_action.h
@@ -125,6 +125,11 @@
void TransferComplete(HttpFetcher* fetcher, bool successful) override;
+ void ReportUpdateCheckMetrics(
+ metrics::CheckResult result,
+ metrics::CheckReaction reaction,
+ metrics::DownloadErrorCode download_error_code) override;
+
// Returns true if this is an Event request, false if it's an UpdateCheck.
bool IsEvent() const { return event_.get() != nullptr; }
diff --git a/update_attempter.cc b/update_attempter.cc
index 4bbf5a1..34b7bac 100644
--- a/update_attempter.cc
+++ b/update_attempter.cc
@@ -651,6 +651,7 @@
// Try harder to connect to the network, esp when not interactive.
// See comment in libcurl_http_fetcher.cc.
update_check_fetcher->set_no_network_max_retries(interactive ? 1 : 3);
+ update_check_fetcher->set_is_update_check(true);
auto update_check_action = std::make_unique<OmahaRequestAction>(
system_state_, nullptr, std::move(update_check_fetcher), false);
auto response_handler_action =