update_engine: add logs and metrics for libcurl errors

curl_multi_perform may return code that is neither CURLM_OK or
CURLM_CALL_MULTI_PERFORM. When error returns we log them in
update_engine.log and send UMA metrics.

When update_engine does not get http response code from libcurl, we log
internal error code of the libcurl transfer for diagnosis.

Chrome CL to add the metrics enum is here: https://chromium-review.googlesource.com/c/chromium/src/+/1566150

BUG=chromium:927039
TEST=unittest

Change-Id: Ie8ce9dc0a6ce5ff6ffc2ff9425b652d125518558
Reviewed-on: https://chromium-review.googlesource.com/1562172
Commit-Ready: Sean Abraham <seanabraham@chromium.org>
Tested-by: Xiaochu Liu <xiaochu@chromium.org>
Reviewed-by: Xiaochu Liu <xiaochu@chromium.org>
Reviewed-by: Amin Hassani <ahassani@chromium.org>
diff --git a/common/http_fetcher.h b/common/http_fetcher.h
index 2b4fc83..93b0e24 100644
--- a/common/http_fetcher.h
+++ b/common/http_fetcher.h
@@ -29,6 +29,7 @@
 
 #include "update_engine/common/http_common.h"
 #include "update_engine/common/proxy_resolver.h"
+#include "update_engine/metrics_constants.h"
 
 // This class is a simple wrapper around an HTTP library (libcurl). We can
 // easily mock out this interface for testing.
@@ -200,6 +201,13 @@
   // situations. It's OK to destroy the |fetcher| object in this callback.
   virtual void TransferComplete(HttpFetcher* fetcher, bool successful) = 0;
   virtual void TransferTerminated(HttpFetcher* fetcher) {}
+
+  // This allows |HttpFetcher| to send UMA metrics for its internal states
+  // (unrecoverable libcurl internal error, etc.).
+  virtual void ReportUpdateCheckMetrics(
+      metrics::CheckResult result,
+      metrics::CheckReaction reaction,
+      metrics::DownloadErrorCode download_error_code) {}
 };
 
 }  // namespace chromeos_update_engine
diff --git a/libcurl_http_fetcher.cc b/libcurl_http_fetcher.cc
index 4e33671..1aa7e8b 100644
--- a/libcurl_http_fetcher.cc
+++ b/libcurl_http_fetcher.cc
@@ -407,6 +407,21 @@
     }
   }
 
+  // When retcode is not |CURLM_OK| at this point, libcurl has an internal error
+  // that it is less likely to recover from (libcurl bug, out-of-memory, etc.).
+  // In case of an update check, we send UMA metrics and log the error.
+  if (is_update_check_ &&
+      (retcode == CURLM_OUT_OF_MEMORY || retcode == CURLM_INTERNAL_ERROR)) {
+    delegate_->ReportUpdateCheckMetrics(
+        metrics::CheckResult::kUnset,
+        metrics::CheckReaction::kUnset,
+        metrics::DownloadErrorCode::kInternalError);
+    LOG(ERROR) << "curl_multi_perform is in an unrecoverable error condition: "
+               << retcode;
+  } else if (retcode != CURLM_OK) {
+    LOG(ERROR) << "curl_multi_perform returns error: " << retcode;
+  }
+
   // If the transfer completes while paused, we should ignore the failure once
   // the fetcher is unpaused.
   if (running_handles == 0 && transfer_paused_ && !ignore_failure_) {
@@ -431,6 +446,7 @@
     no_network_retry_count_ = 0;
   } else {
     LOG(ERROR) << "Unable to get http response code.";
+    LogCurlHandleInfo();
   }
 
   // we're done!
@@ -756,6 +772,39 @@
                                CURLINFO_RESPONSE_CODE,
                                &http_response_code) == CURLE_OK) {
     http_response_code_ = static_cast<int>(http_response_code);
+  } else {
+    LOG(ERROR) << "Unable to get http response code from curl_easy_getinfo";
+  }
+}
+
+void LibcurlHttpFetcher::LogCurlHandleInfo() {
+  while (true) {
+    // Repeated calls to |curl_multi_info_read| will return a new struct each
+    // time, until a NULL is returned as a signal that there is no more to get
+    // at this point.
+    int msgs_in_queue;
+    CURLMsg* curl_msg =
+        curl_multi_info_read(curl_multi_handle_, &msgs_in_queue);
+    if (curl_msg == nullptr)
+      break;
+    // When |curl_msg| is |CURLMSG_DONE|, a transfer of an easy handle is done,
+    // and then data contains the return code for this transfer.
+    if (curl_msg->msg == CURLMSG_DONE) {
+      // Make sure |curl_multi_handle_| has one and only one easy handle
+      // |curl_handle_|.
+      CHECK_EQ(curl_handle_, curl_msg->easy_handle);
+      // Transfer return code reference:
+      // https://curl.haxx.se/libcurl/c/libcurl-errors.html
+      LOG(ERROR) << "Return code for the transfer: " << curl_msg->data.result;
+    }
+  }
+
+  // Gets connection error if exists.
+  long connect_error = 0;  // NOLINT(runtime/int) - curl needs long.
+  CURLcode res =
+      curl_easy_getinfo(curl_handle_, CURLINFO_OS_ERRNO, &connect_error);
+  if (res == CURLE_OK && connect_error) {
+    LOG(ERROR) << "Connect error code from the OS: " << connect_error;
   }
 }
 
diff --git a/libcurl_http_fetcher.h b/libcurl_http_fetcher.h
index 25a2df3..24103de 100644
--- a/libcurl_http_fetcher.h
+++ b/libcurl_http_fetcher.h
@@ -106,6 +106,10 @@
     max_retry_count_ = max_retry_count;
   }
 
+  void set_is_update_check(bool is_update_check) {
+    is_update_check_ = is_update_check;
+  }
+
  private:
   // libcurl's CURLOPT_CLOSESOCKETFUNCTION callback function. Called when
   // closing a socket created with the CURLOPT_OPENSOCKETFUNCTION callback.
@@ -118,6 +122,11 @@
   // Asks libcurl for the http response code and stores it in the object.
   void GetHttpResponseCode();
 
+  // Logs curl handle info.
+  // This can be called only when an http request failed to avoid spamming the
+  // logs. This must be called after |ResumeTransfer| and before |CleanUp|.
+  void LogCurlHandleInfo();
+
   // Checks whether stored HTTP response is within the success range.
   inline bool IsHttpResponseSuccess() {
     return (http_response_code_ >= 200 && http_response_code_ < 300);
@@ -265,6 +274,9 @@
   // ServerToCheck::kNone.
   ServerToCheck server_to_check_{ServerToCheck::kNone};
 
+  // True if this object is for update check.
+  bool is_update_check_{false};
+
   int low_speed_limit_bps_{kDownloadLowSpeedLimitBps};
   int low_speed_time_seconds_{kDownloadLowSpeedTimeSeconds};
   int connect_timeout_seconds_{kDownloadConnectTimeoutSeconds};
diff --git a/metrics_constants.h b/metrics_constants.h
index eabb8fb..b3833a3 100644
--- a/metrics_constants.h
+++ b/metrics_constants.h
@@ -60,6 +60,10 @@
   // above block and before the kInputMalformed field. This
   // is to ensure that error codes are not reordered.
 
+  // This error is reported when libcurl has an internal error that
+  // update_engine can't recover from.
+  kInternalError = 99,
+
   // This error code is used to convey that malformed input was given
   // to the utils::GetDownloadErrorCode() function. This should never
   // happen but if it does it's because of an internal update_engine
diff --git a/omaha_request_action.cc b/omaha_request_action.cc
index 5b69ec8..c4adec7 100644
--- a/omaha_request_action.cc
+++ b/omaha_request_action.cc
@@ -981,6 +981,14 @@
   }
 }
 
+void OmahaRequestAction::ReportUpdateCheckMetrics(
+    metrics::CheckResult result,
+    metrics::CheckReaction reaction,
+    metrics::DownloadErrorCode download_error_code) {
+  system_state_->metrics_reporter()->ReportUpdateCheckMetrics(
+      system_state_, result, reaction, download_error_code);
+}
+
 void OmahaRequestAction::CompleteProcessing() {
   ScopedActionCompleter completer(processor_, this);
   OmahaResponse& output_object = const_cast<OmahaResponse&>(GetOutputObject());
@@ -1376,8 +1384,7 @@
       break;
   }
 
-  system_state_->metrics_reporter()->ReportUpdateCheckMetrics(
-      system_state_, result, reaction, download_error_code);
+  ReportUpdateCheckMetrics(result, reaction, download_error_code);
 }
 
 bool OmahaRequestAction::ShouldIgnoreUpdate(const OmahaResponse& response,
diff --git a/omaha_request_action.h b/omaha_request_action.h
index 8e81af9..f006d69 100644
--- a/omaha_request_action.h
+++ b/omaha_request_action.h
@@ -125,6 +125,11 @@
 
   void TransferComplete(HttpFetcher* fetcher, bool successful) override;
 
+  void ReportUpdateCheckMetrics(
+      metrics::CheckResult result,
+      metrics::CheckReaction reaction,
+      metrics::DownloadErrorCode download_error_code) override;
+
   // Returns true if this is an Event request, false if it's an UpdateCheck.
   bool IsEvent() const { return event_.get() != nullptr; }
 
diff --git a/update_attempter.cc b/update_attempter.cc
index 4bbf5a1..34b7bac 100644
--- a/update_attempter.cc
+++ b/update_attempter.cc
@@ -651,6 +651,7 @@
   // Try harder to connect to the network, esp when not interactive.
   // See comment in libcurl_http_fetcher.cc.
   update_check_fetcher->set_no_network_max_retries(interactive ? 1 : 3);
+  update_check_fetcher->set_is_update_check(true);
   auto update_check_action = std::make_unique<OmahaRequestAction>(
       system_state_, nullptr, std::move(update_check_fetcher), false);
   auto response_handler_action =