Add resiliency in update_engine (UE) against interrupted HTTP downloads.
Currently when there's an interruption in the HTTP transfer, update_engine
attempts to resume the transfer only 3 times. After that it gives up.
For reasons yet to be investigated (31019), this happens quite consistently
on ARM, so update_engine needs to be more resilient in such cases.
The fix is to increase the retry count to 20 for the post-OOBE case. We'll
still maintain the original limit of 3 retries for the OOBE case so as not
to stall OOBE forever.
BUG=chromeos-31511:Add resiliency in UE against interrupted HTTP downloads
TEST=Existing unit tests hit all new code paths and pass. Manually tested
on ZGB and Kaen.
Change-Id: I4e47761dc9b859701023b749c902ec2b1c649b6f
Reviewed-on: https://gerrit.chromium.org/gerrit/24416
Commit-Ready: Jay Srinivasan <jaysri@chromium.org>
Reviewed-by: Jay Srinivasan <jaysri@chromium.org>
Tested-by: Jay Srinivasan <jaysri@chromium.org>
diff --git a/libcurl_http_fetcher.cc b/libcurl_http_fetcher.cc
index e1e93cb..e2bfc15 100644
--- a/libcurl_http_fetcher.cc
+++ b/libcurl_http_fetcher.cc
@@ -27,7 +27,6 @@
namespace chromeos_update_engine {
namespace {
-const int kMaxRetriesCount = 3;
const int kNoNetworkRetrySeconds = 10;
const char kCACertificatesPath[] = "/usr/share/chromeos-ca-certificates";
} // namespace {}
@@ -214,6 +213,8 @@
transfer_size_ = -1;
resume_offset_ = 0;
retry_count_ = 0;
+ max_retry_count_ = (utils::IsOOBEComplete()) ? kMaxRetryCountOobeComplete
+ : kMaxRetryCountOobeNotComplete;
no_network_retry_count_ = 0;
http_response_code_ = 0;
terminate_requested_ = false;
@@ -306,15 +307,19 @@
delegate_->TransferComplete(this, false); // signal fail
}
} else if ((transfer_size_ >= 0) && (bytes_downloaded_ < transfer_size_)) {
- // Need to restart transfer
retry_count_++;
- LOG(INFO) << "Restarting transfer b/c we finished, had downloaded "
- << bytes_downloaded_ << " bytes, but transfer_size_ is "
- << transfer_size_ << ". retry_count: " << retry_count_;
- if (retry_count_ > kMaxRetriesCount) {
+ LOG(INFO) << "Transfer interrupted after downloading "
+ << bytes_downloaded_ << " of " << transfer_size_ << " bytes. "
+ << transfer_size_ - bytes_downloaded_ << " bytes remaining "
+ << "after " << retry_count_ << " attempt(s)";
+
+ if (retry_count_ > max_retry_count_) {
+ LOG(INFO) << "Reached max attempts (" << retry_count_ << ")";
if (delegate_)
delegate_->TransferComplete(this, false); // signal fail
} else {
+ // Need to restart transfer
+ LOG(INFO) << "Restarting transfer to download the remaining bytes";
g_timeout_add_seconds(retry_seconds_,
&LibcurlHttpFetcher::StaticRetryTimeoutCallback,
this);