p2p: Avoid deadlock with two partially updated devices.

Suppose you have two devices A and B on the same LAN and both devices
are using p2p for updates. Suppose that you turn on A and it starts to
update when the hourly update check kicks in. Then you reboot A in the
middle of, say, the 6th operation. At this point it will share a p2p
file with payload for 5.5 operations (any number between 5 and
6). Update checks are deferred so it's currently not updating.

Then you turn on B and once the update check kicks in it sees that A
has the update. Then B downloads and applies whatever it gets from A
and it ends up sharing a file of the same size (5.5 operations). When
A has nothing more to serve, the connection is dropped and B tries
reconnecting kDownloadP2PMaxRetryCount (5) times before failing the
update check.

Now A wakes up for its update check (we're assuming B had time to get
the bytes from A before A's hourly check kicks in). A tries to use p2p
and since it has already completed 5 operations it asks for a peer
with at least 5 ops in it. Since B qualifies (it's sharing 5.5 ops)
and is the only other machine on the LAN, A downloads from B and then
fails in the same way as B did in the paragraph above.

This results in a deadlock, with neither of the machines making forward
progress. Fortunately, kMaxP2PAttemptTimeSeconds (= two days) and
kMaxP2PAttempts (= 10) save us in this case since both A and B will
fall back to downloading without p2p.

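This CL fixes this problem by always requesting enough bytes to finish
the current operation, i.e. the minimum size asked of a peer is now
metadata size + next data offset + next data length instead of just
the next data offset.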

BUG=chromium:297170
TEST=Unit test that kPrefsUpdateStateNextDataLength is written to + unit
  tests pass. Also did a manual test where I initiated an update and
  then rebooted the device in an environment where the payload was
  available via p2p. After rebooting and triggering a non-interactive
  update check I observed that the --minimum-size value passed to
  p2p-client by update_engine (p2p-client invocations are logged in
  /var/log/messages), 69575742 bytes, was bigger than the number of
  bytes already downloaded (observed by looking at the size of the p2p
  file in /var/cache/p2p for the current attempt), 69540111 bytes.

Change-Id: I5e0e63f137ff139daec6ef8f0c83ce9dc76fb2a9
Reviewed-on: https://chromium-review.googlesource.com/170519
Reviewed-by: Alex Deymo <deymo@chromium.org>
Reviewed-by: Chris Sosa <sosa@chromium.org>
Commit-Queue: David Zeuthen <zeuthen@chromium.org>
Tested-by: David Zeuthen <zeuthen@chromium.org>
diff --git a/constants.cc b/constants.cc
index 11d5901..767dc47 100644
--- a/constants.cc
+++ b/constants.cc
@@ -55,6 +55,7 @@
     "update-over-cellular-permission";
 const char kPrefsUpdateServerCertificate[] = "update-server-cert";
 const char kPrefsUpdateStateNextDataOffset[] = "update-state-next-data-offset";
+const char kPrefsUpdateStateNextDataLength[] = "update-state-next-data-length";
 const char kPrefsUpdateStateNextOperation[] = "update-state-next-operation";
 const char kPrefsUpdateStateSHA256Context[] = "update-state-sha-256-context";
 const char kPrefsUpdateStateSignatureBlob[] = "update-state-signature-blob";
diff --git a/constants.h b/constants.h
index cd809d4..1e69df8 100644
--- a/constants.h
+++ b/constants.h
@@ -58,6 +58,7 @@
 extern const char kPrefsUpdateOverCellularPermission[];
 extern const char kPrefsUpdateServerCertificate[];
 extern const char kPrefsUpdateStateNextDataOffset[];
+extern const char kPrefsUpdateStateNextDataLength[];
 extern const char kPrefsUpdateStateNextOperation[];
 extern const char kPrefsUpdateStateSHA256Context[];
 extern const char kPrefsUpdateStateSignatureBlob[];
diff --git a/delta_performer.cc b/delta_performer.cc
index 68161d2..c20614d 100644
--- a/delta_performer.cc
+++ b/delta_performer.cc
@@ -1147,6 +1147,7 @@
   if (!quick) {
     prefs->SetString(kPrefsUpdateCheckResponseHash, "");
     prefs->SetInt64(kPrefsUpdateStateNextDataOffset, -1);
+    prefs->SetInt64(kPrefsUpdateStateNextDataLength, 0);
     prefs->SetString(kPrefsUpdateStateSHA256Context, "");
     prefs->SetString(kPrefsUpdateStateSignedSHA256Context, "");
     prefs->SetString(kPrefsUpdateStateSignatureBlob, "");
@@ -1167,6 +1168,21 @@
     TEST_AND_RETURN_FALSE(prefs_->SetInt64(kPrefsUpdateStateNextDataOffset,
                                            buffer_offset_));
     last_updated_buffer_offset_ = buffer_offset_;
+
+    if (next_operation_num_ < num_total_operations_) {
+      const bool is_kernel_partition =
+          next_operation_num_ >= num_rootfs_operations_;
+      const DeltaArchiveManifest_InstallOperation &op =
+          is_kernel_partition ?
+          manifest_.kernel_install_operations(
+              next_operation_num_ - num_rootfs_operations_) :
+          manifest_.install_operations(next_operation_num_);
+      TEST_AND_RETURN_FALSE(prefs_->SetInt64(kPrefsUpdateStateNextDataLength,
+                                             op.data_length()));
+    } else {
+      TEST_AND_RETURN_FALSE(prefs_->SetInt64(kPrefsUpdateStateNextDataLength,
+                                             0));
+    }
   }
   TEST_AND_RETURN_FALSE(prefs_->SetInt64(kPrefsUpdateStateNextOperation,
                                          next_operation_num_));
diff --git a/delta_performer_unittest.cc b/delta_performer_unittest.cc
index 523148d..c2b57e2 100644
--- a/delta_performer_unittest.cc
+++ b/delta_performer_unittest.cc
@@ -579,6 +579,8 @@
       .WillOnce(Return(false));
   EXPECT_CALL(prefs, SetInt64(kPrefsUpdateStateNextDataOffset, _))
       .WillRepeatedly(Return(true));
+  EXPECT_CALL(prefs, SetInt64(kPrefsUpdateStateNextDataLength, _))
+      .WillRepeatedly(Return(true));
   EXPECT_CALL(prefs, SetString(kPrefsUpdateStateSHA256Context, _))
       .WillRepeatedly(Return(true));
   if (op_hash_test == kValidOperationData && signature_test != kSignatureNone) {
diff --git a/omaha_request_action.cc b/omaha_request_action.cc
index e833759..016b5a8 100644
--- a/omaha_request_action.cc
+++ b/omaha_request_action.cc
@@ -876,18 +876,33 @@
 }
 
 void OmahaRequestAction::LookupPayloadViaP2P(const OmahaResponse& response) {
-  // The kPrefsUpdateStateNextDataOffset state variable tracks the
-  // offset into the payload of the last completed operation if we're
-  // in the middle of an update. As such, p2p is only useful if the
-  // peer actually has that many bytes.
+  // If the device is in the middle of an update, the state variables
+  // kPrefsUpdateStateNextDataOffset and kPrefsUpdateStateNextDataLength
+  // track the offset and length of the operation currently in
+  // progress. The offset is relative to the end of the manifest, which
+  // is kPrefsManifestMetadataSize bytes long.
+  //
+  // To make forward progress and avoid deadlocks, we need to find a
+  // peer that has at least the entire operation we're currently
+  // working on. Otherwise we may end up in a situation where two
+  // devices bounce back and forth downloading from each other,
+  // neither making any forward progress until one of them decides to
+  // stop using p2p (via kMaxP2PAttempts and kMaxP2PAttemptTimeSeconds
+  // safe-guards). See http://crbug.com/297170 for an example.
   size_t minimum_size = 0;
-  int64_t next_data_offset = -1;
+  int64_t manifest_metadata_size = 0;
+  int64_t next_data_offset = 0;
+  int64_t next_data_length = 0;
   if (system_state_ != NULL &&
+      system_state_->prefs()->GetInt64(kPrefsManifestMetadataSize,
+                                       &manifest_metadata_size) &&
+      manifest_metadata_size != -1 &&
       system_state_->prefs()->GetInt64(kPrefsUpdateStateNextDataOffset,
-                                       &next_data_offset)) {
-    if (next_data_offset > 0) {
-      minimum_size = next_data_offset;
-    }
+                                       &next_data_offset) &&
+      next_data_offset != -1 &&
+      system_state_->prefs()->GetInt64(kPrefsUpdateStateNextDataLength,
+                                       &next_data_length)) {
+    minimum_size = manifest_metadata_size + next_data_offset + next_data_length;
   }
 
   string file_id = utils::CalculateP2PFileId(response.hash, response.size);