AU: tolerate files that are symlinks in src image, yet not in new image.

This fixes a bug in delta diff generation.

BUG=chromium-os:12090
TEST=unittests; generated delta w/ problematic images

Change-Id: Ic48b012d0d9e4f37edbdcf4140d0fe9c4879e7cd

Review URL: http://codereview.chromium.org/6528006
diff --git a/delta_diff_generator.cc b/delta_diff_generator.cc
index 0d1e4ce..ccdf27a 100644
--- a/delta_diff_generator.cc
+++ b/delta_diff_generator.cc
@@ -55,6 +55,7 @@
 
 namespace {
 const size_t kBlockSize = 4096;  // bytes
+const string kNonexistentPath = "";
 
 // TODO(adlr): switch from 1GiB to 2GiB when we no longer care about old
 // clients:
@@ -97,7 +98,10 @@
   vector<char> data;
   DeltaArchiveManifest_InstallOperation operation;
 
-  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::ReadFileToDiff(old_root + path,
+  string old_path = (old_root == kNonexistentPath) ? kNonexistentPath :
+      old_root + path;
+
+  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::ReadFileToDiff(old_path,
                                                            new_root + path,
                                                            &data,
                                                            &operation,
@@ -141,6 +145,7 @@
                     int data_fd,
                     off_t* data_file_size) {
   set<ino_t> visited_inodes;
+  set<ino_t> visited_src_inodes;
   for (FilesystemIterator fs_iter(new_root,
                                   utils::SetWithValue<string>("/lost+found"));
        !fs_iter.IsEnd(); fs_iter.Increment()) {
@@ -156,10 +161,30 @@
 
     LOG(INFO) << "Encoding file " << fs_iter.GetPartialPath();
 
+    // We can't visit each dst image inode more than once, as that would
+    // duplicate work. Here, we avoid visiting each source image inode
+    // more than once. Technically, we could have multiple operations
+    // that read the same blocks from the source image for diffing, but
+    // we choose not to to avoid complexity. Eventually we will move away
+    // from using a graph/cycle detection/etc to generate diffs, and at that
+    // time, it will be easy (non-complex) to have many operations read
+    // from the same source blocks. At that time, this code can die. -adlr
+    bool should_diff_from_source = true;
+    string src_path = old_root + fs_iter.GetPartialPath();
+    if (utils::FileExists(src_path.c_str())) {
+      struct stat src_stbuf;
+      TEST_AND_RETURN_FALSE_ERRNO(0 == stat(src_path.c_str(), &src_stbuf));
+      should_diff_from_source = !utils::SetContainsKey(visited_src_inodes,
+                                                       src_stbuf.st_ino);
+      visited_src_inodes.insert(src_stbuf.st_ino);
+    }
+
     TEST_AND_RETURN_FALSE(DeltaReadFile(graph,
                                         Vertex::kInvalidIndex,
                                         blocks,
-                                        old_root,
+                                        (should_diff_from_source ?
+                                         old_root :
+                                         kNonexistentPath),
                                         new_root,
                                         fs_iter.GetPartialPath(),
                                         data_fd,
@@ -1160,7 +1185,7 @@
     TEST_AND_RETURN_FALSE(DeltaReadFile(graph,
                                         cut.old_dst,
                                         NULL,
-                                        "/-!@:&*nonexistent_path",
+                                        kNonexistentPath,
                                         new_root,
                                         (*graph)[cut.old_dst].file_name,
                                         data_fd,
diff --git a/delta_performer_unittest.cc b/delta_performer_unittest.cc
index cceffcf..bf4dcc7 100755
--- a/delta_performer_unittest.cc
+++ b/delta_performer_unittest.cc
@@ -38,7 +38,7 @@
 extern const char* kUnittestPublicKeyPath;
 
 namespace {
-  const size_t kBlockSize = 4096;
+const size_t kBlockSize = 4096;
 }  // namespace {}
 
 
@@ -256,6 +256,10 @@
     EXPECT_EQ(0, system(StringPrintf("dd if=/dev/zero of=%s/partsparese bs=1 "
                                      "seek=4096 count=1",
                                      b_mnt.c_str()).c_str()));
+    EXPECT_EQ(0, system(StringPrintf("cp %s/srchardlink0 %s/tmp && "
+                                     "mv %s/tmp %s/srchardlink1",
+                                     b_mnt.c_str(), b_mnt.c_str(),
+                                     b_mnt.c_str(), b_mnt.c_str()).c_str()));
     EXPECT_TRUE(utils::WriteFile(StringPrintf("%s/hardtocompress",
                                               b_mnt.c_str()).c_str(),
                                  reinterpret_cast<const char*>(kRandomString),
diff --git a/test_utils.cc b/test_utils.cc
index 9d1b5d0..3815d7b 100644
--- a/test_utils.cc
+++ b/test_utils.cc
@@ -208,6 +208,9 @@
   EXPECT_EQ(0, System(StringPrintf("ln -s /some/target %s/sym", kMountPath)));
   EXPECT_EQ(0, System(StringPrintf("ln %s/some_dir/test %s/testlink",
                                    kMountPath, kMountPath)));
+  EXPECT_EQ(0, System(StringPrintf("echo T > %s/srchardlink0", kMountPath)));
+  EXPECT_EQ(0, System(StringPrintf("ln %s/srchardlink0 %s/srchardlink1",
+                                   kMountPath, kMountPath)));
   EXPECT_EQ(0, System(StringPrintf("umount -d %s", kMountPath)));
 
   if (out_paths) {
@@ -223,6 +226,8 @@
     out_paths->push_back("/cdev");
     out_paths->push_back("/testlink");
     out_paths->push_back("/sym");
+    out_paths->push_back("/srchardlink0");
+    out_paths->push_back("/srchardlink1");
     out_paths->push_back("/lost+found");
   }
 }