AU: Delta Diff Generator

Adds a class that takes two root filesystem images and generates a
delta between them. It is not yet well tested, but this will
improve once the diff applicator is written.

Also adds an executable to run the generator.

Other changes:
- Stop leaking loop devices in unittests
- extent mapper: support sparse files; add the ability to get the FS block size
- AppendBlockToExtents: support sparse files
- subprocess: be more verbose on errors
- utils: add WriteAll (WriteAll avoids short-write() returns)
- utils: add a mkstemp wrapper for ease of use
- utils: add VectorIndexOf, which finds the index of an element in a vector

Review URL: http://codereview.chromium.org/891002
diff --git a/bzip.cc b/bzip.cc
index 4a41980..3ac15d8 100644
--- a/bzip.cc
+++ b/bzip.cc
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include "update_engine/gzip.h"
+#include "update_engine/bzip.h"
 #include <stdlib.h>
 #include <algorithm>
 #include <bzlib.h>
diff --git a/delta_diff_generator.cc b/delta_diff_generator.cc
index 4114c32..4181057 100644
--- a/delta_diff_generator.cc
+++ b/delta_diff_generator.cc
@@ -3,3 +3,863 @@
 // found in the LICENSE file.
 
 #include "update_engine/delta_diff_generator.h"
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <algorithm>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+#include <bzlib.h>
+#include "chromeos/obsolete_logging.h"
+#include "update_engine/bzip.h"
+#include "update_engine/cycle_breaker.h"
+#include "update_engine/extent_mapper.h"
+#include "update_engine/file_writer.h"
+#include "update_engine/filesystem_iterator.h"
+#include "update_engine/graph_types.h"
+#include "update_engine/graph_utils.h"
+#include "update_engine/subprocess.h"
+#include "update_engine/topological_sort.h"
+#include "update_engine/update_metadata.pb.h"
+#include "update_engine/utils.h"
+
+using std::make_pair;
+using std::min;
+using std::set;
+using std::string;
+using std::vector;
+
+namespace chromeos_update_engine {
+
+typedef DeltaDiffGenerator::Block Block;
+
+namespace {
+const size_t kBlockSize = 4096;
+const char* const kBsdiffPath = "/usr/bin/bsdiff";
+const uint64 kVersionNumber = 1;
+const char* const kDeltaMagic = "CrAU";
+
+// Stores all Extents for a file into 'out'. Returns true on success.
+bool GatherExtents(const string& path,
+                   google::protobuf::RepeatedPtrField<Extent>* out) {
+  vector<Extent> extents;
+  TEST_AND_RETURN_FALSE(extent_mapper::ExtentsForFileFibmap(path, &extents));
+  DeltaDiffGenerator::StoreExtents(extents, out);
+  return true;
+}
+
+// Runs the bsdiff tool on two files and returns the resulting delta in
+// 'out'. Returns true on success.
+bool BsdiffFiles(const string& old_file,
+                 const string& new_file,
+                 vector<char>* out) {
+  const string kPatchFile = "/tmp/delta.patchXXXXXX";
+  string patch_file_path;
+
+  TEST_AND_RETURN_FALSE(
+      utils::MakeTempFile(kPatchFile, &patch_file_path, NULL));
+
+  vector<string> cmd;
+  cmd.push_back(kBsdiffPath);
+  cmd.push_back(old_file);
+  cmd.push_back(new_file);
+  cmd.push_back(patch_file_path);
+
+  int rc = 1;
+  vector<char> patch_file;
+  TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc));
+  TEST_AND_RETURN_FALSE(rc == 0);
+  TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
+  unlink(patch_file_path.c_str());
+  return true;
+}
+
+// The blocks vector contains a reader and writer for each block on the
+// filesystem that's being in-place updated. We populate the reader/writer
+// fields of blocks by calling this function.
+// For each block in 'operation' that is read or written, find that block
+// in 'blocks' and set the reader/writer field to the vertex passed.
+// 'graph' is not strictly necessary, but useful for printing out
+// error messages.
+bool AddInstallOpToBlocksVector(
+    const DeltaArchiveManifest_InstallOperation& operation,
+    vector<Block>* blocks,
+    const Graph& graph,
+    Vertex::Index vertex) {
+  LOG(INFO) << "AddInstallOpToBlocksVector(" << vertex << "), "
+            << graph[vertex].file_name;
+  // An operation must write at least one extent.
+  TEST_AND_RETURN_FALSE(operation.dst_extents_size() > 0);
+  
+  enum BlockField { READER = 0, WRITER, BLOCK_FIELD_COUNT };
+  for (int field = READER; field < BLOCK_FIELD_COUNT; field++) {
+    const int extents_size =
+        (field == READER) ? operation.src_extents_size() :
+        operation.dst_extents_size();
+    const char* past_participle = (field == READER) ? "read" : "written";
+    const google::protobuf::RepeatedPtrField<Extent>& extents =
+        (field == READER) ? operation.src_extents() : operation.dst_extents();
+    Vertex::Index Block::*access_type =
+        (field == READER) ? &Block::reader : &Block::writer;
+
+    for (int i = 0; i < extents_size; i++) {
+      const Extent& extent = extents.Get(i);
+      if (extent.start_block() == kSparseHole) {
+        // Hole in sparse file. skip
+        continue;
+      }
+      for (uint64_t block = extent.start_block();
+           block < (extent.start_block() + extent.num_blocks()); block++) {
+        LOG(INFO) << "ext: " << i << " block: " << block;
+        if ((*blocks)[block].*access_type != Vertex::kInvalidIndex) {
+          LOG(FATAL) << "Block " << block << " is already "
+                     << past_participle << " by "
+                     << (*blocks)[block].*access_type << "("
+                     << graph[(*blocks)[block].*access_type].file_name
+                     << ") and also " << vertex << "("
+                     << graph[vertex].file_name << ")";
+        }
+        (*blocks)[block].*access_type = vertex;
+      }
+    }
+  }
+  return true;
+}
+
+// For a given regular file which must exist at new_root + path, and may
+// exist at old_root + path, creates a new InstallOperation and adds it to
+// the graph. Also, populates the 'blocks' array as necessary.
+// Also, writes the data necessary to send the file down to the client
+// into data_fd, which has length *data_file_size. *data_file_size is
+// updated appropriately.
+// Returns true on success.
+bool DeltaReadFile(Graph* graph,
+                   vector<Block>* blocks,
+                   const string& old_root,
+                   const string& new_root,
+                   const string& path,  // within new_root
+                   int data_fd,
+                   off_t* data_file_size) {
+  vector<char> data;
+  DeltaArchiveManifest_InstallOperation operation;
+
+  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::ReadFileToDiff(old_root + path,
+                                                           new_root + path,
+                                                           &data,
+                                                           &operation));
+
+  // Write the data
+  if (operation.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE) {
+    operation.set_data_offset(*data_file_size);
+    operation.set_data_length(data.size());
+  }
+
+  TEST_AND_RETURN_FALSE(utils::WriteAll(data_fd, &data[0], data.size()));
+  *data_file_size += data.size();
+  
+  // Now, insert into graph and blocks vector
+  graph->resize(graph->size() + 1);
+  graph->back().op = operation;
+  CHECK(graph->back().op.has_type());
+  graph->back().file_name = path;
+  
+  TEST_AND_RETURN_FALSE(AddInstallOpToBlocksVector(graph->back().op,
+                                                   blocks,
+                                                   *graph,
+                                                   graph->size() - 1));
+  return true;
+}
+
+// For each regular file within new_root, creates a node in the graph,
+// determines the best way to compress it (REPLACE, REPLACE_BZ, MOVE, BSDIFF),
+// and writes any necessary data to the end of data_fd.
+bool DeltaReadFiles(Graph* graph,
+                    vector<Block>* blocks,
+                    const string& old_root,
+                    const string& new_root,
+                    int data_fd,
+                    off_t* data_file_size) {
+  set<ino_t> visited_inodes;
+  for (FilesystemIterator fs_iter(new_root,
+                                  utils::SetWithValue<string>("/lost+found"));
+       !fs_iter.IsEnd(); fs_iter.Increment()) {
+    if (!S_ISREG(fs_iter.GetStat().st_mode))
+      continue;
+
+    // Make sure we visit each inode only once.
+    if (utils::SetContainsKey(visited_inodes, fs_iter.GetStat().st_ino))
+      continue;
+    visited_inodes.insert(fs_iter.GetStat().st_ino);
+    if (fs_iter.GetStat().st_size == 0)
+      continue;
+
+    LOG(INFO) << "Encoding file " << fs_iter.GetPartialPath();
+    
+    TEST_AND_RETURN_FALSE(DeltaReadFile(graph,
+                                        blocks,
+                                        old_root,
+                                        new_root,
+                                        fs_iter.GetPartialPath(),
+                                        data_fd,
+                                        data_file_size));
+  }
+  return true;
+}
+
+// Attempts to find block_count blocks to use as scratch space.
+// Returns true on success.
+// Right now we return exactly as many blocks as are required.
+// TODO(adlr): consider returning all scratch blocks,
+// even if there are extras, to make it easier for a scratch allocator
+// to find contiguous regions for specific scratch writes.
+bool FindScratchSpace(const vector<Block>& blocks,
+                      vector<Block>::size_type block_count,
+                      vector<Extent>* out) {
+  // Scan blocks for blocks that are neither read nor written.
+  // If we don't find enough of those, return false.
+  // TODO(adlr): return blocks that are written by
+  // operations that don't have incoming edges (and thus, can be
+  // deferred until all old blocks are read by other operations).
+  vector<Extent> ret;
+  vector<Block>::size_type blocks_found = 0;
+  for (vector<Block>::size_type i = 0;
+       i < blocks.size() && blocks_found < block_count; i++) {
+    if (blocks[i].reader == Vertex::kInvalidIndex &&
+        blocks[i].writer == Vertex::kInvalidIndex) {
+      graph_utils::AppendBlockToExtents(&ret, i);
+      blocks_found++;
+    }
+  }
+  if (blocks_found == block_count) {
+    LOG(INFO) << "returning " << blocks_found << " scratch blocks";
+    out->swap(ret);
+    return true;
+  }
+  return false;
+}
+
+// This class takes a collection of Extents and allows the client to
+// allocate space from these extents. The client must not request more
+// space than exists in the source extents. Space is allocated from the
+// beginning of the source extents on; no consideration is paid to
+// fragmentation.
+class LinearExtentAllocator {
+ public:
+  explicit LinearExtentAllocator(const vector<Extent>& extents)
+      : extents_(extents),
+        extent_index_(0),
+        extent_blocks_allocated_(0) {}
+  vector<Extent> Allocate(const uint64_t block_count) {
+    vector<Extent> ret;
+    for (uint64_t blocks = 0; blocks < block_count; blocks++) {
+      CHECK_LT(extent_index_, extents_.size());
+      CHECK_LT(extent_blocks_allocated_, extents_[extent_index_].num_blocks());
+      graph_utils::AppendBlockToExtents(
+          &ret,
+          extents_[extent_index_].start_block() + extent_blocks_allocated_);
+      extent_blocks_allocated_++;
+      if (extent_blocks_allocated_ >= extents_[extent_index_].num_blocks()) {
+        extent_blocks_allocated_ = 0;
+        extent_index_++;
+      }
+    }
+    return ret;
+  }
+ private:
+  const vector<Extent> extents_;
+  vector<Extent>::size_type extent_index_;  // current Extent
+  // number of blocks allocated from the current extent
+  uint64_t extent_blocks_allocated_;
+};
+
+// Reads blocks from image_path that are not yet marked as being written
+// in the blocks array. These blocks that remain are non-file-data blocks.
+// In the future we might consider intelligent diffing between this data
+// and data in the previous image, but for now we just bzip2 compress it
+// and include it in the update.
+// Fills in out_op with an operation that writes these blocks and writes
+// the compressed blob to blobs_fd. Reads and updates *blobs_length.
+bool ReadUnwrittenBlocks(const vector<Block>& blocks,
+                         int blobs_fd,
+                         off_t* blobs_length,
+                         const string& image_path,
+                         DeltaArchiveManifest_InstallOperation* out_op) {
+  int image_fd = open(image_path.c_str(), O_RDONLY, 000);
+  TEST_AND_RETURN_FALSE_ERRNO(image_fd >= 0);
+  ScopedFdCloser image_fd_closer(&image_fd);
+
+  string temp_file_path;
+  TEST_AND_RETURN_FALSE(utils::MakeTempFile("/tmp/CrAU_temp_data.XXXXXX",
+                                            &temp_file_path,
+                                            NULL));
+
+  FILE* file = fopen(temp_file_path.c_str(), "w");
+  TEST_AND_RETURN_FALSE(file);
+  int err = BZ_OK;
+  
+  BZFILE* bz_file = BZ2_bzWriteOpen(&err,
+                                    file,
+                                    9,  // max compression
+                                    0,  // verbosity
+                                    0);  // default work factor
+  TEST_AND_RETURN_FALSE(err == BZ_OK);
+  
+  vector<Extent> extents;
+  vector<Block>::size_type block_count = 0;
+  
+  LOG(INFO) << "Appending left over blocks to extents";
+  for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
+    if (blocks[i].writer != Vertex::kInvalidIndex)
+      continue;
+    graph_utils::AppendBlockToExtents(&extents, i);
+    block_count++;
+  }
+
+  // Code will handle 'buf' at any size that's a multiple of kBlockSize,
+  // so we arbitrarily set it to 1024 * kBlockSize.
+  vector<char> buf(1024 * kBlockSize);
+
+  LOG(INFO) << "Reading left over blocks";
+  vector<Block>::size_type blocks_copied_count = 0;
+
+  // For each extent in extents, write the data into BZ2_bzWrite which
+  // sends it to an output file.
+  // We use the temporary buffer 'buf' to hold the data, which may be
+  // smaller than the extent, so in that case we have to loop to get
+  // the extent's data (that's the inner while loop).
+  for (vector<Extent>::const_iterator it = extents.begin();
+       it != extents.end(); ++it) {
+    vector<Block>::size_type blocks_read = 0;
+    while (blocks_read < it->num_blocks()) {
+      const int copy_block_cnt =
+          min(buf.size() / kBlockSize,
+              static_cast<vector<char>::size_type>(
+                  it->num_blocks() - blocks_read));
+      ssize_t rc = pread(image_fd,
+                         &buf[0],
+                         copy_block_cnt * kBlockSize,
+                         (it->start_block() + blocks_read) * kBlockSize);
+      TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
+      TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) ==
+                            copy_block_cnt * kBlockSize);
+      BZ2_bzWrite(&err, bz_file, &buf[0], copy_block_cnt * kBlockSize);
+      TEST_AND_RETURN_FALSE(err == BZ_OK);
+      blocks_read += copy_block_cnt;
+      blocks_copied_count += copy_block_cnt;
+      LOG(INFO) << "progress: " << ((float)blocks_copied_count)/block_count;
+    }
+  }
+  BZ2_bzWriteClose(&err, bz_file, 0, NULL, NULL);
+  TEST_AND_RETURN_FALSE(err == BZ_OK);
+  bz_file = NULL;
+  TEST_AND_RETURN_FALSE_ERRNO(0 == fclose(file));
+  file = NULL;
+  
+  vector<char> compressed_data;
+  LOG(INFO) << "Reading compressed data off disk";
+  TEST_AND_RETURN_FALSE(utils::ReadFile(temp_file_path, &compressed_data));
+  TEST_AND_RETURN_FALSE(unlink(temp_file_path.c_str()) == 0);
+  
+  // Add node to graph to write these blocks
+  out_op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
+  out_op->set_data_offset(*blobs_length);
+  out_op->set_data_length(compressed_data.size());
+  *blobs_length += compressed_data.size();
+  out_op->set_dst_length(kBlockSize * block_count);
+  DeltaDiffGenerator::StoreExtents(extents, out_op->mutable_dst_extents());
+  
+  TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd,
+                                        &compressed_data[0],
+                                        compressed_data.size()));
+  LOG(INFO) << "done with extra blocks";
+  return true;
+}
+
+// Writes the host-endian uint64 'value' to 'writer' in big-endian byte order.
+// Returns true on success.
+bool WriteUint64AsBigEndian(FileWriter* writer, const uint64 value) {
+  uint64 value_be = htobe64(value);
+  TEST_AND_RETURN_FALSE(writer->Write(&value_be, sizeof(value_be)) ==
+                        sizeof(value_be));
+  return true;
+}
+
+// Adds each operation from the graph to the manifest in the order
+// specified by 'order'.
+void InstallOperationsToManifest(
+    const Graph& graph,
+    const vector<Vertex::Index>& order,
+    DeltaArchiveManifest* out_manifest) {
+  for (vector<Vertex::Index>::const_iterator it = order.begin();
+       it != order.end(); ++it) {
+    DeltaArchiveManifest_InstallOperation* op =
+        out_manifest->add_install_operations();
+    *op = graph[*it].op;
+  }
+}
+
+void CheckGraph(const Graph& graph) {
+  for (Graph::const_iterator it = graph.begin(); it != graph.end(); ++it) {
+    CHECK(it->op.has_type());
+  }
+}
+
+}  // namespace {}
+
+bool DeltaDiffGenerator::ReadFileToDiff(
+    const string& old_filename,
+    const string& new_filename,
+    vector<char>* out_data,
+    DeltaArchiveManifest_InstallOperation* out_op) {
+  // Read new data in
+  vector<char> new_data;
+  TEST_AND_RETURN_FALSE(utils::ReadFile(new_filename, &new_data));
+  
+  TEST_AND_RETURN_FALSE(!new_data.empty());
+  
+  vector<char> new_data_bz;
+  TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz));
+  CHECK(!new_data_bz.empty());
+
+  vector<char> data;  // Data blob that will be written to delta file.
+
+  DeltaArchiveManifest_InstallOperation operation;
+  size_t current_best_size = 0;
+  if (new_data.size() <= new_data_bz.size()) {
+    operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
+    current_best_size = new_data.size();
+    data = new_data;
+  } else {
+    operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
+    current_best_size = new_data_bz.size();
+    data = new_data_bz;
+  }
+
+  // Do we have an original file to consider?
+  struct stat old_stbuf;
+  if (0 != stat(old_filename.c_str(), &old_stbuf)) {
+    // If stat-ing the old file fails, it should be because it doesn't exist.
+    TEST_AND_RETURN_FALSE(errno == ENOTDIR || errno == ENOENT);
+  } else {
+    // Read old data
+    vector<char> old_data;
+    TEST_AND_RETURN_FALSE(utils::ReadFile(old_filename, &old_data));
+    if (old_data == new_data) {
+      // No change in data.
+      operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
+      current_best_size = 0;
+      data.clear();
+    } else {
+      // Try bsdiff of old to new data
+      vector<char> bsdiff_delta;
+      TEST_AND_RETURN_FALSE(
+          BsdiffFiles(old_filename, new_filename, &bsdiff_delta));
+      CHECK_GT(bsdiff_delta.size(), 0);
+      if (bsdiff_delta.size() < current_best_size) {
+        operation.set_type(DeltaArchiveManifest_InstallOperation_Type_BSDIFF);
+        current_best_size = bsdiff_delta.size();
+        
+        data = bsdiff_delta;
+      }
+    }
+  }
+  
+  // Set parameters of the operations
+  CHECK_EQ(data.size(), current_best_size);
+  
+  if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE ||
+      operation.type() == DeltaArchiveManifest_InstallOperation_Type_BSDIFF) {
+    TEST_AND_RETURN_FALSE(
+        GatherExtents(old_filename, operation.mutable_src_extents()));
+    operation.set_src_length(old_stbuf.st_size);
+  }
+
+  TEST_AND_RETURN_FALSE(
+      GatherExtents(new_filename, operation.mutable_dst_extents()));
+  operation.set_dst_length(new_data.size());
+  
+  out_data->swap(data);
+  *out_op = operation;
+  
+  return true;
+}
+
+void DeltaDiffGenerator::SubstituteBlocks(
+    DeltaArchiveManifest_InstallOperation* op,
+    const vector<Extent>& remove_extents,
+    const vector<Extent>& replace_extents) {
+  // First, expand out the blocks that op reads from
+  vector<uint64> read_blocks;
+  for (int i = 0; i < op->src_extents_size(); i++) {
+    const Extent& extent = op->src_extents(i);
+    if (extent.start_block() == kSparseHole) {
+      read_blocks.resize(read_blocks.size() + extent.num_blocks(), kSparseHole);
+    } else {
+      for (uint64 block = extent.start_block();
+           block < (extent.start_block() + extent.num_blocks()); block++) {
+        read_blocks.push_back(block);
+      }
+    }
+  }
+  {
+    // Expand remove_extents and replace_extents
+    vector<uint64> remove_extents_expanded;
+    for (vector<Extent>::const_iterator it = remove_extents.begin();
+         it != remove_extents.end(); ++it) {
+      const Extent& extent = *it;
+      for (uint64 block = extent.start_block();
+           block < (extent.start_block() + extent.num_blocks()); block++) {
+        remove_extents_expanded.push_back(block);
+      }
+    }
+    vector<uint64> replace_extents_expanded;
+    for (vector<Extent>::const_iterator it = replace_extents.begin();
+         it != replace_extents.end(); ++it) {
+      const Extent& extent = *it;
+      for (uint64 block = extent.start_block();
+           block < (extent.start_block() + extent.num_blocks()); block++) {
+        replace_extents_expanded.push_back(block);
+      }
+    }
+    CHECK_EQ(remove_extents_expanded.size(), replace_extents_expanded.size());
+    for (vector<uint64>::size_type i = 0;
+         i < replace_extents_expanded.size(); i++) {
+      vector<uint64>::size_type index = 0;
+      CHECK(utils::VectorIndexOf(read_blocks,
+                                 remove_extents_expanded[i],
+                                 &index));
+      CHECK(read_blocks[index] == remove_extents_expanded[i]);
+      read_blocks[index] = replace_extents_expanded[i];
+    }
+  }
+  // Convert read_blocks back to extents
+  op->clear_src_extents();
+  vector<Extent> new_extents;
+  for (vector<uint64>::const_iterator it = read_blocks.begin();
+       it != read_blocks.end(); ++it) {
+    graph_utils::AppendBlockToExtents(&new_extents, *it);
+  }
+  DeltaDiffGenerator::StoreExtents(new_extents, op->mutable_src_extents());
+}
+
+bool DeltaDiffGenerator::CutEdges(Graph* graph,
+                                  const vector<Block>& blocks,
+                                  const set<Edge>& edges) {
+  // First, find enough scratch space for the edges we'll be cutting.
+  vector<Block>::size_type blocks_required = 0;
+  for (set<Edge>::const_iterator it = edges.begin(); it != edges.end(); ++it) {
+    blocks_required += graph_utils::EdgeWeight(*graph, *it);
+  }
+  vector<Extent> scratch_extents;
+  LOG(INFO) << "requesting " << blocks_required << " blocks of scratch";
+  TEST_AND_RETURN_FALSE(
+      FindScratchSpace(blocks, blocks_required, &scratch_extents));
+  LinearExtentAllocator scratch_allocator(scratch_extents);
+  
+  uint64_t scratch_blocks_used = 0;
+  for (set<Edge>::const_iterator it = edges.begin();
+       it != edges.end(); ++it) {
+    vector<Extent> old_extents =
+        (*graph)[it->first].out_edges[it->second].extents;
+    // Choose some scratch space
+    scratch_blocks_used += graph_utils::EdgeWeight(*graph, *it);
+    LOG(INFO) << "using " << graph_utils::EdgeWeight(*graph, *it)
+              << " scratch blocks ("
+              << scratch_blocks_used << ")";
+    vector<Extent> scratch =
+        scratch_allocator.Allocate(graph_utils::EdgeWeight(*graph, *it));
+    // create vertex to copy original->scratch
+    graph->resize(graph->size() + 1);
+    
+    // make node depend on the copy operation
+    (*graph)[it->first].out_edges.insert(make_pair(graph->size() - 1,
+                                                   EdgeProperties()));
+
+    // Set src/dst extents and other proto variables for copy operation
+    graph->back().op.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
+    DeltaDiffGenerator::StoreExtents(
+        (*graph)[it->first].out_edges[it->second].extents,
+        graph->back().op.mutable_src_extents());
+    DeltaDiffGenerator::StoreExtents(scratch,
+                                     graph->back().op.mutable_dst_extents());
+    graph->back().op.set_src_length(
+        graph_utils::EdgeWeight(*graph, *it) * kBlockSize);
+    graph->back().op.set_dst_length(graph->back().op.src_length());
+
+    // make the dest node read from the scratch space
+    DeltaDiffGenerator::SubstituteBlocks(
+        &((*graph)[it->second].op),
+        (*graph)[it->first].out_edges[it->second].extents,
+        scratch);
+
+    // delete the old edge
+    CHECK_EQ(1, (*graph)[it->first].out_edges.erase(it->second));
+  }
+  return true;
+}
+
+// Stores all Extents in 'extents' into 'out'.
+void DeltaDiffGenerator::StoreExtents(
+    vector<Extent>& extents,
+    google::protobuf::RepeatedPtrField<Extent>* out) {
+  for (vector<Extent>::const_iterator it = extents.begin();
+       it != extents.end(); ++it) {
+    Extent* new_extent = out->Add();
+    *new_extent = *it;
+  }
+}
+
+// Creates all the edges for the graph. Writers of a block point to
+// readers of the same block. This is because for an edge A->B, B
+// must complete before A executes.
+void DeltaDiffGenerator::CreateEdges(Graph* graph,
+                                     const vector<Block>& blocks) {
+  for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
+    // Blocks with both a reader and writer get an edge
+    if (blocks[i].reader == Vertex::kInvalidIndex ||
+        blocks[i].writer == Vertex::kInvalidIndex)
+      continue;
+    // Don't have a node depend on itself
+    if (blocks[i].reader == blocks[i].writer)
+      continue;
+    // See if there's already an edge we can add onto
+    Vertex::EdgeMap::iterator edge_it =
+        (*graph)[blocks[i].writer].out_edges.find(blocks[i].reader);
+    if (edge_it == (*graph)[blocks[i].writer].out_edges.end()) {
+      // No existing edge. Create one
+      (*graph)[blocks[i].writer].out_edges.insert(
+          make_pair(blocks[i].reader, EdgeProperties()));
+      edge_it = (*graph)[blocks[i].writer].out_edges.find(blocks[i].reader);
+      CHECK_NE(edge_it, (*graph)[blocks[i].writer].out_edges.end());
+    }
+    graph_utils::AppendBlockToExtents(&edge_it->second.extents, i);
+  }
+}
+
+bool DeltaDiffGenerator::ReorderDataBlobs(
+    DeltaArchiveManifest* manifest,
+    const std::string& data_blobs_path,
+    const std::string& new_data_blobs_path) {
+  int in_fd = open(data_blobs_path.c_str(), O_RDONLY, 0);
+  TEST_AND_RETURN_FALSE_ERRNO(in_fd >= 0);
+  ScopedFdCloser in_fd_closer(&in_fd);
+  
+  DirectFileWriter writer;
+  TEST_AND_RETURN_FALSE(
+      writer.Open(new_data_blobs_path.c_str(),
+                  O_WRONLY | O_TRUNC | O_CREAT,
+                  0644) == 0);
+  ScopedFileWriterCloser writer_closer(&writer);
+  uint64 out_file_size = 0;
+  
+  for (int i = 0; i < manifest->install_operations_size(); i++) {
+    DeltaArchiveManifest_InstallOperation* op =
+        manifest->mutable_install_operations(i);
+    if (!op->has_data_offset())
+      continue;
+    CHECK(op->has_data_length());
+    vector<char> buf(op->data_length());
+    ssize_t rc = pread(in_fd, &buf[0], buf.size(), op->data_offset());
+    TEST_AND_RETURN_FALSE(rc == static_cast<ssize_t>(buf.size()));
+
+    op->set_data_offset(out_file_size);
+    TEST_AND_RETURN_FALSE(writer.Write(&buf[0], buf.size()) ==
+                          static_cast<ssize_t>(buf.size()));
+    out_file_size += buf.size();
+  }
+  return true;
+}
+
+bool DeltaDiffGenerator::GenerateDeltaUpdateFile(const string& old_root,
+                                                 const string& old_image,
+                                                 const string& new_root,
+                                                 const string& new_image,
+                                                 const string& output_path) {
+  struct stat old_image_stbuf;
+  TEST_AND_RETURN_FALSE_ERRNO(stat(old_image.c_str(), &old_image_stbuf) == 0);
+  struct stat new_image_stbuf;
+  TEST_AND_RETURN_FALSE_ERRNO(stat(new_image.c_str(), &new_image_stbuf) == 0);
+  LOG_IF(WARNING, new_image_stbuf.st_size != old_image_stbuf.st_size)
+      << "Old and new images are different sizes.";
+  LOG_IF(FATAL, new_image_stbuf.st_size % kBlockSize)
+      << "New image not a multiple of block size " << kBlockSize;
+  LOG_IF(FATAL, old_image_stbuf.st_size % kBlockSize)
+      << "Old image not a multiple of block size " << kBlockSize;
+
+  vector<Block> blocks(min(old_image_stbuf.st_size / kBlockSize,
+                           new_image_stbuf.st_size / kBlockSize));
+  LOG(INFO) << "blocks (orig): " << (uint32)(&blocks);
+  LOG(INFO) << "w:" << blocks[4097].writer;
+  LOG(INFO) << "invalid: " << Vertex::kInvalidIndex;
+  LOG(INFO) << "len: " << blocks.size();
+  for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
+    CHECK(blocks[i].reader == Vertex::kInvalidIndex);
+    CHECK(blocks[i].writer == Vertex::kInvalidIndex);
+  }
+  Graph graph;
+  CheckGraph(graph);
+  
+  const string kTempFileTemplate("/tmp/CrAU_temp_data.XXXXXX");
+  string temp_file_path;
+  off_t data_file_size = 0;
+
+  LOG(INFO) << "Reading files...";
+
+  DeltaArchiveManifest_InstallOperation final_op;
+  {
+    int fd;
+    TEST_AND_RETURN_FALSE(
+        utils::MakeTempFile(kTempFileTemplate, &temp_file_path, &fd));
+    TEST_AND_RETURN_FALSE(fd >= 0);
+    ScopedFdCloser fd_closer(&fd);
+  
+    TEST_AND_RETURN_FALSE(DeltaReadFiles(&graph,
+                                         &blocks,
+                                         old_root,
+                                         new_root,
+                                         fd,
+                                         &data_file_size));
+    CheckGraph(graph);
+                                         
+    // TODO(adlr): read all the rest of the blocks in
+    TEST_AND_RETURN_FALSE(ReadUnwrittenBlocks(blocks,
+                                              fd,
+                                              &data_file_size,
+                                              new_image,
+                                              &final_op));  
+  }
+  CheckGraph(graph);
+  
+  LOG(INFO) << "Creating edges...";
+  CreateEdges(&graph, blocks);
+  CheckGraph(graph);
+  
+  CycleBreaker cycle_breaker;
+  LOG(INFO) << "Finding cycles...";
+  set<Edge> cut_edges;
+  cycle_breaker.BreakCycles(graph, &cut_edges);
+  CheckGraph(graph);
+
+  // Calculate number of scratch blocks needed
+
+  LOG(INFO) << "Cutting cycles...";
+  TEST_AND_RETURN_FALSE(CutEdges(&graph, blocks, cut_edges));
+  CheckGraph(graph);
+
+  vector<Vertex::Index> final_order;
+  LOG(INFO) << "Ordering...";
+  TopologicalSort(graph, &final_order);
+  CheckGraph(graph);
+  
+  // Convert to protobuf Manifest object
+  DeltaArchiveManifest manifest;
+  CheckGraph(graph);
+  InstallOperationsToManifest(graph, final_order, &manifest);
+  {
+    // Write final operation
+    DeltaArchiveManifest_InstallOperation* op =
+        manifest.add_install_operations();
+    *op = final_op;
+    CHECK(op->has_type());
+    LOG(INFO) << "final op length: " << op->data_length();
+  }
+  CheckGraph(graph);
+  manifest.set_block_size(kBlockSize);
+  // TODO(adlr): set checksums
+
+  // Reorder the data blobs with the newly ordered manifest
+  string ordered_blobs_path;
+  TEST_AND_RETURN_FALSE(utils::MakeTempFile(
+      "/tmp/CrAU_temp_data.ordered.XXXXXX",
+      &ordered_blobs_path,
+      false));
+  TEST_AND_RETURN_FALSE(ReorderDataBlobs(&manifest,
+                                         temp_file_path,
+                                         ordered_blobs_path));
+
+  // Check that install op blobs are in order and that all blocks are written.
+  {
+    vector<uint32> written_count(blocks.size(), 0);
+    uint64 next_blob_offset = 0;
+    for (int i = 0; i < manifest.install_operations_size(); i++) {
+      const DeltaArchiveManifest_InstallOperation& op =
+          manifest.install_operations(i);
+      for (int j = 0; j < op.dst_extents_size(); j++) {
+        const Extent& extent = op.dst_extents(j);
+        for (uint64 block = extent.start_block();
+             block < (extent.start_block() + extent.num_blocks()); block++) {
+          written_count[block]++;
+        }
+      }
+      if (op.has_data_offset()) {
+        if (op.data_offset() != next_blob_offset) {
+          LOG(FATAL) << "bad blob offset! " << op.data_offset() << " != "
+                     << next_blob_offset;
+        }
+        next_blob_offset += op.data_length();
+      }
+    }
+    // check all blocks written to
+    for (vector<uint32>::size_type i = 0; i < written_count.size(); i++) {
+      if (written_count[i] == 0) {
+        LOG(FATAL) << "block " << i << " not written!";
+      }
+    }
+  }
+
+  // Serialize protobuf
+  string serialized_manifest;
+  
+  CheckGraph(graph);
+  TEST_AND_RETURN_FALSE(manifest.AppendToString(&serialized_manifest));
+  CheckGraph(graph);
+
+  LOG(INFO) << "Writing final delta file header...";
+  DirectFileWriter writer;
+  TEST_AND_RETURN_FALSE_ERRNO(writer.Open(output_path.c_str(),
+                                          O_WRONLY | O_CREAT | O_TRUNC,
+                                          0644) == 0);
+  ScopedFileWriterCloser writer_closer(&writer);
+  
+  // Write header
+  TEST_AND_RETURN_FALSE(writer.Write(kDeltaMagic, strlen(kDeltaMagic)) ==
+                        strlen(kDeltaMagic));
+  
+  // Write version number
+  TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer, kVersionNumber));
+  
+  // Write protobuf length
+  TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer,
+                                               serialized_manifest.size()));
+  
+  // Write protobuf
+  LOG(INFO) << "Writing final delta file protobuf... "
+            << serialized_manifest.size();
+  TEST_AND_RETURN_FALSE(writer.Write(serialized_manifest.data(),
+                                     serialized_manifest.size()) ==
+                        static_cast<ssize_t>(serialized_manifest.size()));
+  
+  // Append the data blobs
+  LOG(INFO) << "Writing final delta file data blobs...";
+  int blobs_fd = open(temp_file_path.c_str(), O_RDONLY, 0);
+  ScopedFdCloser blobs_fd_closer(&blobs_fd);
+  TEST_AND_RETURN_FALSE(blobs_fd >= 0);
+  for (;;) {
+    char buf[kBlockSize];
+    ssize_t rc = read(blobs_fd, buf, sizeof(buf));
+    if (0 == rc) {
+      // EOF
+      break;
+    }
+    TEST_AND_RETURN_FALSE_ERRNO(rc > 0);
+    TEST_AND_RETURN_FALSE(writer.Write(buf, rc) == rc);
+  }
+  
+  LOG(INFO) << "All done. Successfully created delta file.";
+  return true;
+}
+
+};  // namespace chromeos_update_engine
diff --git a/delta_diff_generator.h b/delta_diff_generator.h
index 6b232da..6e48519 100644
--- a/delta_diff_generator.h
+++ b/delta_diff_generator.h
@@ -1,15 +1,111 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #ifndef CHROMEOS_PLATFORM_UPDATE_ENGINE_DELTA_DIFF_GENERATOR_H__
 #define CHROMEOS_PLATFORM_UPDATE_ENGINE_DELTA_DIFF_GENERATOR_H__
 
+#include <string>
+#include <vector>
 #include "base/basictypes.h"
+#include "update_engine/graph_types.h"
+#include "update_engine/update_metadata.pb.h"
+
+// There is one function in DeltaDiffGenerator of importance to users
+// of the class: GenerateDeltaUpdateFile(). Before calling it,
+// the old and new images must be mounted. Call GenerateDeltaUpdateFile()
+// with both the mount-points of the images in addition to the paths of
+// the images (both old and new). A delta from old to new will be
+// generated and stored in output_path.
 
 namespace chromeos_update_engine {
 
 class DeltaDiffGenerator {
+ public:
+  // Represents a disk block on the install partition.
+  struct Block {
+    // During install, each block on the install partition will be written
+    // and some may be read (in all likelihood, many will be read).
+    // The reading and writing will be performed by InstallOperations,
+    // each of which has a corresponding vertex in a graph.
+    // A Block object tells which vertex will read or write this block
+    // at install time.
+    // Generally, there will be a vector of Block objects whose length
+    // is the number of blocks on the install partition.
+    Block() : reader(Vertex::kInvalidIndex), writer(Vertex::kInvalidIndex) {}
+    Vertex::Index reader;  // Vertex that reads this block; starts invalid.
+    Vertex::Index writer;  // Vertex that writes this block; starts invalid.
+  };
+
+  // This is the only function that external users of the class should call.
+  // old_image and new_image are paths to two image files. They should be
+  // mounted read-only at paths old_root and new_root respectively.
+  // output_path is the filename where the delta update should be written.
+  // Returns true on success.
+  static bool GenerateDeltaUpdateFile(const std::string& old_root,
+                                      const std::string& old_image,
+                                      const std::string& new_root,
+                                      const std::string& new_image,
+                                      const std::string& output_path);
+
+  // These functions are public so that the unit tests can access them:
+
+  // Reads old_filename (if it exists) and a new_filename and determines
+  // the smallest way to encode this file for the diff. It stores
+  // necessary data in out_data and fills in out_op.
+  // If there's no change in old and new files, it creates a MOVE
+  // operation. If there is a change, or the old file doesn't exist,
+  // the smallest of REPLACE, REPLACE_BZ, or BSDIFF wins.
+  // new_filename must contain at least one byte.
+  // Returns true on success.
+  static bool ReadFileToDiff(const std::string& old_filename,
+                             const std::string& new_filename,
+                             std::vector<char>* out_data,
+                             DeltaArchiveManifest_InstallOperation* out_op);
+
+  // Modifies blocks read by 'op' so that any blocks referred to by
+  // 'remove_extents' are replaced with blocks from 'replace_extents'.
+  // 'remove_extents' and 'replace_extents' must be the same number of blocks.
+  // Blocks will be substituted in the order listed in the vectors.
+  // E.g. if 'op' reads blocks 1, 2, 3, 4, 5, 6, 7, 8, remove_extents
+  // contains blocks 6, 2, 3, 5, and replace blocks contains
+  // 12, 13, 14, 15, then op will be changed to read from:
+  // 1, 13, 14, 4, 15, 12, 7, 8
+  static void SubstituteBlocks(DeltaArchiveManifest_InstallOperation* op,
+                               const std::vector<Extent>& remove_extents,
+                               const std::vector<Extent>& replace_extents);
+
+  // Cuts 'edges' from 'graph' according to the AU algorithm. This means
+  // for each edge A->B, remove the dependency that B occur before A.
+  // Do this by creating a new operation X that copies from the blocks
+  // specified by the edge's properties to temp space T. Modify B to read
+  // from T rather than the blocks in the edge. Modify A to depend on X,
+  // but not on B. Free space is found by looking in 'blocks'.
+  // Returns true on success.
+  static bool CutEdges(Graph* graph,
+                       const std::vector<Block>& blocks,
+                       const std::set<Edge>& edges);
+
+  // Stores all Extents in 'extents' into 'out'.
+  static void StoreExtents(std::vector<Extent>& extents,
+                           google::protobuf::RepeatedPtrField<Extent>* out);
+                           
+  // Creates all the edges for the graph. Writers of a block point to
+  // readers of the same block. This is because for an edge A->B, B
+  // must complete before A executes.
+  static void CreateEdges(Graph* graph, const std::vector<Block>& blocks);
+  
+  // Install operations in the manifest may reference data blobs, which
+  // are in data_blobs_path. This function creates a new data blobs file
+  // with the data blobs in the same order as the referencing install
+  // operations in the manifest. E.g. if manifest[0] has a data blob
+  // "X" at offset 1, manifest[1] has a data blob "Y" at offset 0,
+  // and data_blobs_path's file contains "YX", new_data_blobs_path
+  // will be set to be a file that contains "XY".
+  static bool ReorderDataBlobs(DeltaArchiveManifest* manifest,
+                               const std::string& data_blobs_path,
+                               const std::string& new_data_blobs_path);
+
  private:
   // This should never be constructed
   DISALLOW_IMPLICIT_CONSTRUCTORS(DeltaDiffGenerator);
diff --git a/delta_diff_generator_unittest.cc b/delta_diff_generator_unittest.cc
index a5def92..644d9d8 100644
--- a/delta_diff_generator_unittest.cc
+++ b/delta_diff_generator_unittest.cc
@@ -8,21 +8,341 @@
 #include <unistd.h>
 #include <set>
 #include <string>
+#include <utility>
 #include <vector>
-#include "base/string_util.h"
 #include <gtest/gtest.h>
 #include "chromeos/obsolete_logging.h"
-#include "update_engine/decompressing_file_writer.h"
+#include "update_engine/cycle_breaker.h"
 #include "update_engine/delta_diff_generator.h"
-#include "update_engine/delta_diff_parser.h"
-#include "update_engine/gzip.h"
-#include "update_engine/mock_file_writer.h"
+#include "update_engine/graph_types.h"
+#include "update_engine/graph_utils.h"
 #include "update_engine/subprocess.h"
 #include "update_engine/test_utils.h"
 #include "update_engine/utils.h"
 
+using std::make_pair;
+using std::set;
+using std::string;
+using std::vector;
+
 namespace chromeos_update_engine {
 
-class DeltaDiffGeneratorTest : public ::testing::Test {};
+typedef DeltaDiffGenerator::Block Block;
+
+namespace {
+// Sums num_blocks() across all of 'extents' and returns the total.
+int64 BlocksInExtents(
+    const google::protobuf::RepeatedPtrField<Extent>& extents) {
+  int64 total_blocks = 0;
+  for (int idx = 0, count = extents.size(); idx != count; ++idx)
+    total_blocks += extents.Get(idx).num_blocks();
+  return total_blocks;
+}
+}  // namespace {}
+
+class DeltaDiffGeneratorTest : public ::testing::Test {
+ protected:
+  const string old_path() { return "DeltaDiffGeneratorTest-old_path"; }
+  const string new_path() { return "DeltaDiffGeneratorTest-new_path"; }
+  virtual void TearDown() {  // Removes both scratch files after each test.
+    unlink(old_path().c_str());  // Return value ignored: file may not exist.
+    unlink(new_path().c_str());
+  }
+};
+
+TEST_F(DeltaDiffGeneratorTest, RunAsRootMoveSmallTest) {
+  EXPECT_TRUE(utils::WriteFile(old_path().c_str(),
+                               reinterpret_cast<const char*>(kRandomString),
+                               sizeof(kRandomString)));
+  EXPECT_TRUE(utils::WriteFile(new_path().c_str(),
+                               reinterpret_cast<const char*>(kRandomString),
+                               sizeof(kRandomString)));
+  vector<char> data;
+  DeltaArchiveManifest_InstallOperation op;
+  EXPECT_TRUE(DeltaDiffGenerator::ReadFileToDiff(old_path(),
+                                                 new_path(),
+                                                 &data,
+                                                 &op));
+  EXPECT_TRUE(data.empty());
+  // Old and new files are byte-identical, so a data-less MOVE is expected.
+  EXPECT_TRUE(op.has_type());
+  EXPECT_EQ(DeltaArchiveManifest_InstallOperation_Type_MOVE, op.type());
+  EXPECT_FALSE(op.has_data_offset());
+  EXPECT_FALSE(op.has_data_length());
+  EXPECT_EQ(1, op.src_extents_size());
+  EXPECT_EQ(sizeof(kRandomString), op.src_length());
+  EXPECT_EQ(1, op.dst_extents_size());
+  EXPECT_EQ(sizeof(kRandomString), op.dst_length());
+  EXPECT_EQ(BlocksInExtents(op.src_extents()),
+            BlocksInExtents(op.dst_extents()));
+  EXPECT_EQ(1, BlocksInExtents(op.dst_extents()));
+}
+
+TEST_F(DeltaDiffGeneratorTest, RunAsRootBsdiffSmallTest) {
+  EXPECT_TRUE(utils::WriteFile(old_path().c_str(),
+                               reinterpret_cast<const char*>(kRandomString),
+                               sizeof(kRandomString) - 1));
+  EXPECT_TRUE(utils::WriteFile(new_path().c_str(),
+                               reinterpret_cast<const char*>(kRandomString),
+                               sizeof(kRandomString)));
+  vector<char> data;
+  DeltaArchiveManifest_InstallOperation op;
+  EXPECT_TRUE(DeltaDiffGenerator::ReadFileToDiff(old_path(),
+                                                 new_path(),
+                                                 &data,
+                                                 &op));
+  EXPECT_FALSE(data.empty());
+  // The old file is the new file minus its final byte, so BSDIFF is expected.
+  EXPECT_TRUE(op.has_type());
+  EXPECT_EQ(DeltaArchiveManifest_InstallOperation_Type_BSDIFF, op.type());
+  EXPECT_FALSE(op.has_data_offset());
+  EXPECT_FALSE(op.has_data_length());
+  EXPECT_EQ(1, op.src_extents_size());
+  EXPECT_EQ(sizeof(kRandomString) - 1, op.src_length());
+  EXPECT_EQ(1, op.dst_extents_size());
+  EXPECT_EQ(sizeof(kRandomString), op.dst_length());
+  EXPECT_EQ(BlocksInExtents(op.src_extents()),
+            BlocksInExtents(op.dst_extents()));
+  EXPECT_EQ(1, BlocksInExtents(op.dst_extents()));
+}
+
+TEST_F(DeltaDiffGeneratorTest, RunAsRootReplaceSmallTest) {
+  vector<char> new_data;
+  for (int i = 0; i < 2; i++) {
+    new_data.insert(new_data.end(),
+                    kRandomString,
+                    kRandomString + sizeof(kRandomString));
+    EXPECT_TRUE(utils::WriteFile(new_path().c_str(),
+                                 &new_data[0],
+                                 new_data.size()));
+    vector<char> data;
+    DeltaArchiveManifest_InstallOperation op;
+    EXPECT_TRUE(DeltaDiffGenerator::ReadFileToDiff(old_path(),
+                                                   new_path(),
+                                                   &data,
+                                                   &op));
+    EXPECT_FALSE(data.empty());
+    // old_path is never written here; 1 copy expects REPLACE, 2 REPLACE_BZ.
+    EXPECT_TRUE(op.has_type());
+    const DeltaArchiveManifest_InstallOperation_Type expected_type =
+        (i == 0 ? DeltaArchiveManifest_InstallOperation_Type_REPLACE :
+         DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
+    EXPECT_EQ(expected_type, op.type());
+    EXPECT_FALSE(op.has_data_offset());
+    EXPECT_FALSE(op.has_data_length());
+    EXPECT_EQ(0, op.src_extents_size());
+    EXPECT_FALSE(op.has_src_length());
+    EXPECT_EQ(1, op.dst_extents_size());
+    EXPECT_EQ(new_data.size(), op.dst_length());
+    EXPECT_EQ(1, BlocksInExtents(op.dst_extents()));
+  }
+}
+
+namespace {
+void AppendExtent(vector<Extent>* vect, uint64 start, uint64 length) {
+  vect->push_back(Extent());  // Grow by one default extent, then fill it in.
+  vect->back().set_num_blocks(length);
+  vect->back().set_start_block(start);
+}
+// Appends a src extent to 'op' with the given start block and block count.
+void OpAppendExtent(DeltaArchiveManifest_InstallOperation* op,
+                    uint64 start, uint64 length) {
+  Extent* const added = op->add_src_extents();
+  added->set_num_blocks(length);
+  added->set_start_block(start);
+}
+}
+
+TEST_F(DeltaDiffGeneratorTest, SubstituteBlocksTest) {
+  vector<Extent> remove_blocks;
+  AppendExtent(&remove_blocks, 3, 3);
+  AppendExtent(&remove_blocks, 7, 1);
+  vector<Extent> replace_blocks;
+  AppendExtent(&replace_blocks, 10, 2);
+  AppendExtent(&replace_blocks, 13, 2);
+  DeltaArchiveManifest_InstallOperation op;
+  OpAppendExtent(&op, 4, 3);
+  OpAppendExtent(&op, kSparseHole, 4);  // Sparse hole in file
+  OpAppendExtent(&op, 3, 1);
+  OpAppendExtent(&op, 7, 3);
+  // Removed blocks map, in order, to 3->10, 4->11, 5->13, 7->14.
+  DeltaDiffGenerator::SubstituteBlocks(&op, remove_blocks, replace_blocks);
+  // Fatal on mismatch: the indexed src_extents() reads below assume seven.
+  ASSERT_EQ(7, op.src_extents_size());
+  EXPECT_EQ(11, op.src_extents(0).start_block());
+  EXPECT_EQ(1, op.src_extents(0).num_blocks());
+  EXPECT_EQ(13, op.src_extents(1).start_block());
+  EXPECT_EQ(1, op.src_extents(1).num_blocks());
+  EXPECT_EQ(6, op.src_extents(2).start_block());
+  EXPECT_EQ(1, op.src_extents(2).num_blocks());
+  EXPECT_EQ(kSparseHole, op.src_extents(3).start_block());
+  EXPECT_EQ(4, op.src_extents(3).num_blocks());
+  EXPECT_EQ(10, op.src_extents(4).start_block());
+  EXPECT_EQ(1, op.src_extents(4).num_blocks());
+  EXPECT_EQ(14, op.src_extents(5).start_block());
+  EXPECT_EQ(1, op.src_extents(5).num_blocks());
+  EXPECT_EQ(8, op.src_extents(6).start_block());
+  EXPECT_EQ(2, op.src_extents(6).num_blocks());
+}
+
+TEST_F(DeltaDiffGeneratorTest, CutEdgesTest) {
+  Graph graph;
+  vector<Block> blocks(9);
+
+  // Create nodes in graph
+  {
+    graph.resize(graph.size() + 1);
+    graph.back().op.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
+    // Reads from blocks 3, 5, 7
+    vector<Extent> extents;
+    graph_utils::AppendBlockToExtents(&extents, 3);
+    graph_utils::AppendBlockToExtents(&extents, 5);
+    graph_utils::AppendBlockToExtents(&extents, 7);
+    DeltaDiffGenerator::StoreExtents(extents,
+                                     graph.back().op.mutable_src_extents());
+    blocks[3].reader = graph.size() - 1;
+    blocks[5].reader = graph.size() - 1;
+    blocks[7].reader = graph.size() - 1;
+
+    // Writes to blocks 1, 2, 4
+    extents.clear();
+    graph_utils::AppendBlockToExtents(&extents, 1);
+    graph_utils::AppendBlockToExtents(&extents, 2);
+    graph_utils::AppendBlockToExtents(&extents, 4);
+    DeltaDiffGenerator::StoreExtents(extents,
+                                     graph.back().op.mutable_dst_extents());
+    blocks[1].writer = graph.size() - 1;
+    blocks[2].writer = graph.size() - 1;
+    blocks[4].writer = graph.size() - 1;
+  }
+  {
+    graph.resize(graph.size() + 1);
+    graph.back().op.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
+    // Reads from blocks 1, 2, 4
+    vector<Extent> extents;
+    graph_utils::AppendBlockToExtents(&extents, 1);
+    graph_utils::AppendBlockToExtents(&extents, 2);
+    graph_utils::AppendBlockToExtents(&extents, 4);
+    DeltaDiffGenerator::StoreExtents(extents,
+                                     graph.back().op.mutable_src_extents());
+    blocks[1].reader = graph.size() - 1;
+    blocks[2].reader = graph.size() - 1;
+    blocks[4].reader = graph.size() - 1;
+
+    // Writes to blocks 3, 5, 6
+    extents.clear();
+    graph_utils::AppendBlockToExtents(&extents, 3);
+    graph_utils::AppendBlockToExtents(&extents, 5);
+    graph_utils::AppendBlockToExtents(&extents, 6);
+    DeltaDiffGenerator::StoreExtents(extents,
+                                     graph.back().op.mutable_dst_extents());
+    blocks[3].writer = graph.size() - 1;
+    blocks[5].writer = graph.size() - 1;
+    blocks[6].writer = graph.size() - 1;
+  }
+
+  // Create edges
+  DeltaDiffGenerator::CreateEdges(&graph, blocks);
+
+  // Find cycles
+  CycleBreaker cycle_breaker;
+  set<Edge> cut_edges;
+  cycle_breaker.BreakCycles(graph, &cut_edges);
+
+  EXPECT_EQ(1, cut_edges.size());
+  // Edge is pair<Vertex::Index, Vertex::Index>, so no make_pair cast needed.
+  EXPECT_TRUE(cut_edges.end() != cut_edges.find(Edge(1, 0)));
+
+  EXPECT_TRUE(DeltaDiffGenerator::CutEdges(&graph, blocks, cut_edges));
+
+  EXPECT_EQ(3, graph.size());
+  // ASSERT_EQ guards every extent count that is indexed into below.
+  // Check new node in graph:
+  EXPECT_EQ(DeltaArchiveManifest_InstallOperation_Type_MOVE,
+            graph.back().op.type());
+  EXPECT_EQ(2, graph.back().op.src_extents_size());
+  ASSERT_EQ(2, graph.back().op.dst_extents_size());
+  EXPECT_EQ(0, graph.back().op.dst_extents(0).start_block());
+  EXPECT_EQ(1, graph.back().op.dst_extents(0).num_blocks());
+  EXPECT_EQ(8, graph.back().op.dst_extents(1).start_block());
+  EXPECT_EQ(1, graph.back().op.dst_extents(1).num_blocks());
+  EXPECT_TRUE(graph.back().out_edges.empty());
+
+  // Check that old node reads from new blocks
+  ASSERT_EQ(3, graph[0].op.src_extents_size());
+  EXPECT_EQ(0, graph[0].op.src_extents(0).start_block());
+  EXPECT_EQ(1, graph[0].op.src_extents(0).num_blocks());
+  EXPECT_EQ(8, graph[0].op.src_extents(1).start_block());
+  EXPECT_EQ(1, graph[0].op.src_extents(1).num_blocks());
+  EXPECT_EQ(7, graph[0].op.src_extents(2).start_block());
+  EXPECT_EQ(1, graph[0].op.src_extents(2).num_blocks());
+
+  // And that the old dst extents haven't changed
+  ASSERT_EQ(2, graph[0].op.dst_extents_size());
+  EXPECT_EQ(1, graph[0].op.dst_extents(0).start_block());
+  EXPECT_EQ(2, graph[0].op.dst_extents(0).num_blocks());
+  EXPECT_EQ(4, graph[0].op.dst_extents(1).start_block());
+  EXPECT_EQ(1, graph[0].op.dst_extents(1).num_blocks());
+
+  // Ensure it only depends on the next node
+  EXPECT_EQ(1, graph[0].out_edges.size());
+  EXPECT_TRUE(graph[0].out_edges.end() != graph[0].out_edges.find(1));
+
+  // Check second node has unchanged extents
+  ASSERT_EQ(2, graph[1].op.src_extents_size());
+  EXPECT_EQ(1, graph[1].op.src_extents(0).start_block());
+  EXPECT_EQ(2, graph[1].op.src_extents(0).num_blocks());
+  EXPECT_EQ(4, graph[1].op.src_extents(1).start_block());
+  EXPECT_EQ(1, graph[1].op.src_extents(1).num_blocks());
+
+  ASSERT_EQ(2, graph[1].op.dst_extents_size());
+  EXPECT_EQ(3, graph[1].op.dst_extents(0).start_block());
+  EXPECT_EQ(1, graph[1].op.dst_extents(0).num_blocks());
+  EXPECT_EQ(5, graph[1].op.dst_extents(1).start_block());
+  EXPECT_EQ(2, graph[1].op.dst_extents(1).num_blocks());
+
+  // Ensure it only depends on the next node
+  EXPECT_EQ(1, graph[1].out_edges.size());
+  EXPECT_TRUE(graph[1].out_edges.end() != graph[1].out_edges.find(2));
+}
+
+TEST_F(DeltaDiffGeneratorTest, ReorderBlobsTest) {
+  string orig_blobs;
+  EXPECT_TRUE(
+      utils::MakeTempFile("ReorderBlobsTest.orig.XXXXXX", &orig_blobs, NULL));
+
+  string orig_data = "abcd";
+  EXPECT_TRUE(
+      utils::WriteFile(orig_blobs.c_str(), orig_data.data(), orig_data.size()));
+
+  string new_blobs;
+  EXPECT_TRUE(
+      utils::MakeTempFile("ReorderBlobsTest.new.XXXXXX", &new_blobs, NULL));
+
+  DeltaArchiveManifest manifest;
+  DeltaArchiveManifest_InstallOperation* op =
+      manifest.add_install_operations();
+  op->set_data_offset(1);
+  op->set_data_length(3);
+  op = manifest.add_install_operations();
+  op->set_data_offset(0);
+  op->set_data_length(1);
+
+  EXPECT_TRUE(DeltaDiffGenerator::ReorderDataBlobs(&manifest,
+                                                   orig_blobs,
+                                                   new_blobs));
+
+  string new_data;
+  EXPECT_TRUE(utils::ReadFileToString(new_blobs, &new_data));
+  // Temp files are no longer needed; remove them before the fatal assert.
+  unlink(orig_blobs.c_str());
+  unlink(new_blobs.c_str());
+  EXPECT_EQ("bcda", new_data);
+  ASSERT_EQ(2, manifest.install_operations_size());
+  EXPECT_EQ(0, manifest.install_operations(0).data_offset());
+  EXPECT_EQ(3, manifest.install_operations(0).data_length());
+  EXPECT_EQ(3, manifest.install_operations(1).data_offset());
+  EXPECT_EQ(1, manifest.install_operations(1).data_length());
+}
 
 }  // namespace chromeos_update_engine
diff --git a/extent_mapper.cc b/extent_mapper.cc
index ed528fd..4af5f5d 100755
--- a/extent_mapper.cc
+++ b/extent_mapper.cc
@@ -16,6 +16,8 @@
 
 #include <linux/fs.h>
 
+#include "update_engine/graph_types.h"
+#include "update_engine/graph_utils.h"
 #include "update_engine/utils.h"
 
 using std::string;
@@ -31,7 +33,6 @@
 
 bool ExtentsForFileFibmap(const std::string& path, std::vector<Extent>* out) {
   CHECK(out);
-  // TODO(adlr): verify path is a file
   struct stat stbuf;
   int rc = stat(path.c_str(), &stbuf);
   TEST_AND_RETURN_FALSE_ERRNO(rc == 0);
@@ -53,32 +54,23 @@
   current.set_num_blocks(0);
 
   for (int i = 0; i < block_count; i++) {
-    unsigned int block = i;
-    rc = ioctl(fd, FIBMAP, &block);
+    unsigned int block32 = i;
+    rc = ioctl(fd, FIBMAP, &block32);
     TEST_AND_RETURN_FALSE_ERRNO(rc == 0);
     
-    // Add next block to extents
-    if (current.num_blocks() == 0) {
-      // We're starting a new extent
-      current.set_start_block(block);
-      current.set_num_blocks(1);
-      continue;
-    }
-    if ((current.start_block() + current.num_blocks()) == block) {
-      // We're continuing the last extent
-      current.set_num_blocks(current.num_blocks() + 1);
-      continue;
-    }
-    // We're starting a new extent and keeping the current one
-    out->push_back(current);
-    current.set_start_block(block);
-    current.set_num_blocks(1);
-    continue;
+    const uint64 block = (block32 == 0 ? kSparseHole : block32);
+    
+    graph_utils::AppendBlockToExtents(out, block);
   }
-  
-  if (current.num_blocks() > 0)
-    out->push_back(current);
+  return true;
+}
 
+bool GetFilesystemBlockSize(const std::string& path, uint32* out_blocksize) {
+  int fd = open(path.c_str(), O_RDONLY, 0);  // fd is only used for the ioctl.
+  TEST_AND_RETURN_FALSE_ERRNO(fd >= 0);
+  ScopedFdCloser fd_closer(&fd);  // Presumably closes fd at scope exit.
+  int rc = ioctl(fd, FIGETBSZ, out_blocksize);  // Fills *out_blocksize.
+  TEST_AND_RETURN_FALSE_ERRNO(rc != -1);
   return true;
 }
 
diff --git a/extent_mapper.h b/extent_mapper.h
index acdd418..b15cf4a 100755
--- a/extent_mapper.h
+++ b/extent_mapper.h
@@ -7,14 +7,27 @@
 
 #include <string>
 #include <vector>
+#include "base/basictypes.h"
 #include "update_engine/update_metadata.pb.h"
 
 namespace chromeos_update_engine {
 
 namespace extent_mapper {
 
+// Uses the FIBMAP ioctl to get all blocks used by a file and return them
+// as extents. Blocks are relative to the start of the filesystem. If
+// there is a sparse "hole" in the file, the blocks for that will be
+// represented by an extent whose start block is kSparseHole.
+// The resulting extents are stored in 'out'. Keep in mind that while
+// the blocksize of a filesystem is often 4096 bytes, that is not always
+// the case, so one should consult GetFilesystemBlockSize(), too.
+// Returns true on success.
 bool ExtentsForFileFibmap(const std::string& path, std::vector<Extent>* out);
 
+// Puts the blocksize of the filesystem, as used by the FIBMAP ioctl, into
+// out_blocksize by using the FIGETBSZ ioctl. Returns true on success.
+bool GetFilesystemBlockSize(const std::string& path, uint32* out_blocksize);
+
 }  // namespace extent_mapper
 
 }  // namespace chromeos_update_engine
diff --git a/extent_mapper_unittest.cc b/extent_mapper_unittest.cc
index 6962004..dc69437 100644
--- a/extent_mapper_unittest.cc
+++ b/extent_mapper_unittest.cc
@@ -11,6 +11,7 @@
 #include <gtest/gtest.h>
 #include "base/basictypes.h"
 #include "update_engine/extent_mapper.h"
+#include "update_engine/graph_types.h"
 #include "update_engine/utils.h"
 
 using std::set;
@@ -27,8 +28,11 @@
   // In lieu of this, we do a weak test: make sure the extents of the unittest
   // executable are consistent and they match with the size of the file.
   const string kFilename = "/proc/self/exe";
-  const off_t kBlockSize = 4096;
   
+  uint32 block_size = 0;
+  EXPECT_TRUE(extent_mapper::GetFilesystemBlockSize(kFilename, &block_size));
+  EXPECT_GT(block_size, 0);
+    
   vector<Extent> extents;
   
   ASSERT_TRUE(extent_mapper::ExtentsForFileFibmap(kFilename, &extents));
@@ -48,7 +52,41 @@
   
   struct stat stbuf;
   EXPECT_EQ(0, stat(kFilename.c_str(), &stbuf));
-  EXPECT_EQ(blocks.size(), (stbuf.st_size + kBlockSize - 1)/kBlockSize);
+  EXPECT_EQ(blocks.size(), (stbuf.st_size + block_size - 1)/block_size);
+}
+
+TEST(ExtentMapperTest, RunAsRootSparseFileTest) {
+  // Create sparse file with one real block, then two sparse ones, then a real
+  // block at the end.
+  const char tmp_name_template[] =
+      "/tmp/ExtentMapperTest.RunAsRootSparseFileTest.XXXXXX";
+  char buf[sizeof(tmp_name_template)];
+  strncpy(buf, tmp_name_template, sizeof(buf));
+  COMPILE_ASSERT(sizeof(buf) > 8, buf_size_incorrect);
+  ASSERT_EQ('\0', buf[sizeof(buf) - 1]);
+
+  int fd = mkstemp(buf);
+  ASSERT_GE(fd, 0);
+
+  uint32 block_size = 0;
+  EXPECT_TRUE(extent_mapper::GetFilesystemBlockSize(buf, &block_size));
+  EXPECT_GT(block_size, 0);
+  // Bytes at offset 0 and at block 3 leave blocks 1 and 2 as a hole.
+  EXPECT_EQ(1, pwrite(fd, "x", 1, 0));
+  EXPECT_EQ(1, pwrite(fd, "x", 1, 3 * block_size));
+  close(fd);
+  // The file is unlinked before the fatal size check, so nothing leaks.
+  vector<Extent> extents;
+  EXPECT_TRUE(extent_mapper::ExtentsForFileFibmap(buf, &extents));
+  unlink(buf);
+  ASSERT_EQ(3, extents.size());
+  EXPECT_EQ(1, extents[0].num_blocks());
+  EXPECT_EQ(2, extents[1].num_blocks());
+  EXPECT_EQ(1, extents[2].num_blocks());
+  EXPECT_NE(kSparseHole, extents[0].start_block());
+  EXPECT_EQ(kSparseHole, extents[1].start_block());
+  EXPECT_NE(kSparseHole, extents[2].start_block());
+  EXPECT_NE(extents[2].start_block(), extents[0].start_block());
 }
 
 }  // namespace chromeos_update_engine
diff --git a/extent_writer.cc b/extent_writer.cc
index 1ae565b..9f6fbf0 100644
--- a/extent_writer.cc
+++ b/extent_writer.cc
@@ -6,25 +6,12 @@
 #include <errno.h>
 #include <unistd.h>
 #include <algorithm>
+#include "update_engine/utils.h"
 
 using std::min;
 
 namespace chromeos_update_engine {
 
-namespace {
-// Returns true on success.
-bool WriteAll(int fd, const void *buf, size_t count) {
-  const char* c_buf = reinterpret_cast<const char*>(buf);
-  ssize_t bytes_written = 0;
-  while (bytes_written < static_cast<ssize_t>(count)) {
-    ssize_t rc = write(fd, c_buf + bytes_written, count - bytes_written);
-    TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
-    bytes_written += rc;
-  }
-  return true;
-}
-}
-
 bool DirectExtentWriter::Write(const void* bytes, size_t count) {
   if (count == 0)
     return true;
@@ -48,7 +35,7 @@
       TEST_AND_RETURN_FALSE_ERRNO(lseek64(fd_, offset, SEEK_SET) !=
                                   static_cast<off64_t>(-1));
       TEST_AND_RETURN_FALSE(
-          WriteAll(fd_, c_bytes + bytes_written, bytes_to_write));
+          utils::WriteAll(fd_, c_bytes + bytes_written, bytes_to_write));
     }
     bytes_written += bytes_to_write;
     extent_bytes_written_ += bytes_to_write;
diff --git a/filesystem_copier_action_unittest.cc b/filesystem_copier_action_unittest.cc
index 1e024f1..86c15ac 100644
--- a/filesystem_copier_action_unittest.cc
+++ b/filesystem_copier_action_unittest.cc
@@ -137,8 +137,8 @@
   g_main_loop_unref(loop);
 
   EXPECT_EQ(0, System(string("losetup -d ") + dev));
-  EXPECT_EQ(0, System(string("umount ") + TestDir() + "/mnt/some_dir/mnt"));
-  EXPECT_EQ(0, System(string("umount ") + TestDir() + "/mnt"));
+  EXPECT_EQ(0, System(string("umount -d ") + TestDir() + "/mnt/some_dir/mnt"));
+  EXPECT_EQ(0, System(string("umount -d ") + TestDir() + "/mnt"));
   EXPECT_EQ(0, unlink(a_image.c_str()));
   EXPECT_EQ(0, unlink(b_image.c_str()));
 
@@ -168,7 +168,7 @@
   EXPECT_TRUE(utils::ReadFileToString(TestDir() + "/mnt/hello", &file_data));
   EXPECT_EQ("hello\n", file_data);
   EXPECT_EQ("/some/target", Readlink(TestDir() + "/mnt/sym"));
-  EXPECT_EQ(0, System(string("umount ") + TestDir() + "/mnt"));
+  EXPECT_EQ(0, System(string("umount -d ") + TestDir() + "/mnt"));
 
   EXPECT_EQ(0, unlink(out_image.c_str()));
   EXPECT_EQ(0, rmdir((TestDir() + "/mnt").c_str()));
diff --git a/filesystem_iterator_unittest.cc b/filesystem_iterator_unittest.cc
index 82b8d3f..da14b87 100644
--- a/filesystem_iterator_unittest.cc
+++ b/filesystem_iterator_unittest.cc
@@ -57,8 +57,8 @@
                              expected_paths_vector.end());
   VerifyAllPaths(kMountPath, expected_paths);
   
-  EXPECT_EQ(0, System(string("umount ") + kMountPath + "/some_dir/mnt"));
-  EXPECT_EQ(0, System(string("umount ") + kMountPath));
+  EXPECT_EQ(0, System(string("umount -d ") + kMountPath + "/some_dir/mnt"));
+  EXPECT_EQ(0, System(string("umount -d ") + kMountPath));
   EXPECT_EQ(0, System(string("rm -f ") + first_image + " " + sub_image));
 }
 
diff --git a/generate_delta_main.cc b/generate_delta_main.cc
index 551c6ef..2f21b2a 100644
--- a/generate_delta_main.cc
+++ b/generate_delta_main.cc
@@ -8,13 +8,23 @@
 #include <unistd.h>
 #include <set>
 #include <string>
+#include <gflags/gflags.h>
 #include <glib.h>
+#include "base/command_line.h"
 #include "chromeos/obsolete_logging.h"
 #include "update_engine/delta_diff_generator.h"
 #include "update_engine/subprocess.h"
 #include "update_engine/update_metadata.pb.h"
 #include "update_engine/utils.h"
 
+DEFINE_string(old_dir, "",
+              "Directory where the old rootfs is loop mounted read-only");
+DEFINE_string(new_dir, "",
+              "Directory where the new rootfs is loop mounted read-only");
+DEFINE_string(old_image, "", "Path to the old rootfs");
+DEFINE_string(new_image, "", "Path to the new rootfs");
+DEFINE_string(out_file, "", "Path to output file");
+
 // This file contains a simple program that takes an old path, a new path,
 // and an output file as arguments and the path to an output file and
 // generates a delta that can be sent to Chrome OS clients.
@@ -26,11 +36,6 @@
 
 namespace {
 
-void usage(const char* argv0) {
-  printf("usage: %s old_dir new_dir out_file\n", argv0);
-  exit(1);
-}
-
 bool IsDir(const char* path) {
   struct stat stbuf;
   TEST_AND_RETURN_FALSE_ERRNO(lstat(path, &stbuf) == 0);
@@ -39,21 +44,27 @@
 
 int Main(int argc, char** argv) {
   g_thread_init(NULL);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  CommandLine::Init(argc, argv);
   Subprocess::Init();
-  if (argc != 4) {
-    usage(argv[0]);
-  }
-  logging::InitLogging("",
+  logging::InitLogging("delta_generator.log",
                        logging::LOG_ONLY_TO_SYSTEM_DEBUG_LOG,
                        logging::DONT_LOCK_LOG_FILE,
                        logging::APPEND_TO_OLD_LOG_FILE);
-  const char* old_dir = argv[1];
-  const char* new_dir = argv[2];
-  if ((!IsDir(old_dir)) || (!IsDir(new_dir))) {
-    usage(argv[0]);
+  if (FLAGS_old_dir.empty() || FLAGS_new_dir.empty() ||
+      FLAGS_old_image.empty() || FLAGS_new_image.empty() ||
+      FLAGS_out_file.empty()) {
+    LOG(FATAL) << "Missing required argument(s)";
+  }
+  if ((!IsDir(FLAGS_old_dir.c_str())) || (!IsDir(FLAGS_new_dir.c_str()))) {
+    LOG(FATAL) << "old_dir or new_dir not directory";
   }
   
-  // TODO(adlr): generate delta file
+  // Propagate generator failure via the return value instead of dropping it.
+  const bool generated = DeltaDiffGenerator::GenerateDeltaUpdateFile(
+      FLAGS_old_dir, FLAGS_old_image, FLAGS_new_dir, FLAGS_new_image,
+      FLAGS_out_file);
+  if (!generated) return 1;
 
   return 0;
 }
diff --git a/graph_types.h b/graph_types.h
index afb7f64..109616a 100644
--- a/graph_types.h
+++ b/graph_types.h
@@ -7,6 +7,7 @@
 
 #include <map>
 #include <set>
+#include <string>
 #include <utility>
 #include <vector>
 #include "base/basictypes.h"
@@ -22,7 +23,7 @@
 };
 
 struct Vertex {
-  Vertex() : index(-1), lowlink(-1), op(NULL) {}
+  Vertex() : index(-1), lowlink(-1) {}
   typedef std::map<std::vector<Vertex>::size_type, EdgeProperties> EdgeMap;
   EdgeMap out_edges;
 
@@ -38,7 +39,8 @@
   std::vector<Vertex>::size_type lowlink;
 
   // Other Vertex properties:
-  DeltaArchiveManifest_InstallOperation* op;
+  DeltaArchiveManifest_InstallOperation op;
+  std::string file_name;
 
   typedef std::vector<Vertex>::size_type Index;
   static const Vertex::Index kInvalidIndex = -1;
@@ -48,6 +50,8 @@
 
 typedef std::pair<Vertex::Index, Vertex::Index> Edge;
 
+const uint64 kSparseHole = kuint64max;
+
 }  // namespace chromeos_update_engine
 
 #endif  // CHROMEOS_PLATFORM_UPDATE_ENGINE_GRAPH_TYPES_H__
diff --git a/graph_utils.cc b/graph_utils.cc
index 79d5733..dd3cdcf 100644
--- a/graph_utils.cc
+++ b/graph_utils.cc
@@ -17,7 +17,8 @@
       graph[edge.first].out_edges.find(edge.second)->second.extents;
   for (vector<Extent>::const_iterator it = extents.begin();
        it != extents.end(); ++it) {
-    weight += it->num_blocks();
+    if (it->start_block() != kSparseHole)
+      weight += it->num_blocks();
   }
   return weight;
 }
@@ -25,7 +26,15 @@
 void AppendBlockToExtents(vector<Extent>* extents, uint64 block) {
   if (!extents->empty()) {
     Extent& extent = extents->back();
-    if (extent.start_block() + extent.num_blocks() == block) {
+    if (block == kSparseHole) {
+      if (extent.start_block() == kSparseHole) {
+        // Extend sparse hole extent
+        extent.set_num_blocks(extent.num_blocks() + 1);
+        return;
+      } else {
+        // Create new extent below outer 'if'
+      }
+    } else if (extent.start_block() + extent.num_blocks() == block) {
       extent.set_num_blocks(extent.num_blocks() + 1);
       return;
     }
diff --git a/postinstall_runner_action_unittest.cc b/postinstall_runner_action_unittest.cc
index 5bb515f..7c1993e 100644
--- a/postinstall_runner_action_unittest.cc
+++ b/postinstall_runner_action_unittest.cc
@@ -87,7 +87,7 @@
   ASSERT_TRUE(WriteFileString(mountpoint + "/postinst", script));
   ASSERT_EQ(0, System(string("chmod a+x ") + mountpoint + "/postinst"));
 
-  ASSERT_EQ(0, System(string("umount ") + mountpoint));
+  ASSERT_EQ(0, System(string("umount -d ") + mountpoint));
 
   ASSERT_EQ(0, System(string("rm -f ") + cwd + "/postinst_called"));
 
diff --git a/subprocess.cc b/subprocess.cc
index 299d758..89b6cad 100644
--- a/subprocess.cc
+++ b/subprocess.cc
@@ -80,7 +80,7 @@
 
 bool Subprocess::SynchronousExec(const std::vector<std::string>& cmd,
                                  int* return_code) {
-  GError *err;
+  GError *err = NULL;
   scoped_array<char *> argv(new char*[cmd.size() + 1]);
   for (unsigned int i = 0; i < cmd.size(); i++) {
     argv[i] = strdup(cmd[i].c_str());
@@ -100,6 +100,8 @@
                               return_code,
                               &err);
   FreeArgv(argv.get());
+  if (err)
+    LOG(INFO) << "err is: " << err->code << ", " << err->message;
   return success;
 }
 
diff --git a/test_utils.cc b/test_utils.cc
index 6c1e35b..1c2f895 100644
--- a/test_utils.cc
+++ b/test_utils.cc
@@ -204,7 +204,7 @@
   EXPECT_EQ(0, System(StringPrintf("ln -s /some/target %s/sym", kMountPath)));
   EXPECT_EQ(0, System(StringPrintf("ln %s/some_dir/test %s/testlink",
                                    kMountPath, kMountPath)));
-  EXPECT_EQ(0, System(StringPrintf("umount %s", kMountPath)));
+  EXPECT_EQ(0, System(StringPrintf("umount -d %s", kMountPath)));
   
   if (out_paths) {
     out_paths->clear();
diff --git a/topological_sort.cc b/topological_sort.cc
index 2e34144..8422849 100644
--- a/topological_sort.cc
+++ b/topological_sort.cc
@@ -5,6 +5,7 @@
 #include "update_engine/topological_sort.h"
 #include <set>
 #include <vector>
+#include "base/logging.h"
 
 using std::set;
 using std::vector;
@@ -26,6 +27,8 @@
     TopologicalSortVisit(graph, visited_nodes, nodes, it->first);
   }
   // Visit this node.
+  LOG(INFO) << graph[node].file_name << " " << graph[node].op.type() << " "
+            << graph[node].op.data_length();
   nodes->push_back(node);
 }
 }  // namespace {}
diff --git a/utils.cc b/utils.cc
index 69a59cc..e1ed874 100644
--- a/utils.cc
+++ b/utils.cc
@@ -35,6 +35,17 @@
   return true;
 }
 
+bool WriteAll(int fd, const void *buf, size_t count) {
+  const char* c_buf = static_cast<const char*>(buf);  // for byte arithmetic
+  size_t bytes_written = 0;  // unsigned: a count > SSIZE_MAX can't go negative
+  while (bytes_written < count) {
+    ssize_t rc = write(fd, c_buf + bytes_written, count - bytes_written);
+    TEST_AND_RETURN_FALSE_ERRNO(rc >= 0 || errno == EINTR);  // EINTR: retry
+    if (rc > 0) bytes_written += rc;  // short write: loop sends the remainder
+  }
+  return true;
+}
+
 bool ReadFile(const std::string& path, std::vector<char>* out) {
   CHECK(out);
   FILE* fp = fopen(path.c_str(), "r");
@@ -114,7 +125,7 @@
     return true;
   if (!S_ISDIR(stbuf.st_mode)) {
     TEST_AND_RETURN_FALSE_ERRNO((unlink(path.c_str()) == 0) ||
-                                 (errno == ENOENT));
+                                (errno == ENOENT));
     // success or path disappeared before we could unlink.
     return true;
   }
@@ -136,7 +147,7 @@
           !strcmp(dir_entry_p->d_name, ".."))
         continue;
       TEST_AND_RETURN_FALSE(RecursiveUnlinkDir(path + "/" +
-                                                dir_entry_p->d_name));
+                                               dir_entry_p->d_name));
     }
     TEST_AND_RETURN_FALSE(err == 0);
   }
@@ -201,6 +212,27 @@
   return path;
 }
 
+bool MakeTempFile(const std::string& filename_template,
+                  std::string* filename,
+                  int* fd) {
+  DCHECK(filename || fd);  // at least one out param must be requested
+  vector<char> buf(filename_template.size() + 1);  // writable, +1 for NUL
+  memcpy(&buf[0], filename_template.data(), filename_template.size());
+  buf[filename_template.size()] = '\0';
+  
+  int mkstemp_fd = mkstemp(&buf[0]);  // mkstemp() edits buf in place
+  TEST_AND_RETURN_FALSE_ERRNO(mkstemp_fd >= 0);
+  if (filename) {
+    *filename = &buf[0];  // copy out the NUL-terminated name
+  }
+  if (fd) {
+    *fd = mkstemp_fd;  // handed back still open
+  } else {
+    close(mkstemp_fd);  // fd not requested, so don't leak it
+  }
+  return true;
+}
+
 bool StringHasSuffix(const std::string& str, const std::string& suffix) {
   if (suffix.size() > str.size())
     return false;
diff --git a/utils.h b/utils.h
index 22a1ce8..298fb03 100644
--- a/utils.h
+++ b/utils.h
@@ -20,6 +20,10 @@
 // exists. Returns true on success, false otherwise.
 bool WriteFile(const char* path, const char* data, int data_len);
 
+// Calls write() repeatedly until all count bytes at buf are written to
+// fd or an error occurs. Returns true on success.
+bool WriteAll(int fd, const void *buf, size_t count);
+
 // Returns the entire contents of the file at path. Returns true on success.
 bool ReadFile(const std::string& path, std::vector<char>* out);
 bool ReadFileToString(const std::string& path, std::string* out);
@@ -41,6 +45,16 @@
 // THAT YOUR PROCESS WILL BE THE ONLY THING WRITING FILES IN THIS DIRECTORY.
 std::string TempFilename(std::string path);
 
+// Calls mkstemp() with the template passed. If filename is non-NULL, the
+// name of the created file is returned in it. If fd is non-NULL, the fd
+// returned by mkstemp() is left open and returned in the out param 'fd';
+// if fd is NULL, the fd from mkstemp() is closed before returning.
+// The last six chars of the template must be XXXXXX. At least one of
+// filename and fd must be non-NULL (DCHECK'd). Returns true on success.
+bool MakeTempFile(const std::string& filename_template,
+                  std::string* filename,
+                  int* fd);
+
 // Deletes a directory and all its contents synchronously. Returns true
 // on success. This may be called with a regular file--it will just unlink it.
 // This WILL cross filesystem boundaries.
@@ -86,6 +100,20 @@
   return std::find(vect.begin(), vect.end(), value) != vect.end(); 
 }
 
+template<typename T>  // Finds value in vect; returns true iff present.
+bool VectorIndexOf(const std::vector<T>& vect, const T& value,
+                   typename std::vector<T>::size_type* out_index) {
+  typename std::vector<T>::const_iterator it = std::find(vect.begin(),
+                                                         vect.end(),
+                                                         value);
+  if (it == vect.end()) {
+    return false;  // not found; *out_index is left unmodified
+  } else {
+    *out_index = it - vect.begin();  // position of the first match
+    return true;
+  }
+}
+
 // Returns the currently booted device. "/dev/sda1", for example.
 // This will not interpret LABEL= or UUID=. You'll need to use findfs
 // or something with equivalent funcionality to interpret those.