AU: Cycle Breaker for directed graphs.

A new class CycleBreaker that uses Johnson's elementary circuit
finding algorithm to find cycles in a directed graph. This class goes
beyond Johnson's algorithm to also break them using a simple greedy
algorithm.

Like Johnson's elementary circuit finding algorithm, this is not
intended to find cycles that contain only one node (i.e., a node that
points to itself).

Review URL: http://codereview.chromium.org/784001
diff --git a/SConstruct b/SConstruct
index 5bfbec7..f43c68f 100644
--- a/SConstruct
+++ b/SConstruct
@@ -86,6 +86,7 @@
 
 sources = Split("""action_processor.cc
                    bzip_extent_writer.cc
+                   cycle_breaker.cc
                    decompressing_file_writer.cc
                    delta_diff_parser.cc
                    download_action.cc
@@ -113,6 +114,7 @@
                             action_pipe_unittest.cc
                             action_processor_unittest.cc
                             bzip_extent_writer_unittest.cc
+                            cycle_breaker_unittest.cc
                             decompressing_file_writer_unittest.cc
                             delta_diff_generator_unittest.cc
                             download_action_unittest.cc
diff --git a/cycle_breaker.cc b/cycle_breaker.cc
new file mode 100644
index 0000000..f6ed31f
--- /dev/null
+++ b/cycle_breaker.cc
@@ -0,0 +1,144 @@
+// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "update_engine/cycle_breaker.h"
+#include <set>
+#include <utility>
+#include "update_engine/graph_utils.h"
+#include "update_engine/tarjan.h"
+#include "update_engine/utils.h"
+
+using std::make_pair;
+using std::set;
+using std::vector;
+
+namespace chromeos_update_engine {
+
+// This is the outer function from the original paper.
+void CycleBreaker::BreakCycles(const Graph& graph, set<Edge>* out_cut_edges) {
+  cut_edges_.clear();
+  
+  // Make a copy, which we will modify by removing edges. Thus, in each
+  // iteration subgraph_ is the current subgraph or the original with
+  // vertices we desire. This variable was "A_K" in the original paper.
+  subgraph_ = graph;
+    
+  // The paper calls for the "adjacency structure (i.e., graph) of
+  // strong (-ly connected) component K with least vertex in subgraph
+  // induced by {s, s + 1, ..., n}".
+  // We arbitrarily order each vertex by its index in the graph. Thus,
+  // each iteration, we are looking at the subgraph {s, s + 1, ..., n}
+  // and looking for the strongly connected component with vertex s.
+
+  TarjanAlgorithm tarjan;
+    
+  for (Graph::size_type i = 0; i < subgraph_.size(); i++) {
+    if (i > 0) {
+      // Erase node (i - 1) from subgraph_. First, erase what it points to
+      subgraph_[i - 1].out_edges.clear();
+      // Now, erase any pointers to node (i - 1)
+      for (Graph::size_type j = i; j < subgraph_.size(); j++) {
+        subgraph_[j].out_edges.erase(i - 1);
+      }
+    }
+
+    // Calculate SCC (strongly connected component) with vertex i.
+    vector<Vertex::Index> component_indexes;
+    tarjan.Execute(i, &subgraph_, &component_indexes);
+
+    // Set subgraph edges for the components in the SCC.
+    for (vector<Vertex::Index>::iterator it = component_indexes.begin();
+         it != component_indexes.end(); ++it) {
+      subgraph_[*it].subgraph_edges.clear();
+      for (vector<Vertex::Index>::iterator jt = component_indexes.begin();
+           jt != component_indexes.end(); ++jt) {
+        // If there's a link from *it -> *jt in the graph,
+        // add a subgraph_ edge
+        if (utils::MapContainsKey(subgraph_[*it].out_edges, *jt))
+          subgraph_[*it].subgraph_edges.insert(*jt);
+      }        
+    }
+
+    current_vertex_ = i;
+    blocked_.clear();
+    blocked_.resize(subgraph_.size());
+    blocked_graph_.clear();
+    blocked_graph_.resize(subgraph_.size());
+    Circuit(current_vertex_);
+  }
+  
+  out_cut_edges->swap(cut_edges_);
+  DCHECK(stack_.empty());
+}
+
+void CycleBreaker::HandleCircuit() {
+  stack_.push_back(current_vertex_);
+  CHECK_GE(stack_.size(), 2);
+  Edge min_edge = make_pair(stack_[0], stack_[1]);
+  uint64 min_edge_weight = kuint64max;
+  for (vector<Vertex::Index>::const_iterator it = stack_.begin();
+       it != (stack_.end() - 1); ++it) {
+    Edge edge = make_pair(*it, *(it + 1));
+    if (cut_edges_.find(edge) != cut_edges_.end()) {
+      stack_.pop_back();
+      return;
+    }
+    uint64 edge_weight = graph_utils::EdgeWeight(subgraph_, edge);
+    if (edge_weight < min_edge_weight) {
+      min_edge_weight = edge_weight;
+      min_edge = edge;
+    }
+  }
+  cut_edges_.insert(min_edge);
+  stack_.pop_back();
+}
+
+void CycleBreaker::Unblock(Vertex::Index u) {
+  blocked_[u] = false;
+
+  for (Vertex::EdgeMap::iterator it = blocked_graph_[u].out_edges.begin();
+       it != blocked_graph_[u].out_edges.end(); ) {
+    Vertex::Index w = it->first;
+    blocked_graph_[u].out_edges.erase(it++);
+    if (blocked_[w])
+      Unblock(w);
+  }
+}
+
+bool CycleBreaker::Circuit(Vertex::Index vertex) {
+  // "vertex" was "v" in the original paper.
+  bool found = false;  // Was "f" in the original paper.
+  stack_.push_back(vertex);
+  blocked_[vertex] = true;
+
+  for (Vertex::SubgraphEdgeMap::iterator w =
+           subgraph_[vertex].subgraph_edges.begin();
+       w != subgraph_[vertex].subgraph_edges.end(); ++w) {
+    if (*w == current_vertex_) {
+      // The original paper called for printing stack_ followed by
+      // current_vertex_ here, which is a cycle. Instead, we call
+      // HandleCircuit() to break it.
+      HandleCircuit();
+      found = true;
+    } else if (!blocked_[*w]) {
+      if (Circuit(*w))
+        found = true;
+    }
+  }
+
+  if (found) {
+    Unblock(vertex);
+  } else {
+    for (Vertex::SubgraphEdgeMap::iterator w =
+             subgraph_[vertex].subgraph_edges.begin();
+         w != subgraph_[vertex].subgraph_edges.end(); ++w) {
+      subgraph_[*w].subgraph_edges.insert(vertex);
+    }
+  }
+  CHECK_EQ(vertex, stack_.back());
+  stack_.pop_back();
+  return found;
+}
+
+}  // namespace chromeos_update_engine
diff --git a/cycle_breaker.h b/cycle_breaker.h
new file mode 100644
index 0000000..e9bb1e4
--- /dev/null
+++ b/cycle_breaker.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROMEOS_PLATFORM_UPDATE_ENGINE_CYCLE_BREAKER_H__
+#define CHROMEOS_PLATFORM_UPDATE_ENGINE_CYCLE_BREAKER_H__
+
+// This is a modified implementation of Donald B. Johnson's algorithm for
+// finding all elementary cycles (a.k.a. circuits) in a directed graph.
+// See the paper "Finding All the Elementary Circuits of a Directed Graph"
+// at http://dutta.csc.ncsu.edu/csc791_spring07/wrap/circuits_johnson.pdf
+// for reference.
+
+// Note: this version of the algorithm not only finds cycles, but breaks them.
+// It uses a simple greedy algorithm for cutting: when a cycle is discovered,
+// the edge with the least weight is cut. Longer term we may wish to do
+// something more intelligent, since the goal is (ideally) to minimize the
+// sum of the weights of all cut cycles. In practice, it's intractable
+// to consider all cycles before cutting any; there are simply too many.
+// In a sample graph representative of a typical workload, I found over
+// 5 * 10^15 cycles.
+
+#include <set>
+#include <vector>
+#include "update_engine/graph_types.h"
+
+namespace chromeos_update_engine {
+
+class CycleBreaker {
+ public:
+  // out_cut_edges is replaced with the cut edges.
+  void BreakCycles(const Graph& graph, std::set<Edge>* out_cut_edges);
+
+ private:
+  void HandleCircuit();
+  void Unblock(Vertex::Index u);
+  bool Circuit(Vertex::Index vertex);
+
+  std::vector<bool> blocked_;  // "blocked" in the paper
+  Vertex::Index current_vertex_;  // "s" in the paper
+  std::vector<Vertex::Index> stack_;  // the stack variable in the paper
+  Graph subgraph_;  // "A_K" in the paper
+  Graph blocked_graph_;  // "B" in the paper
+
+  std::set<Edge> cut_edges_;
+};
+
+}  // namespace chromeos_update_engine
+
+#endif  // CHROMEOS_PLATFORM_UPDATE_ENGINE_CYCLE_BREAKER_H__
diff --git a/cycle_breaker_unittest.cc b/cycle_breaker_unittest.cc
new file mode 100644
index 0000000..ec7f8a3
--- /dev/null
+++ b/cycle_breaker_unittest.cc
@@ -0,0 +1,131 @@
+// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+#include <gtest/gtest.h>
+#include "chromeos/obsolete_logging.h"
+#include "update_engine/cycle_breaker.h"
+#include "update_engine/graph_types.h"
+#include "update_engine/utils.h"
+
+using std::make_pair;
+using std::pair;
+using std::set;
+using std::string;
+using std::vector;
+
+namespace chromeos_update_engine {
+
+class CycleBreakerTest : public ::testing::Test {};
+
+TEST(CycleBreakerTest, SimpleTest) {
+  int counter = 0;
+  const Vertex::Index n_a = counter++;
+  const Vertex::Index n_b = counter++;
+  const Vertex::Index n_c = counter++;
+  const Vertex::Index n_d = counter++;
+  const Vertex::Index n_e = counter++;
+  const Vertex::Index n_f = counter++;
+  const Vertex::Index n_g = counter++;
+  const Vertex::Index n_h = counter++;
+  const Graph::size_type kNodeCount = counter++;
+
+  Graph graph(kNodeCount);
+
+  graph[n_a].out_edges.insert(make_pair(n_e, EdgeProperties()));
+  graph[n_a].out_edges.insert(make_pair(n_f, EdgeProperties()));
+  graph[n_b].out_edges.insert(make_pair(n_a, EdgeProperties()));
+  graph[n_c].out_edges.insert(make_pair(n_d, EdgeProperties()));
+  graph[n_d].out_edges.insert(make_pair(n_e, EdgeProperties()));
+  graph[n_d].out_edges.insert(make_pair(n_f, EdgeProperties()));
+  graph[n_e].out_edges.insert(make_pair(n_b, EdgeProperties()));
+  graph[n_e].out_edges.insert(make_pair(n_c, EdgeProperties()));
+  graph[n_e].out_edges.insert(make_pair(n_f, EdgeProperties()));
+  graph[n_f].out_edges.insert(make_pair(n_g, EdgeProperties()));
+  graph[n_g].out_edges.insert(make_pair(n_h, EdgeProperties()));
+  graph[n_h].out_edges.insert(make_pair(n_g, EdgeProperties()));
+
+  CycleBreaker breaker;
+  
+  set<Edge> broken_edges;
+  breaker.BreakCycles(graph, &broken_edges);
+  
+  // The following cycles must be cut:
+  // A->E->B
+  // C->D->E
+  // G->H
+
+  EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_a, n_e)) ||
+              utils::SetContainsKey(broken_edges, make_pair(n_e, n_b)) ||
+              utils::SetContainsKey(broken_edges, make_pair(n_b, n_a)));
+  EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_c, n_d)) ||
+              utils::SetContainsKey(broken_edges, make_pair(n_d, n_e)) ||
+              utils::SetContainsKey(broken_edges, make_pair(n_e, n_c)));
+  EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_g, n_h)) ||
+              utils::SetContainsKey(broken_edges, make_pair(n_h, n_g)));
+  EXPECT_EQ(3, broken_edges.size());
+}
+
+namespace {
+pair<Vertex::Index, EdgeProperties> EdgeWithWeight(Vertex::Index dest,
+uint64 weight) {
+  EdgeProperties props;
+  props.extents.resize(1);
+  props.extents[0].set_num_blocks(weight);
+  return make_pair(dest, props);
+}
+}  // namespace {}
+
+TEST(CycleBreakerTest, WeightTest) {
+  int counter = 0;
+  const Vertex::Index n_a = counter++;
+  const Vertex::Index n_b = counter++;
+  const Vertex::Index n_c = counter++;
+  const Vertex::Index n_d = counter++;
+  const Vertex::Index n_e = counter++;
+  const Vertex::Index n_f = counter++;
+  const Vertex::Index n_g = counter++;
+  const Vertex::Index n_h = counter++;
+  const Vertex::Index n_i = counter++;
+  const Vertex::Index n_j = counter++;
+  const Graph::size_type kNodeCount = counter++;
+
+  Graph graph(kNodeCount);
+
+  graph[n_a].out_edges.insert(EdgeWithWeight(n_b, 4));
+  graph[n_a].out_edges.insert(EdgeWithWeight(n_f, 3));
+  graph[n_a].out_edges.insert(EdgeWithWeight(n_h, 2));
+  graph[n_b].out_edges.insert(EdgeWithWeight(n_a, 3));
+  graph[n_b].out_edges.insert(EdgeWithWeight(n_c, 4));
+  graph[n_c].out_edges.insert(EdgeWithWeight(n_b, 5));
+  graph[n_c].out_edges.insert(EdgeWithWeight(n_d, 3));
+  graph[n_d].out_edges.insert(EdgeWithWeight(n_a, 6));
+  graph[n_d].out_edges.insert(EdgeWithWeight(n_e, 3));
+  graph[n_e].out_edges.insert(EdgeWithWeight(n_d, 4));
+  graph[n_e].out_edges.insert(EdgeWithWeight(n_g, 5));
+  graph[n_f].out_edges.insert(EdgeWithWeight(n_g, 2));
+  graph[n_g].out_edges.insert(EdgeWithWeight(n_f, 3));
+  graph[n_g].out_edges.insert(EdgeWithWeight(n_d, 5));
+  graph[n_h].out_edges.insert(EdgeWithWeight(n_i, 8));
+  graph[n_i].out_edges.insert(EdgeWithWeight(n_e, 4));
+  graph[n_i].out_edges.insert(EdgeWithWeight(n_h, 9));
+  graph[n_i].out_edges.insert(EdgeWithWeight(n_j, 6));
+
+  CycleBreaker breaker;
+  
+  set<Edge> broken_edges;
+  breaker.BreakCycles(graph, &broken_edges);
+
+  // These are required to be broken:
+  EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_b, n_a)));
+  EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_b, n_c)));
+  EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_d, n_e)));
+  EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_f, n_g)));
+  EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_h, n_i)));
+}
+
+}  // namespace chromeos_update_engine