AU: Cycle Breaker for directed graphs.
A new class CycleBreaker that uses Johnson's elementary circuit
finding algorithm to find cycles in a directed graph. This class goes
beyond Johnson's algorithm to also break them using a simple greedy
algorithm.
Like Johnson's elementary circuit finding algorithm, this is not
intended to find cycles that contain only one node (i.e., a node that
points to itself).
Review URL: http://codereview.chromium.org/784001
diff --git a/SConstruct b/SConstruct
index 5bfbec7..f43c68f 100644
--- a/SConstruct
+++ b/SConstruct
@@ -86,6 +86,7 @@
sources = Split("""action_processor.cc
bzip_extent_writer.cc
+ cycle_breaker.cc
decompressing_file_writer.cc
delta_diff_parser.cc
download_action.cc
@@ -113,6 +114,7 @@
action_pipe_unittest.cc
action_processor_unittest.cc
bzip_extent_writer_unittest.cc
+ cycle_breaker_unittest.cc
decompressing_file_writer_unittest.cc
delta_diff_generator_unittest.cc
download_action_unittest.cc
diff --git a/cycle_breaker.cc b/cycle_breaker.cc
new file mode 100644
index 0000000..f6ed31f
--- /dev/null
+++ b/cycle_breaker.cc
@@ -0,0 +1,144 @@
+// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "update_engine/cycle_breaker.h"
+#include <set>
+#include <utility>
+#include "update_engine/graph_utils.h"
+#include "update_engine/tarjan.h"
+#include "update_engine/utils.h"
+
+using std::make_pair;
+using std::set;
+using std::vector;
+
+namespace chromeos_update_engine {
+
+// This is the outer function from the original paper.
+void CycleBreaker::BreakCycles(const Graph& graph, set<Edge>* out_cut_edges) {
+ cut_edges_.clear();
+
+ // Make a copy, which we will modify by removing edges. Thus, in each
+ // iteration subgraph_ is the current subgraph or the original with
+ // vertices we desire. This variable was "A_K" in the original paper.
+ subgraph_ = graph;
+
+ // The paper calls for the "adjacency structure (i.e., graph) of
+ // strong (-ly connected) component K with least vertex in subgraph
+ // induced by {s, s + 1, ..., n}".
+ // We arbitrarily order each vertex by its index in the graph. Thus,
+ // each iteration, we are looking at the subgraph {s, s + 1, ..., n}
+ // and looking for the strongly connected component with vertex s.
+
+ TarjanAlgorithm tarjan;
+
+ for (Graph::size_type i = 0; i < subgraph_.size(); i++) {
+ if (i > 0) {
+ // Erase node (i - 1) from subgraph_. First, erase what it points to
+ subgraph_[i - 1].out_edges.clear();
+ // Now, erase any pointers to node (i - 1)
+ for (Graph::size_type j = i; j < subgraph_.size(); j++) {
+ subgraph_[j].out_edges.erase(i - 1);
+ }
+ }
+
+ // Calculate SCC (strongly connected component) with vertex i.
+ vector<Vertex::Index> component_indexes;
+ tarjan.Execute(i, &subgraph_, &component_indexes);
+
+ // Set subgraph edges for the components in the SCC.
+ for (vector<Vertex::Index>::iterator it = component_indexes.begin();
+ it != component_indexes.end(); ++it) {
+ subgraph_[*it].subgraph_edges.clear();
+ for (vector<Vertex::Index>::iterator jt = component_indexes.begin();
+ jt != component_indexes.end(); ++jt) {
+ // If there's a link from *it -> *jt in the graph,
+ // add a subgraph_ edge
+ if (utils::MapContainsKey(subgraph_[*it].out_edges, *jt))
+ subgraph_[*it].subgraph_edges.insert(*jt);
+ }
+ }
+
+ current_vertex_ = i;
+ blocked_.clear();
+ blocked_.resize(subgraph_.size());
+ blocked_graph_.clear();
+ blocked_graph_.resize(subgraph_.size());
+ Circuit(current_vertex_);
+ }
+
+ out_cut_edges->swap(cut_edges_);
+ DCHECK(stack_.empty());
+}
+
+void CycleBreaker::HandleCircuit() {
+ stack_.push_back(current_vertex_);
+ CHECK_GE(stack_.size(), 2);
+ Edge min_edge = make_pair(stack_[0], stack_[1]);
+ uint64 min_edge_weight = kuint64max;
+ for (vector<Vertex::Index>::const_iterator it = stack_.begin();
+ it != (stack_.end() - 1); ++it) {
+ Edge edge = make_pair(*it, *(it + 1));
+ if (cut_edges_.find(edge) != cut_edges_.end()) {
+ stack_.pop_back();
+ return;
+ }
+ uint64 edge_weight = graph_utils::EdgeWeight(subgraph_, edge);
+ if (edge_weight < min_edge_weight) {
+ min_edge_weight = edge_weight;
+ min_edge = edge;
+ }
+ }
+ cut_edges_.insert(min_edge);
+ stack_.pop_back();
+}
+
+void CycleBreaker::Unblock(Vertex::Index u) {
+ blocked_[u] = false;
+
+ for (Vertex::EdgeMap::iterator it = blocked_graph_[u].out_edges.begin();
+ it != blocked_graph_[u].out_edges.end(); ) {
+ Vertex::Index w = it->first;
+ blocked_graph_[u].out_edges.erase(it++);
+ if (blocked_[w])
+ Unblock(w);
+ }
+}
+
+bool CycleBreaker::Circuit(Vertex::Index vertex) {
+ // "vertex" was "v" in the original paper.
+ bool found = false; // Was "f" in the original paper.
+ stack_.push_back(vertex);
+ blocked_[vertex] = true;
+
+ for (Vertex::SubgraphEdgeMap::iterator w =
+ subgraph_[vertex].subgraph_edges.begin();
+ w != subgraph_[vertex].subgraph_edges.end(); ++w) {
+ if (*w == current_vertex_) {
+ // The original paper called for printing stack_ followed by
+ // current_vertex_ here, which is a cycle. Instead, we call
+ // HandleCircuit() to break it.
+ HandleCircuit();
+ found = true;
+ } else if (!blocked_[*w]) {
+ if (Circuit(*w))
+ found = true;
+ }
+ }
+
+ if (found) {
+ Unblock(vertex);
+ } else {
+ for (Vertex::SubgraphEdgeMap::iterator w =
+ subgraph_[vertex].subgraph_edges.begin();
+ w != subgraph_[vertex].subgraph_edges.end(); ++w) {
+ subgraph_[*w].subgraph_edges.insert(vertex);
+ }
+ }
+ CHECK_EQ(vertex, stack_.back());
+ stack_.pop_back();
+ return found;
+}
+
+} // namespace chromeos_update_engine
diff --git a/cycle_breaker.h b/cycle_breaker.h
new file mode 100644
index 0000000..e9bb1e4
--- /dev/null
+++ b/cycle_breaker.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROMEOS_PLATFORM_UPDATE_ENGINE_CYCLE_BREAKER_H__
+#define CHROMEOS_PLATFORM_UPDATE_ENGINE_CYCLE_BREAKER_H__
+
+// This is a modified implementation of Donald B. Johnson's algorithm for
+// finding all elementary cycles (a.k.a. circuits) in a directed graph.
+// See the paper "Finding All the Elementary Circuits of a Directed Graph"
+// at http://dutta.csc.ncsu.edu/csc791_spring07/wrap/circuits_johnson.pdf
+// for reference.
+
+// Note: this version of the algorithm not only finds cycles, but breaks them.
+// It uses a simple greedy algorithm for cutting: when a cycle is discovered,
+// the edge with the least weight is cut. Longer term we may wish to do
+// something more intelligent, since the goal is (ideally) to minimize the
+// sum of the weights of all cut cycles. In practice, it's intractable
+// to consider all cycles before cutting any; there are simply too many.
+// In a sample graph representative of a typical workload, I found over
+// 5 * 10^15 cycles.
+
+#include <set>
+#include <vector>
+#include "update_engine/graph_types.h"
+
+namespace chromeos_update_engine {
+
+class CycleBreaker {
+ public:
+ // out_cut_edges is replaced with the cut edges.
+ void BreakCycles(const Graph& graph, std::set<Edge>* out_cut_edges);
+
+ private:
+ void HandleCircuit();
+ void Unblock(Vertex::Index u);
+ bool Circuit(Vertex::Index vertex);
+
+ std::vector<bool> blocked_; // "blocked" in the paper
+ Vertex::Index current_vertex_; // "s" in the paper
+ std::vector<Vertex::Index> stack_; // the stack variable in the paper
+ Graph subgraph_; // "A_K" in the paper
+ Graph blocked_graph_; // "B" in the paper
+
+ std::set<Edge> cut_edges_;
+};
+
+} // namespace chromeos_update_engine
+
+#endif // CHROMEOS_PLATFORM_UPDATE_ENGINE_CYCLE_BREAKER_H__
diff --git a/cycle_breaker_unittest.cc b/cycle_breaker_unittest.cc
new file mode 100644
index 0000000..ec7f8a3
--- /dev/null
+++ b/cycle_breaker_unittest.cc
@@ -0,0 +1,131 @@
+// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+#include <gtest/gtest.h>
+#include "chromeos/obsolete_logging.h"
+#include "update_engine/cycle_breaker.h"
+#include "update_engine/graph_types.h"
+#include "update_engine/utils.h"
+
+using std::make_pair;
+using std::pair;
+using std::set;
+using std::string;
+using std::vector;
+
+namespace chromeos_update_engine {
+
+class CycleBreakerTest : public ::testing::Test {};
+
+TEST(CycleBreakerTest, SimpleTest) {
+ int counter = 0;
+ const Vertex::Index n_a = counter++;
+ const Vertex::Index n_b = counter++;
+ const Vertex::Index n_c = counter++;
+ const Vertex::Index n_d = counter++;
+ const Vertex::Index n_e = counter++;
+ const Vertex::Index n_f = counter++;
+ const Vertex::Index n_g = counter++;
+ const Vertex::Index n_h = counter++;
+ const Graph::size_type kNodeCount = counter++;
+
+ Graph graph(kNodeCount);
+
+ graph[n_a].out_edges.insert(make_pair(n_e, EdgeProperties()));
+ graph[n_a].out_edges.insert(make_pair(n_f, EdgeProperties()));
+ graph[n_b].out_edges.insert(make_pair(n_a, EdgeProperties()));
+ graph[n_c].out_edges.insert(make_pair(n_d, EdgeProperties()));
+ graph[n_d].out_edges.insert(make_pair(n_e, EdgeProperties()));
+ graph[n_d].out_edges.insert(make_pair(n_f, EdgeProperties()));
+ graph[n_e].out_edges.insert(make_pair(n_b, EdgeProperties()));
+ graph[n_e].out_edges.insert(make_pair(n_c, EdgeProperties()));
+ graph[n_e].out_edges.insert(make_pair(n_f, EdgeProperties()));
+ graph[n_f].out_edges.insert(make_pair(n_g, EdgeProperties()));
+ graph[n_g].out_edges.insert(make_pair(n_h, EdgeProperties()));
+ graph[n_h].out_edges.insert(make_pair(n_g, EdgeProperties()));
+
+ CycleBreaker breaker;
+
+ set<Edge> broken_edges;
+ breaker.BreakCycles(graph, &broken_edges);
+
+ // The following cycles must be cut:
+ // A->E->B
+ // C->D->E
+ // G->H
+
+ EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_a, n_e)) ||
+ utils::SetContainsKey(broken_edges, make_pair(n_e, n_b)) ||
+ utils::SetContainsKey(broken_edges, make_pair(n_b, n_a)));
+ EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_c, n_d)) ||
+ utils::SetContainsKey(broken_edges, make_pair(n_d, n_e)) ||
+ utils::SetContainsKey(broken_edges, make_pair(n_e, n_c)));
+ EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_g, n_h)) ||
+ utils::SetContainsKey(broken_edges, make_pair(n_h, n_g)));
+ EXPECT_EQ(3, broken_edges.size());
+}
+
+namespace {
+pair<Vertex::Index, EdgeProperties> EdgeWithWeight(Vertex::Index dest,
+uint64 weight) {
+ EdgeProperties props;
+ props.extents.resize(1);
+ props.extents[0].set_num_blocks(weight);
+ return make_pair(dest, props);
+}
+} // namespace {}
+
+TEST(CycleBreakerTest, WeightTest) {
+ int counter = 0;
+ const Vertex::Index n_a = counter++;
+ const Vertex::Index n_b = counter++;
+ const Vertex::Index n_c = counter++;
+ const Vertex::Index n_d = counter++;
+ const Vertex::Index n_e = counter++;
+ const Vertex::Index n_f = counter++;
+ const Vertex::Index n_g = counter++;
+ const Vertex::Index n_h = counter++;
+ const Vertex::Index n_i = counter++;
+ const Vertex::Index n_j = counter++;
+ const Graph::size_type kNodeCount = counter++;
+
+ Graph graph(kNodeCount);
+
+ graph[n_a].out_edges.insert(EdgeWithWeight(n_b, 4));
+ graph[n_a].out_edges.insert(EdgeWithWeight(n_f, 3));
+ graph[n_a].out_edges.insert(EdgeWithWeight(n_h, 2));
+ graph[n_b].out_edges.insert(EdgeWithWeight(n_a, 3));
+ graph[n_b].out_edges.insert(EdgeWithWeight(n_c, 4));
+ graph[n_c].out_edges.insert(EdgeWithWeight(n_b, 5));
+ graph[n_c].out_edges.insert(EdgeWithWeight(n_d, 3));
+ graph[n_d].out_edges.insert(EdgeWithWeight(n_a, 6));
+ graph[n_d].out_edges.insert(EdgeWithWeight(n_e, 3));
+ graph[n_e].out_edges.insert(EdgeWithWeight(n_d, 4));
+ graph[n_e].out_edges.insert(EdgeWithWeight(n_g, 5));
+ graph[n_f].out_edges.insert(EdgeWithWeight(n_g, 2));
+ graph[n_g].out_edges.insert(EdgeWithWeight(n_f, 3));
+ graph[n_g].out_edges.insert(EdgeWithWeight(n_d, 5));
+ graph[n_h].out_edges.insert(EdgeWithWeight(n_i, 8));
+ graph[n_i].out_edges.insert(EdgeWithWeight(n_e, 4));
+ graph[n_i].out_edges.insert(EdgeWithWeight(n_h, 9));
+ graph[n_i].out_edges.insert(EdgeWithWeight(n_j, 6));
+
+ CycleBreaker breaker;
+
+ set<Edge> broken_edges;
+ breaker.BreakCycles(graph, &broken_edges);
+
+ // These are required to be broken:
+ EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_b, n_a)));
+ EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_b, n_c)));
+ EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_d, n_e)));
+ EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_f, n_g)));
+ EXPECT_TRUE(utils::SetContainsKey(broken_edges, make_pair(n_h, n_i)));
+}
+
+} // namespace chromeos_update_engine