Improve cycle breaking heuristic of merge sequence algorithm

Tested on two different pixel targets, A and B.

Pixel A did not show any change in COW size.

Data for pixel B:

1. OTA size: 52MB, COW size: increase by 0.16%, 325.49MB to 325.99MB
2. OTA size: 143.94MB, COW size: decrease by 71%, 1.52GB to 438MB
3. OTA size: 210MB, COW size: decrease by 59%, 1.82GB to 733MB
4. OTA size: 425MB, COW size: decrase by 30%, 1.55GB to 1.0GB

Test: th
Bug: 289236484
Change-Id: Id99d019c8688ce3d0bd4197330b177b7b3cfd198
diff --git a/payload_generator/merge_sequence_generator.cc b/payload_generator/merge_sequence_generator.cc
index 037021f..fb43bfd 100644
--- a/payload_generator/merge_sequence_generator.cc
+++ b/payload_generator/merge_sequence_generator.cc
@@ -196,6 +196,105 @@
       new MergeSequenceGenerator(sequence, partition_name));
 }
 
+template <typename T>
+CowMergeOperation MaxOutDegree(
+    const T& nodes,
+    const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
+        merge_after) {
+  // Rationale for this algorithm:
+  // We only need to remove nodes from the graph if the graph contains a cycle.
+  // Any graph of N nodes has cycle iff number of edges >= N.
+  // So, to restore the graph back to an acyclic state, we need to keep removing
+  // edges until we have <N edges left. To minimize the number of nodes removed,
+  // we always remove the node with maximum out degree.
+  CowMergeOperation best;
+  size_t max_out_degree = 0;
+  const auto has_xor =
+      std::any_of(nodes.begin(),
+                  nodes.end(),
+                  [&merge_after](const CowMergeOperation& node) {
+                    if (node.type() != CowMergeOperation::COW_XOR) {
+                      return false;
+                    }
+                    auto it = merge_after.find(node);
+                    if (it == merge_after.end()) {
+                      return false;
+                    }
+                    return it->second.size() > 0;
+                  });
+  for (const auto& op : nodes) {
+    if (has_xor && op.type() != CowMergeOperation::COW_XOR) {
+      continue;
+    }
+    const auto out_degree = merge_after.at(op).size();
+    if (out_degree > max_out_degree) {
+      best = op;
+      max_out_degree = out_degree;
+    } else if (out_degree == max_out_degree) {
+      if (op.src_extent().num_blocks() < best.src_extent().num_blocks()) {
+        best = op;
+      }
+    }
+  }
+  CHECK_NE(max_out_degree, 0UL);
+  return best;
+}
+
+template <typename T>
+struct MapKeyIterator {
+  MapKeyIterator<T>& operator++() {
+    ++it;
+    return *this;
+  }
+  MapKeyIterator<T>& operator--() {
+    --it;
+    return *this;
+  }
+  bool operator==(const MapKeyIterator<T>& rhs) const { return it == rhs.it; }
+  bool operator!=(const MapKeyIterator<T>& rhs) const { return it != rhs.it; }
+  auto&& operator->() const { return it->first; }
+  auto&& operator*() const { return it->first; }
+  T it;
+};
+
+template <typename T, typename U>
+struct MapKeyRange {
+  auto begin() const {
+    return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.begin()};
+  }
+  auto end() const {
+    return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.end()};
+  }
+  std::map<T, U> map;
+};
+
+CowMergeOperation MaxOutDegree(
+    const std::map<CowMergeOperation, int>& incoming_edges,
+    const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
+        merge_after) {
+  return MaxOutDegree(MapKeyRange<CowMergeOperation, int>{incoming_edges},
+                      merge_after);
+}
+
+// Given a potentially cyclic graph, return a node to remove to break cycles
+// |incoming_edges| stores nodes' in degree, and |merge_after| is an outgoing
+// edge list. For example, |merge_after[a]| returns all nodes which `a` has an
+// out going edge to.
+// The only requirement of this function is to return a node which is in
+// |incoming_edges| . As long as this is satisfied, merge sequence generation
+// will work. Caller will keep removing nodes returned by this function until
+// the graph has no cycles. However, the choice of which node to remove can
+// greatly impact COW sizes. Nodes removed from the graph will be converted to a
+// COW_REPLACE operation, taking more disk space. So this function should try to
+// pick a node which minimizes number of nodes we have to remove. (Modulo the
+// weight of each node, which is how many blocks a CowMergeOperation touches)
+CowMergeOperation PickConvertToRaw(
+    const std::map<CowMergeOperation, int>& incoming_edges,
+    const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
+        merge_after) {
+  return MaxOutDegree(incoming_edges, merge_after);
+}
+
 std::map<CowMergeOperation, std::set<CowMergeOperation>>
 MergeSequenceGenerator::FindDependency(
     const std::vector<CowMergeOperation>& operations) {
@@ -282,7 +381,11 @@
       merge_sequence.insert(
           merge_sequence.end(), free_operations.begin(), free_operations.end());
     } else {
-      auto to_convert = incoming_edges.begin()->first;
+      auto to_convert = PickConvertToRaw(incoming_edges, merge_after_);
+      // The operation we pick must be one of the nodes not already in merge
+      // sequence.
+      CHECK(incoming_edges.find(to_convert) != incoming_edges.end());
+
       free_operations.insert(to_convert);
       convert_to_raw.insert(to_convert);
       LOG(INFO) << "Converting operation to raw " << to_convert;