Add new script to analyze static/shared library usage

Parses module-info.json, gathers stats on how many times each library is
included shared or statically.

Can print a list of libraries that would be a candidate for changing
from static to shared or visa versa.

Test: m
Bug: 280829178
Change-Id: I4bbffbd673ab2e08c69d0ab6e68402be77c9ffbc
diff --git a/tools/find_static_candidates.py b/tools/find_static_candidates.py
new file mode 100644
index 0000000..7511b36
--- /dev/null
+++ b/tools/find_static_candidates.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+
+"""Tool to find static libraries that maybe should be shared libraries and shared libraries that maybe should be static libraries.
+
+This tool only looks at the module-info.json for the current target.
+
+Example of "class" types for each of the modules in module-info.json
+  "EXECUTABLES": 2307,
+  "ETC": 9094,
+  "NATIVE_TESTS": 10461,
+  "APPS": 2885,
+  "JAVA_LIBRARIES": 5205,
+  "EXECUTABLES/JAVA_LIBRARIES": 119,
+  "FAKE": 553,
+  "SHARED_LIBRARIES/STATIC_LIBRARIES": 7591,
+  "STATIC_LIBRARIES": 11535,
+  "SHARED_LIBRARIES": 10852,
+  "HEADER_LIBRARIES": 1897,
+  "DYLIB_LIBRARIES": 1262,
+  "RLIB_LIBRARIES": 3413,
+  "ROBOLECTRIC": 39,
+  "PACKAGING": 5,
+  "PROC_MACRO_LIBRARIES": 36,
+  "RENDERSCRIPT_BITCODE": 17,
+  "DYLIB_LIBRARIES/RLIB_LIBRARIES": 8,
+  "ETC/FAKE": 1
+
+None of the "SHARED_LIBRARIES/STATIC_LIBRARIES" are double counted in the
+modules with one class
+RLIB/
+
+All of these classes have shared_libs and/or static_libs
+    "EXECUTABLES",
+    "SHARED_LIBRARIES",
+    "STATIC_LIBRARIES",
+    "SHARED_LIBRARIES/STATIC_LIBRARIES", # cc_library
+    "HEADER_LIBRARIES",
+    "NATIVE_TESTS", # test modules
+    "DYLIB_LIBRARIES", # rust
+    "RLIB_LIBRARIES", # rust
+    "ETC", # rust_bindgen
+"""
+
+from collections import defaultdict
+
+import json, os, argparse
+
+ANDROID_PRODUCT_OUT = os.environ.get("ANDROID_PRODUCT_OUT")
+# If a shared library is used less than MAX_SHARED_INCLUSIONS times in a target,
+# then it will likely save memory by changing it to a static library
+# This move will also use less storage
+MAX_SHARED_INCLUSIONS = 2
+# If a static library is used more than MAX_STATIC_INCLUSIONS times in a target,
+# then it will likely save memory by changing it to a shared library
+# This move will also likely use less storage
+MIN_STATIC_INCLUSIONS = 3
+
+
+def parse_args():
+  parser = argparse.ArgumentParser(
+      description=(
+          "Parse module-info.jso and display information about static and"
+          " shared library dependencies."
+      )
+  )
+  parser.add_argument(
+      "--module", dest="module", help="Print the info for the module."
+  )
+  parser.add_argument(
+      "--shared",
+      dest="print_shared",
+      action=argparse.BooleanOptionalAction,
+      help=(
+          "Print the list of libraries that are shared_libs for fewer than {}"
+          " modules.".format(MAX_SHARED_INCLUSIONS)
+      ),
+  )
+  parser.add_argument(
+      "--static",
+      dest="print_static",
+      action=argparse.BooleanOptionalAction,
+      help=(
+          "Print the list of libraries that are static_libs for more than {}"
+          " modules.".format(MIN_STATIC_INCLUSIONS)
+      ),
+  )
+  parser.add_argument(
+      "--recursive",
+      dest="recursive",
+      action=argparse.BooleanOptionalAction,
+      default=True,
+      help=(
+          "Gather all dependencies of EXECUTABLES recursvily before calculating"
+          " the stats. This eliminates duplicates from multiple libraries"
+          " including the same dependencies in a single binary."
+      ),
+  )
+  parser.add_argument(
+      "--both",
+      dest="both",
+      action=argparse.BooleanOptionalAction,
+      default=False,
+      help=(
+          "Print a list of libraries that are including libraries as both"
+          " static and shared"
+      ),
+  )
+  return parser.parse_args()
+
+
+class TransitiveHelper:
+
+  def __init__(self):
+    # keep a list of already expanded libraries so we don't end up in a cycle
+    self.visited = defaultdict(lambda: defaultdict(set))
+
+  # module is an object from the module-info dictionary
+  # module_info is the dictionary from module-info.json
+  # modify the module's shared_libs and static_libs with all of the transient
+  # dependencies required from all of the explicit dependencies
+  def flattenDeps(self, module, module_info):
+    libs_snapshot = dict(shared_libs = set(module["shared_libs"]), static_libs = set(module["static_libs"]))
+
+    for lib_class in ["shared_libs", "static_libs"]:
+      for lib in libs_snapshot[lib_class]:
+        if not lib or lib not in module_info:
+          continue
+        if lib in self.visited:
+          module[lib_class].update(self.visited[lib][lib_class])
+        else:
+          res = self.flattenDeps(module_info[lib], module_info)
+          module[lib_class].update(res[lib_class])
+          self.visited[lib][lib_class].update(res[lib_class])
+
+    return module
+
+def main():
+  module_info = json.load(open(ANDROID_PRODUCT_OUT + "/module-info.json"))
+  # turn all of the static_libs and shared_libs lists into sets to make them
+  # easier to update
+  for _, module in module_info.items():
+    module["shared_libs"] = set(module["shared_libs"])
+    module["static_libs"] = set(module["static_libs"])
+
+  args = parse_args()
+
+  if args.module:
+    if args.module not in module_info:
+      print("Module {} does not exist".format(args.module))
+      exit(1)
+
+  includedStatically = defaultdict(set)
+  includedSharedly = defaultdict(set)
+  includedBothly = defaultdict(set)
+  transitive = TransitiveHelper()
+  for name, module in module_info.items():
+    if args.recursive:
+      # in this recursive mode we only want to see what is included by the executables
+      if "EXECUTABLES" not in module["class"]:
+        continue
+      module = transitive.flattenDeps(module, module_info)
+      # filter out fuzzers by their dependency on clang
+      if "libclang_rt.fuzzer" in module["static_libs"]:
+        continue
+    else:
+      if "NATIVE_TESTS" in module["class"]:
+        # We don't care about how tests are including libraries
+        continue
+
+    # count all of the shared and static libs included in this module
+    for lib in module["shared_libs"]:
+      includedSharedly[lib].add(name)
+    for lib in module["static_libs"]:
+      includedStatically[lib].add(name)
+
+    intersection = set(module["shared_libs"]).intersection(
+        module["static_libs"]
+    )
+    if intersection:
+      includedBothly[name] = intersection
+
+  if args.print_shared:
+    print(
+        "Shared libraries that are included by fewer than {} modules on a"
+        " device:".format(MAX_SHARED_INCLUSIONS)
+    )
+    for name, libs in includedSharedly.items():
+      if len(libs) < MAX_SHARED_INCLUSIONS:
+        print("{}: {} included by: {}".format(name, len(libs), libs))
+
+  if args.print_static:
+    print(
+        "Libraries that are included statically by more than {} modules on a"
+        " device:".format(MIN_STATIC_INCLUSIONS)
+    )
+    for name, libs in includedStatically.items():
+      if len(libs) > MIN_STATIC_INCLUSIONS:
+        print("{}: {} included by: {}".format(name, len(libs), libs))
+
+  if args.both:
+    allIncludedBothly = set()
+    for name, libs in includedBothly.items():
+      allIncludedBothly.update(libs)
+
+    print(
+        "List of libraries used both statically and shared in the same"
+        " processes:\n {}\n\n".format("\n".join(sorted(allIncludedBothly)))
+    )
+    print(
+        "List of libraries used both statically and shared in any processes:\n {}".format("\n".join(sorted(includedStatically.keys() & includedSharedly.keys()))))
+
+  if args.module:
+    print(json.dumps(module_info[args.module], default=list, indent=2))
+    print(
+        "{} is included in shared_libs {} times by these modules: {}".format(
+            args.module, len(includedSharedly[args.module]),
+            includedSharedly[args.module]
+        )
+    )
+    print(
+        "{} is included in static_libs {} times by these modules: {}".format(
+            args.module, len(includedStatically[args.module]),
+            includedStatically[args.module]
+        )
+    )
+    print("Shared libs included by this module that are used in fewer than {} processes:\n{}".format(
+        MAX_SHARED_INCLUSIONS, [x for x in module_info[args.module]["shared_libs"] if len(includedSharedly[x]) < MAX_SHARED_INCLUSIONS]))
+
+
+
+if __name__ == "__main__":
+  main()