Add tests for verify_overlaps script

Refactor verify_overlaps to make it testable and add tests for the
comparison. This makes one significant change in behavior: each of the
files produced by a bootclasspath_fragment is now read into a dict
before comparison, rather than being read and compared a row at a time.
That allows the code that reads a CSV into a dict to be reused.

Bug: 194063708
Test: atest --host verify_overlaps_test
      m out/soong/hiddenapi/hiddenapi-flags.csv
      - manually change files to cause difference in flags to check
        that it detects the differences.
Change-Id: Ib70ac87fe089fc25e3bef18f367d4939bfc0cb8d
diff --git a/scripts/hiddenapi/Android.bp b/scripts/hiddenapi/Android.bp
index c50dc24..c7298a0 100644
--- a/scripts/hiddenapi/Android.bp
+++ b/scripts/hiddenapi/Android.bp
@@ -83,3 +83,24 @@
         },
     },
 }
+
+python_test_host {
+    name: "verify_overlaps_test",
+    main: "verify_overlaps_test.py",
+    srcs: [
+        "verify_overlaps.py",
+        "verify_overlaps_test.py",
+    ],
+    version: {
+        py2: {
+            enabled: false,
+        },
+        py3: {
+            enabled: true,
+            embedded_launcher: true,
+        },
+    },
+    test_options: {
+        unit_test: true,
+    },
+}
diff --git a/scripts/hiddenapi/verify_overlaps.py b/scripts/hiddenapi/verify_overlaps.py
index bb0917e..e24995e 100755
--- a/scripts/hiddenapi/verify_overlaps.py
+++ b/scripts/hiddenapi/verify_overlaps.py
@@ -20,50 +20,82 @@
 import argparse
 import csv
 
-args_parser = argparse.ArgumentParser(description='Verify that one set of hidden API flags is a subset of another.')
-args_parser.add_argument('all', help='All the flags')
-args_parser.add_argument('subsets', nargs=argparse.REMAINDER, help='Subsets of the flags')
-args = args_parser.parse_args()
-
-
 def dict_reader(input):
     return csv.DictReader(input, delimiter=',', quotechar='|', fieldnames=['signature'])
 
-# Read in all the flags into a dict indexed by signature
-allFlagsBySignature = {}
-with open(args.all, 'r') as allFlagsFile:
-    allFlagsReader = dict_reader(allFlagsFile)
-    for row in allFlagsReader:
+def read_signature_csv_from_stream_as_dict(stream):
+    """
+    Read the csv contents from the stream into a dict. The first column is assumed to be the
+    signature and used as the key. The whole row is stored as the value.
+
+    :param stream: the csv contents to read
+    :return: the dict from signature to row.
+    """
+    dict = {}
+    reader = dict_reader(stream)
+    for row in reader:
         signature = row['signature']
-        allFlagsBySignature[signature]=row
+        dict[signature] = row
+    return dict
 
-failed = False
-for subsetPath in args.subsets:
+def read_signature_csv_from_file_as_dict(csvFile):
+    """
+    Read the csvFile into a dict. The first column is assumed to be the
+    signature and used as the key. The whole row is stored as the value.
+
+    :param csvFile: the csv file to read
+    :return: the dict from signature to row.
+    """
+    with open(csvFile, 'r') as f:
+        return read_signature_csv_from_stream_as_dict(f)
+
+def compare_signature_flags(monolithicFlagsDict, modularFlagsDict):
+    """
+    Compare the signature flags between the two dicts.
+
+    :param monolithicFlagsDict: the dict containing the subset of the monolithic
+    flags that should be equal to the modular flags.
+    :param modularFlagsDict: the dict containing the flags produced by a single
+    bootclasspath_fragment module.
+    :return: list of mismatches; each mismatch is a tuple where the first item
+    is the signature, and the second and third items are lists of the flags from
+    the modular dict and the monolithic dict respectively.
+    """
     mismatchingSignatures = []
-    with open(subsetPath, 'r') as subsetFlagsFile:
-        subsetReader = dict_reader(subsetFlagsFile)
-        for row in subsetReader:
-            signature = row['signature']
-            if signature in allFlagsBySignature:
-                allFlags = allFlagsBySignature.get(signature)
-                if allFlags != row:
-                    mismatchingSignatures.append((signature, row.get(None, []), allFlags.get(None, [])))
-            else:
-                mismatchingSignatures.append((signature, row.get(None, []), []))
+    for signature, modularRow in modularFlagsDict.items():
+        modularFlags = modularRow.get(None, [])
+        monolithicRow = monolithicFlagsDict.get(signature, {})
+        monolithicFlags = monolithicRow.get(None, [])
+        if monolithicFlags != modularFlags:
+            mismatchingSignatures.append((signature, modularFlags, monolithicFlags))
+    return mismatchingSignatures
 
+def main(argv):
+    args_parser = argparse.ArgumentParser(description='Verify that one set of hidden API flags is a subset of another.')
+    args_parser.add_argument('all', help='All the flags')
+    args_parser.add_argument('subsets', nargs=argparse.REMAINDER, help='Subsets of the flags')
+    args = args_parser.parse_args(argv[1:])
 
-    if mismatchingSignatures:
-        failed = True
-        print("ERROR: Hidden API flags are inconsistent:")
-        print("< " + subsetPath)
-        print("> " + args.all)
-        for mismatch in mismatchingSignatures:
-            print()
-            print("< " + mismatch[0] + "," + ",".join(mismatch[1]))
-            if mismatch[2] != []:
-                print("> " + mismatch[0] + "," + ",".join(mismatch[2]))
-            else:
-                print("> " + mismatch[0] + " - missing")
+    # Read in all the flags into a dict indexed by signature
+    allFlagsBySignature = read_signature_csv_from_file_as_dict(args.all)
 
-if failed:
-    sys.exit(1)
+    failed = False
+    for subsetPath in args.subsets:
+        subsetDict = read_signature_csv_from_file_as_dict(subsetPath)
+        mismatchingSignatures = compare_signature_flags(allFlagsBySignature, subsetDict)
+        if mismatchingSignatures:
+            failed = True
+            print("ERROR: Hidden API flags are inconsistent:")
+            print("< " + subsetPath)
+            print("> " + args.all)
+            for mismatch in mismatchingSignatures:
+                signature = mismatch[0]
+                print()
+                print("< " + ",".join([signature]+ mismatch[1]))
+                print("> " + ",".join([signature]+ mismatch[2]))
+
+    if failed:
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/scripts/hiddenapi/verify_overlaps_test.py b/scripts/hiddenapi/verify_overlaps_test.py
new file mode 100755
index 0000000..1248890
--- /dev/null
+++ b/scripts/hiddenapi/verify_overlaps_test.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2021 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for verify_overlaps.py."""
+import io
+import unittest
+
+from verify_overlaps import *
+
+class TestDetectOverlaps(unittest.TestCase):
+
+    def read_signature_csv_from_string_as_dict(self, csv):
+        with io.StringIO(csv) as f:
+            return read_signature_csv_from_stream_as_dict(f)
+
+    def test_match(self):
+        monolithic = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
+Ljava/lang/Object;->toString()Ljava/lang/String;,blocked
+''')
+        modular = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
+''')
+        mismatches = compare_signature_flags(monolithic, modular)
+        expected = []
+        self.assertEqual(expected, mismatches)
+
+    def test_mismatch_overlapping_flags(self):
+        monolithic = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->toString()Ljava/lang/String;,public-api
+''')
+        modular = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->toString()Ljava/lang/String;,public-api,system-api,test-api
+''')
+        mismatches = compare_signature_flags(monolithic, modular)
+        expected = [
+            (
+                'Ljava/lang/Object;->toString()Ljava/lang/String;',
+                ['public-api', 'system-api', 'test-api'],
+                ['public-api'],
+            ),
+        ]
+        self.assertEqual(expected, mismatches)
+
+
+    def test_mismatch_monolithic_blocked(self):
+        monolithic = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
+Ljava/lang/Object;->toString()Ljava/lang/String;,blocked
+''')
+        modular = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->toString()Ljava/lang/String;,public-api,system-api,test-api
+''')
+        mismatches = compare_signature_flags(monolithic, modular)
+        expected = [
+            (
+                'Ljava/lang/Object;->toString()Ljava/lang/String;',
+                ['public-api', 'system-api', 'test-api'],
+                ['blocked'],
+            ),
+        ]
+        self.assertEqual(expected, mismatches)
+
+    def test_mismatch_modular_blocked(self):
+        monolithic = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
+Ljava/lang/Object;->toString()Ljava/lang/String;,public-api,system-api,test-api
+''')
+        modular = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->toString()Ljava/lang/String;,blocked
+''')
+        mismatches = compare_signature_flags(monolithic, modular)
+        expected = [
+            (
+                'Ljava/lang/Object;->toString()Ljava/lang/String;',
+                ['blocked'],
+                ['public-api', 'system-api', 'test-api'],
+            ),
+        ]
+        self.assertEqual(expected, mismatches)
+
+    def test_missing_from_monolithic(self):
+        monolithic = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
+''')
+        modular = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->toString()Ljava/lang/String;,public-api,system-api,test-api
+''')
+        mismatches = compare_signature_flags(monolithic, modular)
+        expected = [
+            (
+                'Ljava/lang/Object;->toString()Ljava/lang/String;',
+                ['public-api', 'system-api', 'test-api'],
+                [],
+            ),
+        ]
+        self.assertEqual(expected, mismatches)
+
+    def test_missing_from_modular(self):
+        # The modular dict defines the set of signatures to compare so an entry
+        # in the monolithic dict that does not have a corresponding entry in the
+        # modular dict is ignored.
+        monolithic = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
+''')
+        modular = {}
+        mismatches = compare_signature_flags(monolithic, modular)
+        expected = []
+        self.assertEqual(expected, mismatches)
+
+    def test_blocked_missing_from_modular(self):
+        # The modular dict defines the set of signatures to compare so an entry
+        # in the monolithic dict that does not have a corresponding entry in the
+        # modular dict is ignored, even if that entry is flagged as blocked.
+        monolithic = self.read_signature_csv_from_string_as_dict('''
+Ljava/lang/Object;->hashCode()I,blocked
+''')
+        modular = {}
+        mismatches = compare_signature_flags(monolithic, modular)
+        expected = []
+        self.assertEqual(expected, mismatches)
+
+if __name__ == '__main__':
+    unittest.main(verbosity=2)