Use trie to store monolithic hidden API flags
Previously, a simple map from dex signature to the flags was used to
store the monolithic hidden API flags. This change replaces that with a
trie that uses packages and class names extracted from the signature
to map from the signature to the flags.
The signature is broken down into pieces consisting of package names,
class names and the remaining member signature. They are used in turn
to navigate through nodes in the trie to a Leaf that contains the flags
associated with the signature.
The flags can be retrieved from the trie in a number of ways:
* Using the complete signature to retrieve the flags for a single
member.
* Using a class name to retrieve the flags for all the members in a
class and any nested classes.
* Using a package wildcard to retrieve the flags for all the members in
the classes in that package.
* Using a recursive package wildcard to retrieve the flags for all the
members in the classes in that package and nested packages.
This will allow a bootclasspath_fragment to select a subset of the
monolithic flags to compare against itself without specifying the
signatures of every member of that set.
Before creating the trie an attempt was made to compute the subset by
iterating over all the signatures in the monolithic flags and matching
against a regular expression created from the patterns but that was too
slow. It took minutes to run whereas using the simple map or the trie
only takes a few seconds.
Bug: 194063708
Test: atest --host verify_overlaps_test
Change-Id: I36f5e319d3e7d62dd34305de1eec990a93cb3a89
diff --git a/scripts/hiddenapi/verify_overlaps.py b/scripts/hiddenapi/verify_overlaps.py
index 8579321..a4a423e 100755
--- a/scripts/hiddenapi/verify_overlaps.py
+++ b/scripts/hiddenapi/verify_overlaps.py
@@ -19,12 +19,244 @@
import argparse
import csv
+import sys
from itertools import chain
+class InteriorNode:
+ """
+ An interior node in a trie.
+
+ Each interior node has a dict that maps from an element of a signature to
+ either another interior node or a leaf. Each interior node represents either
+ a package, class or nested class. Class members are represented by a Leaf.
+
+ Associating the set of flags [public-api] with the signature
+ "Ljava/lang/Object;->String()Ljava/lang/String;" will cause the following
+ nodes to be created:
+ Node()
+ ^- package:java -> Node()
+ ^- package:lang -> Node()
+ ^- class:Object -> Node()
+ ^- member:String()Ljava/lang/String; -> Leaf([public-api])
+
+ Associating the set of flags [blocked,core-platform-api] with the signature
+ "Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;"
+ will cause the following nodes to be created:
+ Node()
+ ^- package:java -> Node()
+ ^- package:lang -> Node()
+ ^- class:Character -> Node()
+ ^- class:UnicodeScript -> Node()
+ ^- member:of(I)Ljava/lang/Character$UnicodeScript;
+ -> Leaf([blocked,core-platform-api])
+
+ Attributes:
+ nodes: a dict from an element of the signature to the Node/Leaf
+ containing the next element/value.
+ """
+ def __init__(self):
+ self.nodes = {}
+
+ def signatureToElements(self, signature):
+ """
+ Split a signature or a prefix into a number of elements:
+ 1. The packages (excluding the leading L preceding the first package).
+ 2. The class names, from outermost to innermost.
+ 3. The member signature.
+
+ e.g. Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;
+ will be broken down into these elements:
+ 1. package:java
+ 2. package:lang
+ 3. class:Character
+ 4. class:UnicodeScript
+ 5. member:of(I)Ljava/lang/Character$UnicodeScript;
+ """
+ # Remove the leading L.
+ # - java/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;
+ text = signature.removeprefix("L")
+ # Split the signature between qualified class name and the class member
+ # signature.
+ # 0 - java/lang/Character$UnicodeScript
+ # 1 - of(I)Ljava/lang/Character$UnicodeScript;
+ parts = text.split(";->")
+ member = parts[1:]
+ # Split the qualified class name into packages, and class name.
+ # 0 - java
+ # 1 - lang
+ # 2 - Character$UnicodeScript
+ elements = parts[0].split("/")
+ packages = elements[0:-1]
+ className = elements[-1]
+ if className == "*" or className == "**":
+ # Cannot specify a wildcard and target a specific member
+ if len(member) != 0:
+ raise Exception("Invalid signature %s: contains wildcard %s and member signature %s"
+ % (signature, className, member[0]))
+ wildcard = [className]
+ # Assemble the parts into a single list, adding prefixes to identify
+ # the different parts.
+ # 0 - package:java
+ # 1 - package:lang
+ # 2 - *
+ return list(chain(map(lambda x : "package:" + x, packages),
+ wildcard))
+ else:
+ # Split the class name into outer / inner classes
+ # 0 - Character
+ # 1 - UnicodeScript
+ classes = className.split("$")
+ # Assemble the parts into a single list, adding prefixes to identify
+ # the different parts.
+ # 0 - package:java
+ # 1 - package:lang
+ # 2 - class:Character
+ # 3 - class:UnicodeScript
+ # 4 - member:of(I)Ljava/lang/Character$UnicodeScript;
+ return list(chain(map(lambda x : "package:" + x, packages),
+ map(lambda x : "class:" + x, classes),
+ map(lambda x : "member:" + x, member)))
+
+ def add(self, signature, value):
+ """
+ Associate the value with the specific signature.
+ :param signature: the member signature
+ :param value: the value to associated with the signature
+ :return: n/a
+ """
+ # Split the signature into elements.
+ elements = self.signatureToElements(signature)
+ # Find the Node associated with the deepest class.
+ node = self
+ for element in elements[:-1]:
+ if element in node.nodes:
+ node = node.nodes[element]
+ else:
+ next = InteriorNode()
+ node.nodes[element] = next
+ node = next
+ # Add a Leaf containing the value and associate it with the member
+ # signature within the class.
+ lastElement = elements[-1]
+ if not lastElement.startswith("member:"):
+ raise Exception("Invalid signature: %s, does not identify a specific member" % signature)
+ if lastElement in node.nodes:
+ raise Exception("Duplicate signature: %s" % signature)
+ node.nodes[lastElement] = Leaf(value)
+
+ def getMatchingRows(self, pattern):
+ """
+ Get the values (plural) associated with the pattern.
+
+ e.g. If the pattern is a full signature then this will return a list
+ containing the value associated with that signature.
+
+ If the pattern is a class then this will return a list containing the
+ values associated with all members of that class.
+
+ If the pattern is a package then this will return a list containing the
+ values associated with all the members of all the classes in that
+ package and sub-packages.
+
+ If the pattern ends with "*" then the preceding part is treated as a
+ package and this will return a list containing the values associated
+ with all the members of all the classes in that package.
+
+ If the pattern ends with "**" then the preceding part is treated
+ as a package and this will return a list containing the values
+ associated with all the members of all the classes in that package and
+ all sub-packages.
+
+ :param pattern: the pattern which could be a complete signature or a
+ class, or package wildcard.
+ :return: an iterable containing all the values associated with the
+ pattern.
+ """
+ elements = self.signatureToElements(pattern)
+ node = self
+ # Include all values from this node and all its children.
+ selector = lambda x : True
+ lastElement = elements[-1]
+ if lastElement == "*" or lastElement == "**":
+ elements = elements[:-1]
+ if lastElement == "*":
+ # Do not include values from sub-packages.
+ selector = lambda x : not x.startswith("package:")
+ for element in elements:
+ if element in node.nodes:
+ node = node.nodes[element]
+ else:
+ return []
+ return chain.from_iterable(node.values(selector))
+
+ def values(self, selector):
+ """
+ :param selector: a function that can be applied to a key in the nodes
+ attribute to determine whether to return its values.
+ :return: A list of iterables of all the values associated with this
+ node and its children.
+ """
+ values = []
+ self.appendValues(values, selector)
+ return values
+
+ def appendValues(self, values, selector):
+ """
+ Append the values associated with this node and its children to the
+ list.
+
+ For each item (key, child) in nodes the child node's values are returned
+ if and only if the selector returns True when called on its key. A child
+ node's values are all the values associated with it and all its
+ descendant nodes.
+
+ :param selector: a function that can be applied to a key in the nodes
+ attribute to determine whether to return its values.
+ :param values: a list of a iterables of values.
+ """
+ for key, node in self.nodes.items():
+ if selector(key):
+ node.appendValues(values, lambda x : True)
+
+class Leaf:
+ """
+ A leaf of the trie
+
+ Attributes:
+ value: the value associated with this leaf.
+ """
+ def __init__(self, value):
+ self.value = value
+
+ def values(self, selector):
+ """
+ :return: A list of a list of the value associated with this node.
+ """
+ return [[self.value]]
+
+ def appendValues(self, values, selector):
+ """
+ Appends a list of the value associated with this node to the list.
+ :param values: a list of a iterables of values.
+ """
+ values.append([self.value])
+
def dict_reader(input):
return csv.DictReader(input, delimiter=',', quotechar='|', fieldnames=['signature'])
-def extract_subset_from_monolithic_flags_as_dict_from_file(monolithicFlagsDict, patternsFile):
+def read_flag_trie_from_file(file):
+ with open(file, 'r') as stream:
+ return read_flag_trie_from_stream(stream)
+
+def read_flag_trie_from_stream(stream):
+ trie = InteriorNode()
+ reader = dict_reader(stream)
+ for row in reader:
+ signature = row['signature']
+ trie.add(signature, row)
+ return trie
+
+def extract_subset_from_monolithic_flags_as_dict_from_file(monolithicTrie, patternsFile):
"""
Extract a subset of flags from the dict containing all the monolithic flags.
@@ -34,21 +266,24 @@
:return: the dict from signature to row.
"""
with open(patternsFile, 'r') as stream:
- return extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicFlagsDict, stream)
+ return extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicTrie, stream)
-def extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicFlagsDict, stream):
+def extract_subset_from_monolithic_flags_as_dict_from_stream(monolithicTrie, stream):
"""
- Extract a subset of flags from the dict containing all the monolithic flags.
+ Extract a subset of flags from the trie containing all the monolithic flags.
- :param monolithicFlagsDict: the dict containing all the monolithic flags.
+ :param monolithicTrie: the trie containing all the monolithic flags.
:param stream: a stream containing a list of signature patterns that define
the subset.
:return: the dict from signature to row.
"""
dict = {}
- for signature in stream:
- signature = signature.rstrip()
- dict[signature] = monolithicFlagsDict.get(signature, {})
+ for pattern in stream:
+ pattern = pattern.rstrip()
+ rows = monolithicTrie.getMatchingRows(pattern)
+ for row in rows:
+ signature = row['signature']
+ dict[signature] = row
return dict
def read_signature_csv_from_stream_as_dict(stream):
@@ -108,20 +343,20 @@
args_parser.add_argument('modularFlags', nargs=argparse.REMAINDER, help='Flags produced by individual bootclasspath_fragment modules')
args = args_parser.parse_args(argv[1:])
- # Read in the monolithic flags into a dict indexed by signature
+ # Read in all the flags into the trie
monolithicFlagsPath = args.monolithicFlags
- monolithicFlagsDict = read_signature_csv_from_file_as_dict(monolithicFlagsPath)
+ monolithicTrie = read_flag_trie_from_file(monolithicFlagsPath)
# For each subset specified on the command line, create dicts for the flags
- # provided by the subset and the corresponding flags from the complete set of
- # flags and compare them.
+ # provided by the subset and the corresponding flags from the complete set
+ # of flags and compare them.
failed = False
for modularPair in args.modularFlags:
parts = modularPair.split(":")
modularFlagsPath = parts[0]
modularPatternsPath = parts[1]
modularFlagsDict = read_signature_csv_from_file_as_dict(modularFlagsPath)
- monolithicFlagsSubsetDict = extract_subset_from_monolithic_flags_as_dict_from_file(monolithicFlagsDict, modularPatternsPath)
+ monolithicFlagsSubsetDict = extract_subset_from_monolithic_flags_as_dict_from_file(monolithicTrie, modularPatternsPath)
mismatchingSignatures = compare_signature_flags(monolithicFlagsSubsetDict, modularFlagsDict)
if mismatchingSignatures:
failed = True
diff --git a/scripts/hiddenapi/verify_overlaps_test.py b/scripts/hiddenapi/verify_overlaps_test.py
index b6d5fa3..7477254 100755
--- a/scripts/hiddenapi/verify_overlaps_test.py
+++ b/scripts/hiddenapi/verify_overlaps_test.py
@@ -20,8 +20,52 @@
from verify_overlaps import *
+class TestSignatureToElements(unittest.TestCase):
+
+ def signatureToElements(self, signature):
+ return InteriorNode().signatureToElements(signature)
+
+ def test_signatureToElements_1(self):
+ expected = [
+ 'package:java',
+ 'package:lang',
+ 'class:ProcessBuilder',
+ 'class:Redirect',
+ 'class:1',
+ 'member:<init>()V',
+ ]
+ self.assertEqual(expected, self.signatureToElements(
+ "Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V"))
+
+ def test_signatureToElements_2(self):
+ expected = [
+ 'package:java',
+ 'package:lang',
+ 'class:Object',
+ 'member:hashCode()I',
+ ]
+ self.assertEqual(expected, self.signatureToElements(
+ "Ljava/lang/Object;->hashCode()I"))
+
+ def test_signatureToElements_3(self):
+ expected = [
+ 'package:java',
+ 'package:lang',
+ 'class:CharSequence',
+ 'class:',
+ 'class:ExternalSyntheticLambda0',
+ 'member:<init>(Ljava/lang/CharSequence;)V',
+ ]
+ self.assertEqual(expected, self.signatureToElements(
+ "Ljava/lang/CharSequence$$ExternalSyntheticLambda0;"
+ "-><init>(Ljava/lang/CharSequence;)V"))
+
class TestDetectOverlaps(unittest.TestCase):
+ def read_flag_trie_from_string(self, csv):
+ with io.StringIO(csv) as f:
+ return read_flag_trie_from_stream(f)
+
def read_signature_csv_from_string_as_dict(self, csv):
with io.StringIO(csv) as f:
return read_signature_csv_from_stream_as_dict(f)
@@ -33,13 +77,14 @@
extractInput = '''
Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
Ljava/lang/Object;->toString()Ljava/lang/String;,blocked
+Ljava/util/zip/ZipFile;-><clinit>()V,blocked
+Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;,blocked
+Ljava/lang/Character;->serialVersionUID:J,sdk
+Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V,blocked
'''
- def test_extract_subset(self):
- monolithic = self.read_signature_csv_from_string_as_dict(TestDetectOverlaps.extractInput)
- modular = self.read_signature_csv_from_string_as_dict('''
-Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
-''')
+ def test_extract_subset_signature(self):
+ monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
patterns = 'Ljava/lang/Object;->hashCode()I'
@@ -52,6 +97,144 @@
}
self.assertEqual(expected, subset)
+ def test_extract_subset_class(self):
+ monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
+
+ patterns = 'java/lang/Object'
+
+ subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
+ expected = {
+ 'Ljava/lang/Object;->hashCode()I': {
+ None: ['public-api', 'system-api', 'test-api'],
+ 'signature': 'Ljava/lang/Object;->hashCode()I',
+ },
+ 'Ljava/lang/Object;->toString()Ljava/lang/String;': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;',
+ },
+ }
+ self.assertEqual(expected, subset)
+
+ def test_extract_subset_outer_class(self):
+ monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
+
+ patterns = 'java/lang/Character'
+
+ subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
+ expected = {
+ 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;',
+ },
+ 'Ljava/lang/Character;->serialVersionUID:J': {
+ None: ['sdk'],
+ 'signature': 'Ljava/lang/Character;->serialVersionUID:J',
+ },
+ }
+ self.assertEqual(expected, subset)
+
+ def test_extract_subset_nested_class(self):
+ monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
+
+ patterns = 'java/lang/Character$UnicodeScript'
+
+ subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
+ expected = {
+ 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;',
+ },
+ }
+ self.assertEqual(expected, subset)
+
+ def test_extract_subset_package(self):
+ monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
+
+ patterns = 'java/lang/*'
+
+ subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
+ expected = {
+ 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;',
+ },
+ 'Ljava/lang/Character;->serialVersionUID:J': {
+ None: ['sdk'],
+ 'signature': 'Ljava/lang/Character;->serialVersionUID:J',
+ },
+ 'Ljava/lang/Object;->hashCode()I': {
+ None: ['public-api', 'system-api', 'test-api'],
+ 'signature': 'Ljava/lang/Object;->hashCode()I',
+ },
+ 'Ljava/lang/Object;->toString()Ljava/lang/String;': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;',
+ },
+ 'Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V',
+ },
+ }
+ self.assertEqual(expected, subset)
+
+ def test_extract_subset_recursive_package(self):
+ monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
+
+ patterns = 'java/**'
+
+ subset = self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
+ expected = {
+ 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/Character$UnicodeScript;->of(I)Ljava/lang/Character$UnicodeScript;',
+ },
+ 'Ljava/lang/Character;->serialVersionUID:J': {
+ None: ['sdk'],
+ 'signature': 'Ljava/lang/Character;->serialVersionUID:J',
+ },
+ 'Ljava/lang/Object;->hashCode()I': {
+ None: ['public-api', 'system-api', 'test-api'],
+ 'signature': 'Ljava/lang/Object;->hashCode()I',
+ },
+ 'Ljava/lang/Object;->toString()Ljava/lang/String;': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/Object;->toString()Ljava/lang/String;',
+ },
+ 'Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V': {
+ None: ['blocked'],
+ 'signature': 'Ljava/lang/ProcessBuilder$Redirect$1;-><init>()V',
+ },
+ 'Ljava/util/zip/ZipFile;-><clinit>()V': {
+ None: ['blocked'],
+ 'signature': 'Ljava/util/zip/ZipFile;-><clinit>()V',
+ },
+ }
+ self.assertEqual(expected, subset)
+
+ def test_extract_subset_invalid_pattern_wildcard_and_member(self):
+ monolithic = self.read_flag_trie_from_string(TestDetectOverlaps.extractInput)
+
+ patterns = 'Ljava/lang/*;->hashCode()I'
+
+ with self.assertRaises(Exception) as context:
+ self.extract_subset_from_monolithic_flags_as_dict_from_string(monolithic, patterns)
+ self.assertTrue("contains wildcard * and member signature hashCode()I" in str(context.exception))
+
+ def test_read_trie_duplicate(self):
+ with self.assertRaises(Exception) as context:
+ self.read_flag_trie_from_string('''
+Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api
+Ljava/lang/Object;->hashCode()I,blocked
+''')
+ self.assertTrue("Duplicate signature: Ljava/lang/Object;->hashCode()I" in str(context.exception))
+
+ def test_read_trie_missing_member(self):
+ with self.assertRaises(Exception) as context:
+ self.read_flag_trie_from_string('''
+Ljava/lang/Object,public-api,system-api,test-api
+''')
+ self.assertTrue("Invalid signature: Ljava/lang/Object, does not identify a specific member" in str(context.exception))
+
def test_match(self):
monolithic = self.read_signature_csv_from_string_as_dict('''
Ljava/lang/Object;->hashCode()I,public-api,system-api,test-api