blob: 345e9f983d46b58a87dc70527c1e611dc47b66b0 [file] [log] [blame]
Dan Albert914449f2016-06-17 16:45:24 -07001#
2# Copyright (C) 2016 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
Dan Albert06f58af2020-06-22 15:10:31 -070016"""Parser for Android's version script information."""
Dan Albertead21552021-06-04 14:30:40 -070017from __future__ import annotations
18
19from dataclasses import dataclass, field
Dan Albert8bdccb92016-07-29 13:06:22 -070020import logging
Dan Albert914449f2016-06-17 16:45:24 -070021import re
Dan Albertaf7b36d2020-06-23 11:21:21 -070022from typing import (
23 Dict,
24 Iterable,
Dan Albertead21552021-06-04 14:30:40 -070025 Iterator,
Dan Albertaf7b36d2020-06-23 11:21:21 -070026 List,
27 Mapping,
28 NewType,
29 Optional,
30 TextIO,
31 Tuple,
Dan Albertead21552021-06-04 14:30:40 -070032 Union,
Dan Albertaf7b36d2020-06-23 11:21:21 -070033)
34
35
# Maps an API codename (e.g. "O") to its numeric API level.
ApiMap = Mapping[str, int]
# Distinct string types so architectures and tags are not mixed up by the
# type checker; at runtime both are plain str.
Arch = NewType('Arch', str)
Tag = NewType('Tag', str)


# Every architecture name that may be used as an arch tag.
ALL_ARCHITECTURES = tuple(
    Arch(arch_name)
    for arch_name in ('arm', 'arm64', 'riscv64', 'x86', 'x86_64')
)

# Tags accepted verbatim (i.e. everything that is not an API level tag).
# TODO: it would be nice to dedupe with 'has_*_tag' property methods
SUPPORTED_TAGS = ALL_ARCHITECTURES + tuple(
    Tag(tag_name)
    for tag_name in (
        'apex',
        'llndk',
        'platform-only',
        'systemapi',
        'var',
        'weak',
    )
)

# Arbitrary magic number. We use the same one in api-level.h for this purpose.
FUTURE_API_LEVEL = 10000
61
62
def logger() -> logging.Logger:
    """Return the logger named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
Dan Albert914449f2016-06-17 16:45:24 -070066
67
@dataclass
class Tags:
    """Container class for the tags attached to a symbol or version.

    Supports membership tests (``'apex' in tags``) and iteration over the
    individual tags.
    """

    # The tags, in the order they were supplied.
    tags: tuple[Tag, ...] = field(default_factory=tuple)

    @classmethod
    def from_strs(cls, strs: Iterable[str]) -> Tags:
        """Constructs tags from a collection of strings.

        Does not decode API levels.
        """
        # Build via cls so that subclasses get an instance of their own type.
        return cls(tuple(Tag(s) for s in strs))

    def __contains__(self, tag: Union[Tag, str]) -> bool:
        return tag in self.tags

    def __iter__(self) -> Iterator[Tag]:
        yield from self.tags

    @property
    def has_mode_tags(self) -> bool:
        """Returns True if any mode tags (apex, llndk, systemapi) are set."""
        return (
            self.has_apex_tags
            or self.has_llndk_tags
            or self.has_systemapi_tags
        )

    @property
    def has_apex_tags(self) -> bool:
        """Returns True if any APEX tags are set."""
        return 'apex' in self.tags

    @property
    def has_systemapi_tags(self) -> bool:
        """Returns True if any systemapi tags are set."""
        return 'systemapi' in self.tags

    @property
    def has_llndk_tags(self) -> bool:
        """Returns True if any LL-NDK tags are set."""
        return 'llndk' in self.tags

    @property
    def has_platform_only_tags(self) -> bool:
        """Returns True if any platform-only tags are set."""
        return 'platform-only' in self.tags
112
113
@dataclass
class Symbol:
    """A single symbol entry read from a symbol file."""

    # Symbol name as written in the file (the text before the ';').
    name: str
    # Tags taken from the trailing '#' comment on the symbol's line.
    tags: Tags
Dan Albertaf7b36d2020-06-23 11:21:21 -0700120
121
@dataclass
class Version:
    """One version block of a symbol file."""

    # Name of the version block (the text before the opening '{').
    name: str
    # Name of the version this block is based on, or None if there is none.
    base: Optional[str]
    # Tags from the '#' comment on the block's opening line.
    tags: Tags
    # Symbols collected from the block.
    symbols: List[Symbol]

    @property
    def is_private(self) -> bool:
        """Returns True if this version block is private (platform only)."""
        private_suffixes = ('_PRIVATE', '_PLATFORM')
        return self.name.endswith(private_suffixes)
Dan Albertaf7b36d2020-06-23 11:21:21 -0700135
Dan Albertead21552021-06-04 14:30:40 -0700136
def get_tags(line: str, api_map: ApiMap) -> Tags:
    """Returns the tags found in the trailing '#' comment of this line.

    Each whitespace-separated word after the first '#' is treated as a tag
    and passed through decode_api_level_tag before being stored.
    """
    comment = line.strip().partition('#')[2]
    decoded = [
        decode_api_level_tag(Tag(word), api_map) for word in comment.split()
    ]
    return Tags(tuple(decoded))
Dan Alberta85042a2016-07-28 16:58:27 -0700144
145
def is_api_level_tag(tag: Tag) -> bool:
    """Returns true if this tag has an API level that may need decoding."""
    api_level_prefixes = (
        'llndk-deprecated=',
        'introduced=',
        'introduced-',
        'versioned=',
    )
    # str.startswith accepts a tuple and matches any of the prefixes.
    return tag.startswith(api_level_prefixes)
157
158
def decode_api_level(api: str, api_map: ApiMap) -> int:
    """Translates an API level argument into its numeric API level.

    Numeric strings are converted directly. The special name "current" maps
    to FUTURE_API_LEVEL. Anything else is treated as an API codename (like
    "O") and looked up in api_map.

    Raises:
        KeyError: The name is neither numeric, "current", nor in api_map.
    """
    if api == "current":
        return FUTURE_API_LEVEL

    try:
        level = int(api)
    except ValueError:
        # Not a number, so it must be a codename; resolved below.
        pass
    else:
        return level

    return api_map[api]
175
176
def decode_api_level_tag(tag: Tag, api_map: ApiMap) -> Tag:
    """Decodes API level code name in a tag.

    Tags that do not carry an API level are returned unchanged, provided they
    are listed in SUPPORTED_TAGS. API level tags are returned in the form
    "<name>=<numeric level>".

    Raises:
        ParseError: The tag is unsupported, an API level tag has no "=value"
            part, or an unknown version name was found in the tag.
    """
    if not is_api_level_tag(tag):
        if tag not in SUPPORTED_TAGS:
            raise ParseError(f'Unsupported tag: {tag}')

        return tag

    try:
        name, value = split_tag(tag)
    except ValueError as ex:
        # A tag such as "introduced-arm" matches an API level prefix but has
        # no "=value" part. Report it as a parse error like every other
        # malformed tag instead of leaking a bare ValueError.
        raise ParseError(f'Malformed API level tag: {tag}') from ex

    try:
        decoded = decode_api_level(value, api_map)
    except KeyError as ex:
        raise ParseError(f'Unknown version name in tag: {tag}') from ex
    return Tag(f'{name}={decoded}')
Dan Albert3f6fb2d2017-03-28 16:04:25 -0700195
196
def split_tag(tag: Tag) -> Tuple[str, str]:
    """Splits a key/value tag at its first '='.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Tuple of (key, value) of the tag. Both components are strings.
    """
    key, separator, value = tag.partition('=')
    if not separator:
        # partition() yields an empty separator when there is no '='.
        raise ValueError('Not a key/value tag: ' + tag)
    return key, value
209
210
def get_tag_value(tag: Tag) -> str:
    """Returns the value half of a key/value tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    _key, value = split_tag(tag)
    return value
Dan Albertc42458e2016-07-29 13:05:39 -0700220
class Filter:
    """Decides whether a version or a symbol should be omitted.

    A filter is configured with a target architecture and API level plus the
    set of modes (LL-NDK, APEX, system API, NDK) that are being requested.
    """

    def __init__(
        self,
        arch: Arch,
        api: int,
        llndk: bool = False,
        apex: bool = False,
        systemapi: bool = False,
        ndk: bool = True,
    ):
        self.arch = arch
        self.api = api
        self.llndk = llndk
        self.apex = apex
        self.systemapi = systemapi
        self.ndk = ndk

    def _should_omit_tags(self, tags: Tags) -> bool:
        """Returns True if the tagged object should be omitted.

        This defines the rules shared between version tagging and symbol
        tagging.
        """
        if tags.has_mode_tags:
            # Mode tags only exclude APIs from *other* modes: the object is
            # kept when at least one of its mode tags matches a requested
            # mode, and dropped otherwise.
            requested = (
                (self.apex and tags.has_apex_tags)
                or (self.llndk and tags.has_llndk_tags)
                or (self.systemapi and tags.has_systemapi_tags)
            )
            return not requested

        # No mode tags: fall back to the default behavior, because all NDK
        # symbols are implicitly available to APEX and LLNDK. Only the
        # architecture and API level decide.
        available = symbol_in_arch(tags, self.arch) and symbol_in_api(
            tags, self.arch, self.api
        )
        return not available

    def should_omit_version(self, version: Version) -> bool:
        """Returns True if the version section should be omitted.

        We want to omit any sections that do not have any symbols we'll have
        in the stub library: private or platform-only sections, and sections
        that contain entirely future symbols or only symbols for other
        architectures.
        """
        if version.is_private or version.tags.has_platform_only_tags:
            return True
        return self._should_omit_tags(version.tags)

    def should_omit_symbol(self, symbol: Symbol) -> bool:
        """Returns True if the symbol should be omitted."""
        is_ndk_symbol = not symbol.tags.has_mode_tags
        if is_ndk_symbol and not self.ndk:
            # Symbols without mode tags are NDK symbols. They are usually
            # included, but must be dropped when NDK symbols are explicitly
            # filtered out.
            return True

        return self._should_omit_tags(symbol.tags)
Dan Albert08532b62016-07-28 18:09:47 -0700280
def symbol_in_arch(tags: Tags, arch: Arch) -> bool:
    """Returns true if the symbol is present for the given architecture."""
    tag_list = tuple(tags)

    # Explicitly tagged for the requested architecture.
    if arch in tag_list:
        return True

    # If there were no arch tags, the symbol is available for all
    # architectures. If there were any arch tags, the symbol is only available
    # for the tagged architectures (and ours was not among them).
    return not any(tag in ALL_ARCHITECTURES for tag in tag_list)
294
295
def symbol_in_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given API level."""
    arch_prefix = 'introduced-' + arch + '='
    common_tag = None
    arch_tag = None
    for tag in tags:
        if tag.startswith('introduced='):
            common_tag = tag
        elif tag.startswith(arch_prefix):
            arch_tag = tag
        elif tag == 'future':
            # Future symbols are only present when targeting the future API.
            return api == FUTURE_API_LEVEL

    # If there is an arch-specific tag, it overrides the common one.
    introduced_tag = common_tag if arch_tag is None else arch_tag
    if introduced_tag is None:
        # We found no "introduced" tags, so the symbol has always been
        # available.
        return True

    introduced_level = int(get_tag_value(introduced_tag))
    return api >= introduced_level
316
317
def symbol_versioned_in_api(tags: Iterable[Tag], api: int) -> bool:
    """Returns true if the symbol should be versioned for the given API.

    This models the `versioned=API` tag. This should be a very uncommonly
    needed tag, and is really only needed to fix versioning mistakes that are
    already out in the wild.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).
    """
    # Only the first "versioned=" tag is considered.
    versioned_tag = next(
        (tag for tag in tags if tag.startswith('versioned=')), None
    )
    if versioned_tag is None:
        # Without a "versioned" tag, the symbol has been versioned for as
        # long as it has been introduced.
        return True
    return api >= int(get_tag_value(versioned_tag))
336
Dan Albert914449f2016-06-17 16:45:24 -0700337
class ParseError(RuntimeError):
    """Raised when a symbol file cannot be parsed."""
Dan Albert914449f2016-06-17 16:45:24 -0700340
341
class MultiplyDefinedSymbolError(RuntimeError):
    """A symbol name was multiply defined.

    Attributes:
        multiply_defined_symbols: List of the offending symbol names, in the
            order they were supplied.
    """
    def __init__(self, multiply_defined_symbols: Iterable[str]) -> None:
        # Materialize first: the names are needed both for the message and
        # for the attribute, and a one-shot iterator would be exhausted by
        # the join, leaving the attribute empty.
        names = list(multiply_defined_symbols)
        super().__init__(
            'Version script contains multiple definitions for: {}'.format(
                ', '.join(names)))
        self.multiply_defined_symbols = names
349
350
class SymbolFileParser:
    """Line-oriented parser for NDK symbol files."""
    def __init__(self, input_file: TextIO, api_map: ApiMap, filt: Filter) -> None:
        self.input_file = input_file
        self.api_map = api_map
        self.filter = filt
        # Most recently read significant line; None until next_line() runs.
        self.current_line: Optional[str] = None

    def parse(self) -> List[Version]:
        """Parses the symbol file and returns a list of Version objects.

        Raises:
            ParseError: The file contains something other than version blocks
                at top level, or a version block is malformed.
            MultiplyDefinedSymbolError: A symbol that survives filtering is
                defined more than once.
        """
        versions: List[Version] = []
        while True:
            line = self.next_line()
            if not line:
                # '' means EOF.
                break
            if '{' not in line:
                raise ParseError(
                    f'Unexpected contents at top level: {line}')
            versions.append(self.parse_version())

        self.check_no_duplicate_symbols(versions)
        return versions

    def check_no_duplicate_symbols(self, versions: Iterable[Version]) -> None:
        """Raises errors for multiply defined symbols.

        This situation is the normal case when symbol versioning is actually
        used, but this script doesn't currently handle that. The error message
        would otherwise be a not necessarily obvious redefinition error from
        stub.c, so it's better for us to catch this situation and raise a
        better error.

        Versions and symbols rejected by the filter are not counted.
        """
        seen = set()
        duplicates = set()
        for version in versions:
            if self.filter.should_omit_version(version):
                continue

            for symbol in version.symbols:
                if self.filter.should_omit_symbol(symbol):
                    continue

                if symbol.name in seen:
                    duplicates.add(symbol.name)
                else:
                    seen.add(symbol.name)

        if duplicates:
            raise MultiplyDefinedSymbolError(sorted(duplicates))

    def parse_version(self) -> Version:
        """Parses a single version section and returns a Version object.

        Expects current_line to hold the section's opening line.
        """
        assert self.current_line is not None
        header = self.current_line
        name = header.partition('{')[0].strip()
        tags = get_tags(header, self.api_map)
        symbols: List[Symbol] = []
        in_global_scope = True
        in_cpp_block = False
        while True:
            line = self.next_line()
            if not line:
                raise ParseError('Unexpected EOF in version block.')

            if '}' in line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                trailer = line.partition('}')[2].partition('#')[0].strip()
                if not trailer.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if not in_cpp_block:
                    base = trailer.rstrip(';').rstrip()
                    return Version(name, base or None, tags, symbols)
                # This brace closed the 'extern "C++"' block, not the version.
                in_cpp_block = False
                continue

            if 'extern "C++" {' in line:
                in_cpp_block = True
                continue

            if in_cpp_block:
                # Everything inside an 'extern "C++"' block is ignored.
                continue

            if ':' in line:
                label = line.split(':')[0].strip()
                if label == 'global':
                    in_global_scope = True
                elif label == 'local':
                    in_global_scope = False
                else:
                    raise ParseError('Unknown visiblity label: ' + label)
                continue

            # Symbols in a hidden (local) scope are ignored.
            if in_global_scope:
                symbols.append(self.parse_symbol())

    def parse_symbol(self) -> Symbol:
        """Parses a single symbol line and returns a Symbol object."""
        assert self.current_line is not None
        line = self.current_line
        if ';' not in line:
            raise ParseError('Expected ; to terminate symbol: ' + line)
        if '*' in line:
            raise ParseError('Wildcard global symbols are not permitted.')
        # Line is now in the format "<symbol-name>; # tags"
        name = line.strip().partition(';')[0]
        return Symbol(name, get_tags(line, self.api_map))

    @staticmethod
    def _is_blank_or_comment(line: str) -> bool:
        """Returns True for empty, whitespace-only, and '#' comment lines."""
        stripped = line.strip()
        return not stripped or stripped.startswith('#')

    def next_line(self) -> str:
        """Returns the next non-empty non-comment line.

        The line is also stored in current_line. A return value of ''
        indicates EOF.
        """
        readline = self.input_file.readline
        line = readline()
        while self._is_blank_or_comment(line):
            line = readline()

            # We want to skip empty lines, but '' indicates EOF.
            if line == '':
                break
        self.current_line = line
        return line