#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Parser for Android's version script information."""
from __future__ import annotations

from dataclasses import dataclass, field
import logging
import re
from typing import (
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    NewType,
    Optional,
    TextIO,
    Tuple,
    Union,
)
34
35
# Maps an API codename (like "O") to its numeric API level.
ApiMap = Mapping[str, int]
# Distinct string types so architectures and tags are not mixed up by the
# type checker; at runtime both are plain str.
Arch = NewType('Arch', str)
Tag = NewType('Tag', str)


# Every architecture name that may appear as an arch tag on a symbol/version.
ALL_ARCHITECTURES = (
    Arch('arm'),
    Arch('arm64'),
    Arch('riscv64'),
    Arch('x86'),
    Arch('x86_64'),
)


# Arbitrary magic number. We use the same one in api-level.h for this purpose.
FUTURE_API_LEVEL = 10000
52
53
def logger() -> logging.Logger:
    """Return the main logger for this module.

    The logger is looked up by this module's ``__name__``, so repeated calls
    return the same Logger object.
    """
    return logging.getLogger(__name__)
Dan Albert914449f2016-06-17 16:45:24 -070057
58
@dataclass
class Tags:
    """Container class for the tags attached to a symbol or version."""

    # The raw tags, in the order they were supplied.
    tags: tuple[Tag, ...] = field(default_factory=tuple)

    @classmethod
    def from_strs(cls, strs: Iterable[str]) -> Tags:
        """Constructs tags from a collection of strings.

        Does not decode API levels.
        """
        return cls(tuple(Tag(s) for s in strs))

    def __contains__(self, tag: Union[Tag, str]) -> bool:
        return tag in self.tags

    def __iter__(self) -> Iterator[Tag]:
        yield from self.tags

    @property
    def has_mode_tags(self) -> bool:
        """Returns True if any mode tags (apex, llndk, systemapi) are set."""
        return (
            self.has_apex_tags
            or self.has_llndk_tags
            or self.has_systemapi_tags
        )

    @property
    def has_apex_tags(self) -> bool:
        """Returns True if any APEX tags are set."""
        return 'apex' in self.tags

    @property
    def has_systemapi_tags(self) -> bool:
        """Returns True if any systemapi tags are set."""
        return 'systemapi' in self.tags

    @property
    def has_llndk_tags(self) -> bool:
        """Returns True if any LL-NDK tags are set."""
        return 'llndk' in self.tags

    @property
    def has_platform_only_tags(self) -> bool:
        """Returns True if any platform-only tags are set."""
        return 'platform-only' in self.tags
103
104
@dataclass
class Symbol:
    """A symbol definition from a symbol file."""

    # Symbol name as written before the terminating ';'.
    name: str
    # Tags taken from the trailing '#' comment on the symbol's line.
    tags: Tags
Dan Albertaf7b36d2020-06-23 11:21:21 -0700111
112
@dataclass
class Version:
    """A version block of a symbol file."""

    # Name that precedes the opening '{' of the block.
    name: str
    # Name that follows the closing '}', or None when there is none.
    base: Optional[str]
    # Tags taken from the '#' comment on the block's opening line.
    tags: Tags
    # Global symbols declared inside the block.
    symbols: List[Symbol]

    @property
    def is_private(self) -> bool:
        """Returns True if this version block is private (platform only)."""
        return self.name.endswith(('_PRIVATE', '_PLATFORM'))
Dan Albertaf7b36d2020-06-23 11:21:21 -0700126
Dan Albertead21552021-06-04 14:30:40 -0700127
def get_tags(line: str, api_map: ApiMap) -> Tags:
    """Returns the tags found on this line as a Tags object.

    Tags are the whitespace-separated words that follow the first '#' on the
    line. API level tags have their codenames decoded via api_map.

    Raises:
        ParseError: An unknown version name was found in a tag.
    """
    _, _, all_tags = line.strip().partition('#')
    # str.split() with no argument splits on runs of whitespace and never
    # yields empty strings, so no extra filtering is needed.
    return Tags(tuple(
        decode_api_level_tag(Tag(e), api_map)
        for e in all_tags.split()
    ))
Dan Alberta85042a2016-07-28 16:58:27 -0700135
136
def is_api_level_tag(tag: Tag) -> bool:
    """Returns true if this tag has an API level that may need decoding.

    That is the case for `introduced=`, `introduced-<arch>=` and `versioned=`
    tags; only the prefix is checked here.
    """
    return tag.startswith(('introduced=', 'introduced-', 'versioned='))
146
147
def decode_api_level(api: str, api_map: ApiMap) -> int:
    """Decodes the API level argument into the API level number.

    For the average case, this just decodes the integer value from the string,
    but for unreleased APIs we need to translate from the API codename (like
    "O") to the future API level for that codename.

    Raises:
        KeyError: api is neither an integer, "current", nor a key of api_map.
    """
    try:
        return int(api)
    except ValueError:
        # Not a plain number; fall through to the codename handling below.
        pass

    if api == "current":
        return FUTURE_API_LEVEL

    return api_map[api]
164
165
def decode_api_level_tag(tag: Tag, api_map: ApiMap) -> Tag:
    """Decodes API level code name in a tag.

    Tags that are not API level tags are returned unchanged. For API level
    tags the value after '=' is replaced with its numeric API level.

    Raises:
        ParseError: An unknown version name was found in a tag.
        ValueError: The tag looks like an API level tag but has no '='
            (raised by split_tag).
    """
    if not is_api_level_tag(tag):
        return tag

    name, value = split_tag(tag)
    # Only the lookup can raise KeyError, so keep the try body minimal.
    try:
        decoded = str(decode_api_level(value, api_map))
    except KeyError as ex:
        raise ParseError(f'Unknown version name in tag: {tag}') from ex
    return Tag(f'{name}={decoded}')
Dan Albert3f6fb2d2017-03-28 16:04:25 -0700181
182
def split_tag(tag: Tag) -> Tuple[str, str]:
    """Returns a key/value tuple of the tag.

    The tag is split at the first '='; any later '=' stays in the value.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Tuple of (key, value) of the tag. Both components are strings.
    """
    key, separator, value = tag.partition('=')
    # partition() yields an empty separator when '=' is absent.
    if not separator:
        raise ValueError('Not a key/value tag: ' + tag)
    return key, value
195
196
def get_tag_value(tag: Tag) -> str:
    """Returns the value of a key/value tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    _, value = split_tag(tag)
    return value
Dan Albertc42458e2016-07-29 13:05:39 -0700206
class Filter:
    """A filter encapsulates a condition that tells whether a version or a
    symbol should be omitted or not.
    """

    def __init__(
        self,
        arch: Arch,
        api: int,
        llndk: bool = False,
        apex: bool = False,
        systemapi: bool = False,
        ndk: bool = True,
    ) -> None:
        # Architecture and API level that symbols are matched against.
        self.arch = arch
        self.api = api
        # Which modes are enabled; an object tagged with a mode is kept only
        # if that mode is enabled here.
        self.llndk = llndk
        self.apex = apex
        self.systemapi = systemapi
        # When False, symbols without any mode tag are omitted.
        self.ndk = ndk

    def _should_omit_tags(self, tags: Tags) -> bool:
        """Returns True if the tagged object should be omitted.

        This defines the rules shared between version tagging and symbol
        tagging.
        """
        # The apex and llndk tags will only exclude APIs from other modes. If
        # in APEX or LLNDK mode and neither tag is provided, we fall back to
        # the default behavior because all NDK symbols are implicitly
        # available to APEX and LLNDK.
        if tags.has_mode_tags:
            if self.apex and tags.has_apex_tags:
                return False
            if self.llndk and tags.has_llndk_tags:
                return False
            if self.systemapi and tags.has_systemapi_tags:
                return False
            # Tagged for modes that are all disabled in this filter.
            return True
        if not symbol_in_arch(tags, self.arch):
            return True
        if not symbol_in_api(tags, self.arch, self.api):
            return True
        return False

    def should_omit_version(self, version: Version) -> bool:
        """Returns True if the version section should be omitted.

        We want to omit any sections that do not have any symbols we'll have
        in the stub library. Sections that contain entirely future symbols or
        only symbols for certain architectures.
        """
        if version.is_private:
            return True
        if version.tags.has_platform_only_tags:
            return True
        return self._should_omit_tags(version.tags)

    def should_omit_symbol(self, symbol: Symbol) -> bool:
        """Returns True if the symbol should be omitted."""
        if not symbol.tags.has_mode_tags and not self.ndk:
            # Symbols that don't have mode tags are NDK. They are usually
            # included, but have to be omitted if NDK symbols are explicitly
            # filtered-out
            return True

        return self._should_omit_tags(symbol.tags)
Dan Albert08532b62016-07-28 18:09:47 -0700266
def symbol_in_arch(tags: Tags, arch: Arch) -> bool:
    """Returns true if the symbol is present for the given architecture."""
    has_arch_tags = False
    for tag in tags:
        if tag == arch:
            return True
        if tag in ALL_ARCHITECTURES:
            has_arch_tags = True

    # If there were no arch tags, the symbol is available for all
    # architectures. If there were any arch tags, the symbol is only available
    # for the tagged architectures.
    return not has_arch_tags
280
281
def symbol_in_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given API level.

    An `introduced-<arch>=` tag for the requested arch takes precedence over
    a plain `introduced=` tag, regardless of the order they appear in. A
    `future` tag makes the symbol available only at FUTURE_API_LEVEL.

    Raises:
        ValueError: The value of the selected introduced tag is not an
            integer.
    """
    introduced_tag = None
    arch_specific = False
    for tag in tags:
        # If there is an arch-specific tag, it should override the common one.
        if tag.startswith('introduced=') and not arch_specific:
            introduced_tag = tag
        elif tag.startswith('introduced-' + arch + '='):
            introduced_tag = tag
            arch_specific = True
        elif tag == 'future':
            return api == FUTURE_API_LEVEL

    if introduced_tag is None:
        # We found no "introduced" tags, so the symbol has always been
        # available.
        return True

    return api >= int(get_tag_value(introduced_tag))
302
303
def symbol_versioned_in_api(tags: Iterable[Tag], api: int) -> bool:
    """Returns true if the symbol should be versioned for the given API.

    This models the `versioned=API` tag. This should be a very uncommonly
    needed tag, and is really only needed to fix versioning mistakes that are
    already out in the wild.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).
    """
    for tag in tags:
        if tag.startswith('versioned='):
            # Only the first versioned tag is considered.
            return api >= int(get_tag_value(tag))
    # If there is no "versioned" tag, the tag has been versioned for as long as
    # it was introduced.
    return True
322
Dan Albert914449f2016-06-17 16:45:24 -0700323
class ParseError(RuntimeError):
    """An error that occurred while parsing a symbol file."""
Dan Albert914449f2016-06-17 16:45:24 -0700326
327
class MultiplyDefinedSymbolError(RuntimeError):
    """A symbol name was multiply defined."""
    def __init__(self, multiply_defined_symbols: Iterable[str]) -> None:
        # Materialize once: a one-shot iterable (e.g. a generator) would be
        # exhausted by join() and leave the attribute below empty.
        symbols = list(multiply_defined_symbols)
        super().__init__(
            'Version script contains multiple definitions for: {}'.format(
                ', '.join(symbols)))
        # The offending symbol names, as a list in the order supplied.
        self.multiply_defined_symbols = symbols
335
336
class SymbolFileParser:
    """Parses NDK symbol files."""
    def __init__(self, input_file: TextIO, api_map: ApiMap, filt: Filter) -> None:
        self.input_file = input_file
        self.api_map = api_map
        self.filter = filt
        # Most recent line returned by next_line(); '' once EOF was reached.
        self.current_line: Optional[str] = None

    def parse(self) -> List[Version]:
        """Parses the symbol file and returns a list of Version objects.

        Raises:
            ParseError: The file contents are malformed.
            MultiplyDefinedSymbolError: A symbol that is not filtered out is
                defined more than once.
        """
        versions: List[Version] = []
        while self.next_line():
            assert self.current_line is not None
            if '{' in self.current_line:
                versions.append(self.parse_version())
            else:
                raise ParseError(
                    f'Unexpected contents at top level: {self.current_line}')

        self.check_no_duplicate_symbols(versions)
        return versions

    def check_no_duplicate_symbols(self, versions: Iterable[Version]) -> None:
        """Raises errors for multiply defined symbols.

        This situation is the normal case when symbol versioning is actually
        used, but this script doesn't currently handle that. The error message
        will be a not necessarily obvious "error: redefinition of 'foo'" from
        stub.c, so it's better for us to catch this situation and raise a
        better error.

        Versions and symbols that the filter omits are not considered.
        """
        symbol_names = set()
        multiply_defined_symbols = set()
        for version in versions:
            if self.filter.should_omit_version(version):
                continue

            for symbol in version.symbols:
                if self.filter.should_omit_symbol(symbol):
                    continue

                if symbol.name in symbol_names:
                    multiply_defined_symbols.add(symbol.name)
                symbol_names.add(symbol.name)
        if multiply_defined_symbols:
            # Sorted so the error message is deterministic.
            raise MultiplyDefinedSymbolError(
                sorted(multiply_defined_symbols))

    def parse_version(self) -> Version:
        """Parses a single version section and returns a Version object.

        Expects current_line to be the line that opens the block.

        Raises:
            ParseError: The block is malformed or the file ends inside it.
        """
        assert self.current_line is not None
        name = self.current_line.split('{')[0].strip()
        tags = get_tags(self.current_line, self.api_map)
        symbols: List[Symbol] = []
        global_scope = True
        cpp_symbols = False
        while self.next_line():
            # next_line() always stores a str; this narrows the Optional.
            assert self.current_line is not None
            if '}' in self.current_line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                base = self.current_line.partition('}')[2]
                base = base.partition('#')[0].strip()
                if not base.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if cpp_symbols:
                    # This '}' closes the extern "C++" block, not the version.
                    cpp_symbols = False
                else:
                    base = base.rstrip(';').rstrip()
                    return Version(name, base or None, tags, symbols)
            elif 'extern "C++" {' in self.current_line:
                cpp_symbols = True
            elif not cpp_symbols and ':' in self.current_line:
                visibility = self.current_line.split(':')[0].strip()
                if visibility == 'local':
                    global_scope = False
                elif visibility == 'global':
                    global_scope = True
                else:
                    raise ParseError('Unknown visiblity label: ' + visibility)
            elif global_scope and not cpp_symbols:
                symbols.append(self.parse_symbol())
            else:
                # We're in a hidden scope or in 'extern "C++"' block. Ignore
                # everything.
                pass
        raise ParseError('Unexpected EOF in version block.')

    def parse_symbol(self) -> Symbol:
        """Parses a single symbol line and returns a Symbol object.

        Raises:
            ParseError: The line has no ';' or contains a wildcard '*'.
        """
        assert self.current_line is not None
        if ';' not in self.current_line:
            raise ParseError(
                'Expected ; to terminate symbol: ' + self.current_line)
        if '*' in self.current_line:
            raise ParseError(
                'Wildcard global symbols are not permitted.')
        # Line is now in the format "<symbol-name>; # tags"
        name, _, _ = self.current_line.strip().partition(';')
        tags = get_tags(self.current_line, self.api_map)
        return Symbol(name, tags)

    def next_line(self) -> str:
        """Returns the next non-empty non-comment line.

        The returned line is also stored in current_line.

        A return value of '' indicates EOF.
        """
        while True:
            line = self.input_file.readline()
            # We want to skip empty lines, but '' indicates EOF.
            if not line:
                break
            stripped = line.strip()
            if stripped and not stripped.startswith('#'):
                break
        self.current_line = line
        return self.current_line