blob: eaa1854af3e0018b3a3441e834549c1b060e4941 [file] [log] [blame]
#!/usr/bin/env python3
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
"""ELF file checker.

This command ensures all undefined symbols in an ELF file can be resolved to
global (or weak) symbols defined in shared objects specified in DT_NEEDED
entries.
"""
23
24from __future__ import print_function
25
26import argparse
27import collections
28import os
29import os.path
30import re
31import struct
32import subprocess
33import sys
34
35
36_ELF_MAGIC = b'\x7fELF'
37
38
39# Known machines
40_EM_386 = 3
41_EM_ARM = 40
42_EM_X86_64 = 62
43_EM_AARCH64 = 183
44
45_KNOWN_MACHINES = {_EM_386, _EM_ARM, _EM_X86_64, _EM_AARCH64}
46
47
48# ELF header struct
49_ELF_HEADER_STRUCT = (
50 ('ei_magic', '4s'),
51 ('ei_class', 'B'),
52 ('ei_data', 'B'),
53 ('ei_version', 'B'),
54 ('ei_osabi', 'B'),
55 ('ei_pad', '8s'),
56 ('e_type', 'H'),
57 ('e_machine', 'H'),
58 ('e_version', 'I'),
59)
60
61_ELF_HEADER_STRUCT_FMT = ''.join(_fmt for _, _fmt in _ELF_HEADER_STRUCT)
62
63
64ELFHeader = collections.namedtuple(
65 'ELFHeader', [_name for _name, _ in _ELF_HEADER_STRUCT])
66
67
68ELF = collections.namedtuple(
69 'ELF',
70 ('dt_soname', 'dt_needed', 'imported', 'exported', 'header'))
71
72
73def _get_os_name():
74 """Get the host OS name."""
jiajia tang7e0c12b2022-06-15 22:31:42 +080075 if sys.platform.startswith('linux'):
Logan Chien0e53d882018-11-06 17:32:40 +080076 return 'linux'
jiajia tang7e0c12b2022-06-15 22:31:42 +080077 if sys.platform.startswith('darwin'):
Logan Chien0e53d882018-11-06 17:32:40 +080078 return 'darwin'
79 raise ValueError(sys.platform + ' is not supported')
80
81
82def _get_build_top():
83 """Find the build top of the source tree ($ANDROID_BUILD_TOP)."""
84 prev_path = None
85 curr_path = os.path.abspath(os.getcwd())
86 while prev_path != curr_path:
87 if os.path.exists(os.path.join(curr_path, '.repo')):
88 return curr_path
89 prev_path = curr_path
90 curr_path = os.path.dirname(curr_path)
91 return None
92
93
94def _select_latest_llvm_version(versions):
95 """Select the latest LLVM prebuilts version from a set of versions."""
96 pattern = re.compile('clang-r([0-9]+)([a-z]?)')
97 found_rev = 0
98 found_ver = None
99 for curr_ver in versions:
100 match = pattern.match(curr_ver)
101 if not match:
102 continue
103 curr_rev = int(match.group(1))
104 if not found_ver or curr_rev > found_rev or (
105 curr_rev == found_rev and curr_ver > found_ver):
106 found_rev = curr_rev
107 found_ver = curr_ver
108 return found_ver
109
110
def _get_latest_llvm_version(llvm_dir):
    """Return the latest LLVM prebuilts version among the entries of
    `llvm_dir`, as chosen by `_select_latest_llvm_version`."""
    entries = os.listdir(llvm_dir)
    return _select_latest_llvm_version(entries)
114
115
def _get_llvm_dir():
    """Find the path to LLVM prebuilts.

    The directory is `<build top>/<base>/<os>-x86/<version>` where `<base>`
    is $LLVM_PREBUILTS_BASE (default: `prebuilts/clang/host`) and `<version>`
    is $LLVM_PREBUILTS_VERSION (default: the latest version found on disk).

    Returns:
      The directory path, or None if the build top or the version cannot be
      determined or the directory does not exist.
    """
    build_top = _get_build_top()
    if build_top is None:
        # Not inside a source tree: os.path.join() would raise TypeError.
        return None

    llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
    if not llvm_prebuilts_base:
        llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')

    llvm_dir = os.path.join(
        build_top, llvm_prebuilts_base, _get_os_name() + '-x86')

    if not os.path.exists(llvm_dir):
        return None

    llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
    if not llvm_prebuilts_version:
        llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
    if not llvm_prebuilts_version:
        # No version directory was found under `llvm_dir`.
        return None

    llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)

    if not os.path.exists(llvm_dir):
        return None

    return llvm_dir
140
141
def _get_llvm_readobj():
    """Find the path to llvm-readobj executable.

    Returns:
      The path to `bin/llvm-readobj` inside the LLVM prebuilts directory when
      it exists; otherwise the bare name 'llvm-readobj'.
    """
    llvm_dir = _get_llvm_dir()
    if llvm_dir:
        llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
        if os.path.exists(llvm_readobj):
            return llvm_readobj
    # No prebuilts directory (or no binary in it): fall back to the bare name.
    return 'llvm-readobj'
147
148
class ELFError(ValueError):
    """Base class of the errors raised while parsing an ELF file."""
152
153
class ELFInvalidMagicError(ELFError):
    """Error for a file whose ELF magic word is invalid."""

    def __init__(self):
        super().__init__('bad ELF magic')
158
159
class ELFParser(object):
    """ELF file parser.

    The ELF header is read directly from the file; the dynamic table and the
    dynamic symbols are obtained by running llvm-readobj and parsing its text
    output line by line.
    """

    @classmethod
    def _read_elf_header(cls, elf_file_path):
        """Read the leading ELF header fields from the beginning of the file.

        Returns:
          An `ELFHeader`, or None if the bytes read cannot be unpacked (for
          example because the file is too short).
        """
        with open(elf_file_path, 'rb') as elf_file:
            buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT))
        try:
            return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf))
        except struct.error:
            return None


    @classmethod
    def open(cls, elf_file_path, llvm_readobj):
        """Open and parse the ELF file.

        Args:
          elf_file_path: Path to the ELF file.
          llvm_readobj: The llvm-readobj executable to run.

        Returns:
          An `ELF` tuple describing the file.

        Raises:
          ELFInvalidMagicError: The file does not have the ELF magic word.
        """
        # Parse the ELF header to check the magic word.
        header = cls._read_elf_header(elf_file_path)
        if not header or header.ei_magic != _ELF_MAGIC:
            raise ELFInvalidMagicError()

        # Run llvm-readobj and parse the output.
        return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)


    @classmethod
    def _find_prefix(cls, pattern, lines_it):
        """Iterate `lines_it` until finding a string that starts with
        `pattern`.

        The matching line is consumed. Returns True if a line was found and
        False if the iterator was exhausted first.
        """
        for line in lines_it:
            if line.startswith(pattern):
                return True
        return False


    @classmethod
    def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
        """Run llvm-readobj and parse the output.

        Raises:
          subprocess.CalledProcessError: llvm-readobj exited with a non-zero
            status.
        """
        cmd = [llvm_readobj, '--dynamic-table', '--dyn-symbols',
               elf_file_path]
        out = subprocess.check_output(cmd, text=True)
        lines = out.splitlines()
        return cls._parse_llvm_readobj(elf_file_path, header, lines)


    @classmethod
    def _parse_llvm_readobj(cls, elf_file_path, header, lines):
        """Parse the output of llvm-readobj.

        The dynamic table must come before the dynamic symbols because both
        parsers consume the same line iterator.
        """
        lines_it = iter(lines)
        dt_soname, dt_needed = cls._parse_dynamic_table(
            elf_file_path, lines_it)
        imported, exported = cls._parse_dynamic_symbols(lines_it)
        return ELF(dt_soname, dt_needed, imported, exported, header)


    # NOTE(review): lines are matched including their leading whitespace, so
    # the spaces inside the literals below are significant -- confirm them
    # against real llvm-readobj output.
    _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['

    _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
        r'^ 0x[0-9a-fA-F]+\s+NEEDED\s+Shared library: \[(.*)\]$')

    _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
        r'^ 0x[0-9a-fA-F]+\s+SONAME\s+Library soname: \[(.*)\]$')

    _DYNAMIC_SECTION_END_PATTERN = ']'


    @classmethod
    def _parse_dynamic_table(cls, elf_file_path, lines_it):
        """Parse the dynamic table section.

        Returns:
          A `(dt_soname, dt_needed)` tuple. `dt_soname` defaults to the base
          name of `elf_file_path` when there is no SONAME entry; `dt_needed`
          lists the NEEDED entries in order of appearance.
        """
        dt_soname = os.path.basename(elf_file_path)
        dt_needed = []

        dynamic = cls._find_prefix(
            cls._DYNAMIC_SECTION_START_PATTERN, lines_it)
        if not dynamic:
            return (dt_soname, dt_needed)

        for line in lines_it:
            if line == cls._DYNAMIC_SECTION_END_PATTERN:
                break

            match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
            if match:
                dt_needed.append(match.group(1))
                continue

            match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
            if match:
                dt_soname = match.group(1)
                continue

        return (dt_soname, dt_needed)


    _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
    _DYNAMIC_SYMBOLS_END_PATTERN = ']'

    _SYMBOL_ENTRY_START_PATTERN = ' Symbol {'
    _SYMBOL_ENTRY_PATTERN = re.compile(r'^ ([A-Za-z0-9_]+): (.*)$')
    # Trailing " (<decimal>)" or " (0x<hex>)" annotation after a field value.
    _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
        r'\s+\((?:(?:\d+)|(?:0x[0-9a-fA-F]+))\)$')
    _SYMBOL_ENTRY_END_PATTERN = ' }'


    @staticmethod
    def _parse_symbol_name(name_with_version):
        """Split `name_with_version` into name and version. This function may
        split at last occurrence of `@@` or `@`.

        Returns:
          A `(name, version)` tuple; `version` is '' when there is no `@`.
        """
        pos = name_with_version.rfind('@')
        if pos == -1:
            name = name_with_version
            version = ''
        else:
            if pos > 0 and name_with_version[pos - 1] == '@':
                name = name_with_version[0:pos - 1]
            else:
                name = name_with_version[0:pos]
            version = name_with_version[pos + 1:]
        return (name, version)


    @classmethod
    def _parse_dynamic_symbols(cls, lines_it):
        """Parse dynamic symbol table and collect imported and exported
        symbols.

        Undefined symbols are imported unless their binding is Weak; defined
        symbols are exported unless their binding is Local. Symbols with an
        empty name are skipped.

        Returns:
          An `(imported, exported)` tuple of dicts mapping a symbol name to
          the set of its versions.
        """
        imported = collections.defaultdict(set)
        exported = collections.defaultdict(set)

        for symbol in cls._parse_dynamic_symbols_internal(lines_it):
            name, version = cls._parse_symbol_name(symbol['Name'])
            if name:
                if symbol['Section'] == 'Undefined':
                    if symbol['Binding'] != 'Weak':
                        imported[name].add(version)
                else:
                    if symbol['Binding'] != 'Local':
                        exported[name].add(version)

        # Freeze the returned imported/exported dict.
        return (dict(imported), dict(exported))


    @classmethod
    def _parse_dynamic_symbols_internal(cls, lines_it):
        """Parse symbol entries and yield each symbol.

        Each symbol is a dict mapping a field name to its value with the
        trailing parenthesized number removed. Lines that are not inside a
        symbol entry are ignored.
        """

        if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
            return

        # Entry being collected; None while outside of a symbol entry.
        symbol = None

        for line in lines_it:
            if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
                return

            if line == cls._SYMBOL_ENTRY_START_PATTERN:
                symbol = {}
                continue

            if symbol is None:
                # A field or a closing brace without an open entry: skip it
                # rather than failing on the missing dict.
                continue

            if line == cls._SYMBOL_ENTRY_END_PATTERN:
                yield symbol
                symbol = None
                continue

            match = cls._SYMBOL_ENTRY_PATTERN.match(line)
            if match:
                key = match.group(1)
                value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub(
                    '', match.group(2))
                symbol[key] = value
                continue
324
325
class Checker(object):
    """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.

    A failed check is reported to stderr and terminates the process with exit
    status 2.
    """

    def __init__(self, llvm_readobj):
        """Create a checker that inspects ELF files with `llvm_readobj`."""
        self._file_path = ''
        self._file_under_test = None
        self._shared_libs = []

        self._llvm_readobj = llvm_readobj


    if sys.stderr.isatty():
        _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Red error
        _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Black note
    else:
        # stderr is not a terminal: emit the tags without escape sequences.
        _ERROR_TAG = 'error:'
        _NOTE_TAG = 'note:'


    def _error(self, *args):
        """Emit an error to stderr, prefixed with the file under test."""
        print(self._file_path + ': ' + self._ERROR_TAG, *args,
              file=sys.stderr)


    def _note(self, *args):
        """Emit a note to stderr, prefixed with the file under test."""
        print(self._file_path + ': ' + self._NOTE_TAG, *args,
              file=sys.stderr)


    def _load_elf_file(self, path, skip_bad_elf_magic):
        """Load an ELF file from the `path`.

        Exits with status 2 if the file cannot be opened or lacks the ELF
        magic word. When `skip_bad_elf_magic` is true, a missing magic word
        exits with status 0 instead. Any other error is reported and
        re-raised.
        """
        try:
            return ELFParser.open(path, self._llvm_readobj)
        except (IOError, OSError):
            self._error('Failed to open "{}".'.format(path))
            sys.exit(2)
        except ELFInvalidMagicError:
            if skip_bad_elf_magic:
                sys.exit(0)
            else:
                self._error(
                    'File "{}" must have a valid ELF magic word.'
                    .format(path))
                sys.exit(2)
        except Exception:
            self._error(
                'An unknown error occurred while opening "{}".'.format(path))
            raise


    def load_file_under_test(self, path, skip_bad_elf_magic,
                             skip_unknown_elf_machine):
        """Load file-under-test (either an executable or a shared lib).

        When `skip_unknown_elf_machine` is true and the machine ID of the
        file is not a known one, the process exits with status 0.
        """
        self._file_path = path
        self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)

        if skip_unknown_elf_machine and \
                self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
            sys.exit(0)


    def load_shared_libs(self, shared_lib_paths):
        """Load shared libraries; every path must be a valid ELF file."""
        for path in shared_lib_paths:
            self._shared_libs.append(self._load_elf_file(path, False))


    def check_dt_soname(self, soname):
        """Check whether DT_SONAME matches installation file name."""
        if self._file_under_test.dt_soname != soname:
            self._error('DT_SONAME "{}" must be equal to the file name "{}".'
                        .format(self._file_under_test.dt_soname, soname))
            sys.exit(2)


    def check_dt_needed(self, system_shared_lib_names):
        """Check whether all DT_NEEDED entries are specified in the build
        system.

        Args:
          system_shared_lib_names: Module names to leave out of the fix
            suggestions.
        """

        missing_shared_libs = False

        # Collect the DT_SONAMEs from shared libs specified in the build
        # system.
        specified_sonames = {lib.dt_soname for lib in self._shared_libs}

        # Check whether all DT_NEEDED entries are specified.
        for lib in self._file_under_test.dt_needed:
            if lib not in specified_sonames:
                # `lib` is already text: llvm-readobj output is read as str.
                self._error('DT_NEEDED "{}" is not specified in shared_libs.'
                            .format(lib))
                missing_shared_libs = True

        if missing_shared_libs:
            dt_needed = sorted(set(self._file_under_test.dt_needed))
            modules = [re.sub('\\.so$', '', lib) for lib in dt_needed]

            # Remove system shared libraries from the suggestion since they
            # are added by default.
            modules = [name for name in modules
                       if name not in system_shared_lib_names]

            self._note()
            self._note('Fix suggestions:')
            self._note(
                ' Android.bp: shared_libs: [' +
                ', '.join('"' + module + '"' for module in modules) + '],')
            self._note(
                ' Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules))

            self._note()
            self._note('If the fix above doesn\'t work, bypass this check '
                       'with:')
            self._note(' Android.bp: check_elf_files: false,')
            self._note(' Android.mk: LOCAL_CHECK_ELF_FILES := false')

            sys.exit(2)


    @staticmethod
    def _find_symbol(lib, name, version):
        """Check whether the symbol name and version matches a definition in
        lib. An empty `version` matches any exported version."""
        try:
            lib_sym_vers = lib.exported[name]
        except KeyError:
            return False
        if version == '':  # Symbol version is not requested
            return True
        return version in lib_sym_vers


    @classmethod
    def _find_symbol_from_libs(cls, libs, name, version):
        """Check whether the symbol name and version is defined in one of the
        shared libraries in libs. Returns the first such library or None."""
        for lib in libs:
            if cls._find_symbol(lib, name, version):
                return lib
        return None


    def check_symbols(self):
        """Check whether all undefined symbols are resolved to a definition.

        Definitions are searched in the file under test itself and in the
        loaded shared libraries.
        """
        all_elf_files = [self._file_under_test] + self._shared_libs
        missing_symbols = []
        for sym, imported_vers in self._file_under_test.imported.items():
            for imported_ver in imported_vers:
                lib = self._find_symbol_from_libs(
                    all_elf_files, sym, imported_ver)
                if not lib:
                    missing_symbols.append((sym, imported_ver))

        if missing_symbols:
            for sym, ver in sorted(missing_symbols):
                # Names and versions are already text; no decoding is needed.
                if ver:
                    sym += '@' + ver
                self._error('Unresolved symbol: {}'.format(sym))

            self._note()
            self._note('Some dependencies might be changed, thus the '
                       'symbol(s) above cannot be resolved.')
            self._note('Please re-build the prebuilt file: "{}".'
                       .format(self._file_path))

            self._note()
            self._note('If this is a new prebuilt file and it is designed to '
                       'have unresolved symbols, add one of the following '
                       'properties:')
            self._note(' Android.bp: allow_undefined_symbols: true,')
            self._note(' Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')

            sys.exit(2)
492
493
494def _parse_args():
495 """Parse command line options."""
496 parser = argparse.ArgumentParser()
497
498 # Input file
499 parser.add_argument('file',
500 help='Path to the input file to be checked')
501 parser.add_argument('--soname',
502 help='Shared object name of the input file')
503
504 # Shared library dependencies
505 parser.add_argument('--shared-lib', action='append', default=[],
506 help='Path to shared library dependencies')
507
Logan Chien751a9872019-12-16 15:55:16 -0800508 # System Shared library names
509 parser.add_argument('--system-shared-lib', action='append', default=[],
510 help='System shared libraries to be hidden from fix '
511 'suggestions')
512
Logan Chien0e53d882018-11-06 17:32:40 +0800513 # Check options
514 parser.add_argument('--skip-bad-elf-magic', action='store_true',
515 help='Ignore the input file without the ELF magic word')
516 parser.add_argument('--skip-unknown-elf-machine', action='store_true',
517 help='Ignore the input file with unknown machine ID')
518 parser.add_argument('--allow-undefined-symbols', action='store_true',
519 help='Ignore unresolved undefined symbols')
520
521 # Other options
522 parser.add_argument('--llvm-readobj',
523 help='Path to the llvm-readobj executable')
524
525 return parser.parse_args()
526
527
def main():
    """Parse the command line, load the ELF files and run the checks."""
    args = _parse_args()

    # Prefer the executable given on the command line; otherwise look it up.
    llvm_readobj = args.llvm_readobj or _get_llvm_readobj()

    # Load ELF files
    checker = Checker(llvm_readobj)
    checker.load_file_under_test(
        args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine)
    checker.load_shared_libs(args.shared_lib)

    # Run checks
    soname = args.soname
    if soname:
        checker.check_dt_soname(soname)

    checker.check_dt_needed(args.system_shared_lib)

    if args.allow_undefined_symbols:
        return
    checker.check_symbols()
550
551
# Run the checker only when this file is executed as a script.
if __name__ == '__main__':
    main()