#!/usr/bin/env python3
#
# Copyright (C) 2019 The Android Open Source Project
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# Scan an ELF file and its tree of DT_NEEDED ELF files, and dump out a JSON file listing:
#  - each ELF file
#  - its DT_NEEDED entries
#  - its defined symbols
#  - its relocations
34
import argparse
import json
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
import textwrap
import typing
from enum import Enum
from typing import Any, Set, List, Dict, Optional
from subprocess import PIPE, DEVNULL
from pathlib import Path

from common_types import LoadedLibrary, SymBind, SymKind, DynSymbol, DynSymbols, Relocations, \
    SymbolRef, bfs_walk, elf_tree_to_json
53
54
# Decoded llvm-readelf stdout, keyed by repr() of the full command line that produced it.
g_readelf_cache: Dict[str, str] = {}
# Soname already resolved for each ELF path (DT_SONAME, or the file's basename as a fallback).
g_path_to_soname_cache: Dict[Path, str] = {}
57
def do_readelf_query(arguments: List[str]) -> List[str]:
    """Run ``llvm-readelf`` with *arguments* and return its stdout split into lines.

    The decoded output is memoized in ``g_readelf_cache`` under the repr of the full command
    line, so asking the same question twice (for example ``-d`` for both the soname and the
    DT_NEEDED list) only spawns one process.

    Raises:
        subprocess.CalledProcessError: llvm-readelf exited with a non-zero status.
    """
    cmdline = ['llvm-readelf'] + arguments
    key = repr(cmdline)
    out = g_readelf_cache.get(key)
    if out is None:
        out = subprocess.run(cmdline, check=True, stdout=PIPE).stdout.decode()
        g_readelf_cache[key] = out
    return out.splitlines()
65
66
def get_elf_soname(path: Path) -> str:
    """Return the soname of the ELF file at *path*.

    The value comes from the ``(SONAME)`` line of ``llvm-readelf -d``. When the file has no
    such entry, the basename of *path* is used instead. Results are memoized per path in
    ``g_path_to_soname_cache``.
    """
    cached = g_path_to_soname_cache.get(path)
    if cached is not None:
        return cached

    # Start from the fallback and overwrite it with the first SONAME entry, if any.
    soname = os.path.basename(path)
    for line in do_readelf_query(['-d', str(path)]):
        m = re.search(r'\(SONAME\)\s+Library soname: \[(.+)\]$', line)
        if m:
            soname = m.group(1)
            break

    g_path_to_soname_cache[path] = soname
    return soname
79
80
def get_elf_needed(path: Path) -> List[str]:
    """Return the DT_NEEDED library names of the ELF file at *path*.

    Names are taken from the ``(NEEDED)`` lines of ``llvm-readelf -d`` and are returned in the
    order in which those lines appear.
    """
    needed_matcher = re.compile(r'\(NEEDED\)\s+Shared library: \[(.+)\]$')
    matches = (needed_matcher.search(line) for line in do_readelf_query(['-d', str(path)]))
    return [m.group(1) for m in matches if m]
89
90
# Matches one GLOBAL/WEAK row of `llvm-readelf --dyn-syms` output.
# Groups: (number, type, bind, ndx, name, version separator, version name); the last two are
# None for an unversioned symbol. The version name accepts dots (e.g. GLIBC_2.2.5), just like
# the version group of the relocation matcher, so a dotted version does not make the whole row
# fail to match.
kSymbolMatcher = re.compile(r'''
    \s+ (\d+) : \s*                     # number
    [0-9a-f]+ \s+                       # value
    [0-9a-f]+ \s+                       # size
    (FUNC|IFUNC|OBJECT|NOTYPE) \s+      # type
    (GLOBAL|WEAK) \s+                   # bind
    \w+ \s+                             # vis
    (\d+|UND) \s+                       # ndx
    ([\.\w]+)                           # name
    (?:(@@?)([\.\w]+))?                 # version
    $
''', re.VERBOSE)
103
104
def get_dyn_symbols(path: Path) -> DynSymbols:
    """Parse ``llvm-readelf --dyn-syms`` for *path* into a table of dynamic symbols.

    Only rows accepted by ``kSymbolMatcher`` are kept (GLOBAL or WEAK symbols of type FUNC,
    IFUNC, OBJECT or NOTYPE); ``__cfi_check`` is always dropped. The result maps each symbol's
    index in the dynamic symbol table to a ``DynSymbol`` built from its name, kind, binding,
    whether its section index is something other than ``UND``, and its version separator and
    version name (both None when unversioned).

    Exits the process if the output looks like it came from an obsolete llvm-readelf.
    """
    kind_lookup = {
        'FUNC': SymKind.Func,
        'IFUNC': SymKind.Func,
        'OBJECT': SymKind.Var,
        'NOTYPE': SymKind.Func,
    }
    bind_lookup = {'GLOBAL': SymBind.Global, 'WEAK': SymBind.Weak}

    result = {}
    for line in do_readelf_query(['--dyn-syms', str(path)]):
        m = kSymbolMatcher.match(line)
        if not m:
            # gLinux currently has a version of llvm-readelf whose output is very different
            # from the current versions of llvm-readelf (or GNU readelf).
            if 'Symbol table of .gnu.hash for image:' in line:
                sys.exit('error: obsolete version of llvm-readelf')
            continue

        num, kind, bind, ndx, name, ver_type, ver_name = m.groups()

        if name == '__cfi_check':
            # The linker gives an error like:
            #   CANNOT LINK EXECUTABLE "/data/local/tmp/out-linker-bench/b_libandroid_servers": unaligned __cfi_check in the library "(null)"
            # I am probably breaking some kind of CFI invariant, so strip these out for now.
            continue

        result[int(num)] = DynSymbol(name, kind_lookup[kind], bind_lookup[bind], ndx != 'UND',
                                     ver_type, ver_name)

    return result
137
138
# Matches the start of one row of `llvm-readelf -r` output.
# Groups: (offset, info, relocation type, symbol name, symbol version). The symbol name and
# version are None when the row carries no symbol (e.g. a RELATIVE relocation). Anything after
# the matched prefix, such as a RELA addend, is ignored because callers use .match().
kRelocationMatcher = re.compile(r'''
    ([0-9a-f]+) \s+                 # offset
    ([0-9a-f]+) \s+                 # info
    (\w+)                           # type
    (?:
        \s+ [0-9a-f]+ \s+           # symbol value
        ([\.\w]+)                   # symbol name
        (?: @@? ([\.\w]+) )?        # version
    )?
    \b
''', re.VERBOSE)
150
151
def scan_relocations(path: Path, syms: DynSymbols) -> Relocations:
    """Classify the relocations listed by ``llvm-readelf -r`` for the ELF file at *path*.

    Args:
        path: ELF file to inspect.
        syms: Dynamic symbols of the same file, keyed by symbol-table index (as produced by
            get_dyn_symbols); used to tell whether a referenced symbol is weak.

    Returns:
        A Relocations object whose lists are filled as follows:
          - relative: offsets of R_ARM_RELATIVE / R_AARCH64_RELATIVE entries
          - jump_slots: SymbolRef for each *_JUMP_SLOT entry
          - got: SymbolRef for each *_GLOB_DAT entry
          - symbolic: (offset, SymbolRef) for each R_ARM_ABS32 / R_AARCH64_ABS64 entry
        Offsets are the printed offset divided by the word size (4 or 8 bytes).

    Exits the process with an error message on any row it cannot interpret: an offset that is
    neither 8 nor 16 hex digits wide, a relative relocation that names a symbol, a symbolic
    relocation without one, a symbol index missing from *syms*, or an unrecognized type.
    """
    result: Relocations = Relocations()
    for line in do_readelf_query(['-r', str(path)]):
        m = kRelocationMatcher.match(line)
        if not m:
            continue

        offset_str, info_str, reloc_name, sym_name, ver = m.groups()

        # The width of the printed offset tells 32-bit and 64-bit ELF apart. The symbol index
        # is stored in the upper bits of r_info: above bit 8 for ELF32, above bit 32 for ELF64.
        if len(offset_str) == 8:
            word_size, sym_shift = 4, 8
        elif len(offset_str) == 16:
            word_size, sym_shift = 8, 32
        else:
            sys.exit(f'error: invalid offset length: {repr(offset_str)}')
        offset = int(offset_str, 16) // word_size
        sym_idx = int(info_str, 16) >> sym_shift

        # TODO: R_ARM_IRELATIVE doesn't work, so skip it.
        if reloc_name == 'R_ARM_IRELATIVE':
            continue

        if reloc_name in ('R_ARM_RELATIVE', 'R_AARCH64_RELATIVE'):
            # Explicit check rather than `assert`, which is stripped when running with -O.
            if sym_name is not None:
                sys.exit(f'error: unexpected symbol for relative reloc {m.groups()} in {path}')
            result.relative.append(offset)
            continue

        if sym_name is None:
            sys.exit(f'error: missing symbol for reloc {m.groups()} in {path}')
        # get_dyn_symbols filters some rows out (e.g. __cfi_check), so the index may be absent;
        # report that instead of failing with a bare KeyError.
        if sym_idx not in syms:
            sys.exit(f'error: reloc {m.groups()} in {path} refers to unknown dynamic symbol '
                     f'index {sym_idx}')

        is_weak = syms[sym_idx].bind == SymBind.Weak
        symbol = SymbolRef(sym_name, is_weak, ver)

        if reloc_name in ('R_ARM_JUMP_SLOT', 'R_AARCH64_JUMP_SLOT'):
            result.jump_slots.append(symbol)
        elif reloc_name in ('R_ARM_GLOB_DAT', 'R_AARCH64_GLOB_DAT'):
            result.got.append(symbol)
        elif reloc_name in ('R_ARM_ABS32', 'R_AARCH64_ABS64'):
            result.symbolic.append((offset, symbol))
        else:
            sys.exit(f'error: unrecognized reloc {m.groups()} in {path}')

    return result
193
194
def load_elf_tree(search_path: List[Path], path: Path) -> LoadedLibrary:
    """Load the ELF file at *path* and, recursively, everything it lists as DT_NEEDED.

    Args:
        search_path: Directories searched, in order, for each DT_NEEDED name.
        path: The root ELF file.

    Returns:
        The LoadedLibrary for *path*; its ``needed`` list links to the loaded dependencies.
        Each library is loaded once and shared between all of its dependents.

    Exits the process if a DT_NEEDED library is not found in any search directory, or if a
    file is loaded whose soname is already registered.
    """
    # Every library loaded so far, keyed by soname. Entries are registered before their
    # dependencies are walked, so a dependency cycle resolves to the registered entry.
    libraries: Dict[str, LoadedLibrary] = {}

    def find_library(needed: str) -> LoadedLibrary:
        """Return the library for a DT_NEEDED name, loading it from search_path if new."""
        already_loaded = libraries.get(needed)
        if already_loaded is not None:
            return already_loaded

        for candidate_dir in search_path:
            candidate_path = candidate_dir / needed
            if candidate_path.exists():
                return load(candidate_path)

        sys.exit(f'error: missing DT_NEEDED lib {needed}!')

    def load(lib_path: Path) -> LoadedLibrary:
        """Scan one ELF file, register it under its soname and load its dependencies."""
        lib = LoadedLibrary()
        lib.soname = get_elf_soname(lib_path)
        if lib.soname in libraries:
            sys.exit(f'soname already loaded: {lib.soname}')
        libraries[lib.soname] = lib

        lib.syms = get_dyn_symbols(lib_path)
        lib.rels = scan_relocations(lib_path, lib.syms)

        for needed in get_elf_needed(lib_path):
            lib.needed.append(find_library(needed))

        return lib

    return load(path)
230
231
def main() -> None:
    """Command-line entry point: scan the INPUT ELF tree and write it to OUTPUT as JSON.

    Positional arguments are the root ELF file and the JSON file to write. ``-L PATH`` may be
    repeated to add directories that are searched for DT_NEEDED libraries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', type=str)
    parser.add_argument('output', type=str)
    parser.add_argument('-L', dest='search_path', metavar='PATH', action='append', type=str,
                        default=[])

    args = parser.parse_args()
    search_path = [Path(p) for p in args.search_path]

    # Do all of the scanning before the output file is opened. Opening with 'w' truncates it,
    # so a failure while loading would otherwise leave an empty output file behind.
    root = load_elf_tree(search_path, Path(args.input))
    tree_json = elf_tree_to_json(root)

    with open(Path(args.output), 'w') as f:
        json.dump(tree_json, f, sort_keys=True, indent=2)
244
245
# Run the scanner only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()