Add a linker relocation benchmark

The benchmark creates a set of DSOs that mimic the work involved in
loading the current version of libandroid_servers.so. The synthetic
benchmark has roughly the same number of libraries with roughly the same
relocations.

Currently, on a local aosp_walleye build that includes recent performance
improvements (including the Neon-based CL
I3983bca1dddc9241bb70290ad3651d895f046660), using the "performance"
governor, the benchmark reports these scores:

$ adb shell taskset 10 \
  /data/benchmarktest64/linker-reloc-bench/linker-reloc-bench \
  --benchmark_repetitions=20 --benchmark_display_aggregates_only=true
...
--------------------------------------------------------------------------------
Benchmark                                      Time             CPU   Iterations
--------------------------------------------------------------------------------
BM_linker_relocation/real_time_mean        70048 us          465 us           20
BM_linker_relocation/real_time_median      70091 us          466 us           20
BM_linker_relocation/real_time_stddev        329 us         8.29 us           20

$ adb shell taskset 10 \
  /data/benchmarktest/linker-reloc-bench/linker-reloc-bench \
  --benchmark_repetitions=20 --benchmark_display_aggregates_only=true
...
--------------------------------------------------------------------------------
Benchmark                                      Time             CPU   Iterations
--------------------------------------------------------------------------------
BM_linker_relocation/real_time_mean        83051 us          462 us           20
BM_linker_relocation/real_time_median      83069 us          464 us           20
BM_linker_relocation/real_time_stddev        184 us         8.91 us           20

Test: manual
Bug: none
Change-Id: I6dac66978f8666f95c76387093bda6be0151bfce
diff --git a/benchmarks/linker_relocation/regen/dump_relocs.py b/benchmarks/linker_relocation/regen/dump_relocs.py
new file mode 100755
index 0000000..165da05
--- /dev/null
+++ b/benchmarks/linker_relocation/regen/dump_relocs.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2019 The Android Open Source Project
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in
+#    the documentation and/or other materials provided with the
+#    distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Scan an ELF file and its tree of DT_NEEDED ELF files, and dump out a JSON file listing:
+#  - each ELF file
+#  - its DT_NEEDED entries
+#  - its defined symbols
+#  - its relocations
+
+import argparse
+import json
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+import tempfile
+import textwrap
+import typing
+from enum import Enum
+from typing import Any, Set, List, Dict, Optional
+from subprocess import PIPE, DEVNULL
+from pathlib import Path
+
+from common_types import LoadedLibrary, SymBind, SymKind, DynSymbol, DynSymbols, Relocations, \
+    SymbolRef, bfs_walk, elf_tree_to_json
+
+
+g_readelf_cache: Dict[str, str] = {}
+g_path_to_soname_cache: Dict[Path, str] = {}
+
+def do_readelf_query(arguments: List[str]) -> List[str]:
+    cmdline = ['llvm-readelf'] + arguments
+    key = repr(cmdline)
+    if key in g_readelf_cache: return g_readelf_cache[key].splitlines()
+    out = subprocess.run(cmdline, check=True, stdout=PIPE).stdout.decode()
+    g_readelf_cache[key] = out
+    return out.splitlines()
+
+
+def get_elf_soname(path: Path) -> str:
+    if path in g_path_to_soname_cache: return g_path_to_soname_cache[path]
+    out = do_readelf_query(['-d', str(path)])
+    for line in out:
+        m = re.search(r'\(SONAME\)\s+Library soname: \[(.+)\]$', line)
+        if not m: continue
+        result = m.group(1)
+        break
+    else:
+        result = os.path.basename(path)
+    g_path_to_soname_cache[path] = result
+    return result
+
+
+def get_elf_needed(path: Path) -> List[str]:
+    result = []
+    out = do_readelf_query(['-d', str(path)])
+    for line in out:
+        m = re.search(r'\(NEEDED\)\s+Shared library: \[(.+)\]$', line)
+        if not m: continue
+        result.append(m.group(1))
+    return result
+
+
+kSymbolMatcher = re.compile(r'''
+    \s+ (\d+) : \s*                 # number
+    [0-9a-f]+ \s+                   # value
+    [0-9a-f]+ \s+                   # size
+    (FUNC|IFUNC|OBJECT|NOTYPE) \s+  # type
+    (GLOBAL|WEAK) \s+               # bind
+    \w+ \s+                         # vis
+    (\d+|UND) \s+                   # ndx
+    ([\.\w]+)                       # name
+    (?:(@@?)(\w+))?                 # version
+    $
+''', re.VERBOSE)
+
+
+def get_dyn_symbols(path: Path) -> DynSymbols:
+    kind_lookup = {
+        'FUNC': SymKind.Func,
+        'IFUNC': SymKind.Func,
+        'OBJECT': SymKind.Var,
+        'NOTYPE': SymKind.Func,
+    }
+    bind_lookup = { 'GLOBAL': SymBind.Global, 'WEAK': SymBind.Weak }
+
+    result = {}
+    out = do_readelf_query(['--dyn-syms', str(path)])
+    for line in out:
+        m = kSymbolMatcher.match(line)
+        if not m:
+            # gLinux currently has a version of llvm-readelf whose output is very different from
+            # the current versions of llvm-readelf (or GNU readelf).
+            if 'Symbol table of .gnu.hash for image:' in line:
+                sys.exit(f'error: obsolete version of llvm-readelf')
+            continue
+
+        num, kind, bind, ndx, name, ver_type, ver_name = m.groups()
+
+        if name == '__cfi_check':
+            # The linker gives an error like:
+            #    CANNOT LINK EXECUTABLE "/data/local/tmp/out-linker-bench/b_libandroid_servers": unaligned __cfi_check in the library "(null)"
+            # I am probably breaking some kind of CFI invariant, so strip these out for now.
+            continue
+
+        result[int(num)] = DynSymbol(name, kind_lookup[kind], bind_lookup[bind], ndx != 'UND',
+                                     ver_type, ver_name)
+
+    return result
+
+
+kRelocationMatcher = re.compile(r'''
+    ([0-9a-f]+) \s+     # offset
+    ([0-9a-f]+) \s+     # info
+    (\w+)               # type
+    (?:
+        \s+ [0-9a-f]+ \s+       # symbol value
+        ([\.\w]+)               # symbol name
+        (?: @@? ([\.\w]+) )?    # version
+    )?
+    \b
+''', re.VERBOSE)
+
+
+def scan_relocations(path: Path, syms: DynSymbols) -> Relocations:
+    result: Relocations = Relocations()
+    out = do_readelf_query(['-r', str(path)])
+    for line in out:
+        m = kRelocationMatcher.match(line)
+        if not m: continue
+
+        offset_str, info_str, reloc_name, sym_name, ver = m.groups()
+
+        if len(offset_str) == 8:
+            offset = int(offset_str, 16) // 4
+            sym_idx = int(info_str, 16) >> 8
+        elif len(offset_str) == 16:
+            offset = int(offset_str, 16) // 8
+            sym_idx = int(info_str, 16) >> 32
+        else:
+            sys.exit(f'error: invalid offset length: {repr(offset_str)}')
+
+        # TODO: R_ARM_IRELATIVE doesn't work, so skip it.
+        if reloc_name == 'R_ARM_IRELATIVE': continue
+
+        if reloc_name in ['R_ARM_RELATIVE', 'R_AARCH64_RELATIVE']:
+            assert sym_name is None
+            result.relative.append(offset)
+        else:
+            if sym_name is None:
+                sys.exit(f'error: missing symbol for reloc {m.groups()} in {path}')
+
+            is_weak = syms[sym_idx].bind == SymBind.Weak
+            symbol = SymbolRef(sym_name, is_weak, ver)
+
+            if reloc_name in ['R_ARM_JUMP_SLOT', 'R_AARCH64_JUMP_SLOT']:
+                result.jump_slots.append(symbol)
+            elif reloc_name in ['R_ARM_GLOB_DAT', 'R_AARCH64_GLOB_DAT']:
+                result.got.append(symbol)
+            elif reloc_name in ['R_ARM_ABS32', 'R_AARCH64_ABS64']:
+                result.symbolic.append((offset, symbol))
+            else:
+                sys.exit(f'error: unrecognized reloc {m.groups()} in {path}')
+
+    return result
+
+
+def load_elf_tree(search_path: List[Path], path: Path) -> LoadedLibrary:
+
+    libraries: Dict[str, LoadedLibrary] = {}
+
+    def find_library(needed: str) -> Optional[LoadedLibrary]:
+        nonlocal libraries
+
+        if needed in libraries: return libraries[needed]
+
+        for candidate_dir in search_path:
+            candidate_path = candidate_dir / needed
+            if candidate_path.exists():
+                return load(candidate_path)
+
+        sys.exit(f'error: missing DT_NEEDED lib {needed}!')
+
+    def load(path: Path) -> LoadedLibrary:
+        nonlocal libraries
+
+        lib = LoadedLibrary()
+        lib.soname = get_elf_soname(path)
+        if lib.soname in libraries: sys.exit(f'soname already loaded: {lib.soname}')
+        libraries[lib.soname] = lib
+
+        lib.syms = get_dyn_symbols(path)
+        lib.rels = scan_relocations(path, lib.syms)
+
+        for needed in get_elf_needed(path):
+            needed_lib = find_library(needed)
+            if needed_lib is not None:
+                lib.needed.append(needed_lib)
+
+        return lib
+
+    return load(path)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input', type=str)
+    parser.add_argument('output', type=str)
+    parser.add_argument('-L', dest='search_path', metavar='PATH', action='append', type=str, default=[])
+
+    args = parser.parse_args()
+    search_path = [Path(p) for p in args.search_path]
+
+    with open(Path(args.output), 'w') as f:
+        root = load_elf_tree(search_path, Path(args.input))
+        json.dump(elf_tree_to_json(root), f, sort_keys=True, indent=2)
+
+
+if __name__ == '__main__':
+    main()