Neon-optimized version of the GNU symbol hash calculation
On 64-bit walleye, this improves the linker relocation benchmark from 71.9ms to
70.7ms (a 1.7% reduction in run-time).
On a 32-bit device, it improves the linker relocation benchmark from
205.5ms to 201.2ms (a 2.1% reduction in run-time).
$ adb shell taskset 10 /data/benchmarktest64/linker-benchmarks/linker-benchmarks --benchmark_repetitions=100 --benchmark_display_aggregates_only
--------------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------------
BM_gnu_hash_simple_mean 15232 ns 15212 ns 100
BM_gnu_hash_simple_median 15176 ns 15159 ns 100
BM_gnu_hash_simple_stddev 111 ns 110 ns 100
BM_gnu_hash_neon_mean 10265 ns 10252 ns 100
BM_gnu_hash_neon_median 10261 ns 10249 ns 100
BM_gnu_hash_neon_stddev 28.1 ns 23.9 ns 100
Bug: none
Test: linker-unit-tests
Test: linker-benchmarks
Change-Id: I3983bca1dddc9241bb70290ad3651d895f046660
diff --git a/linker/linker_gnu_hash.h b/linker/linker_gnu_hash.h
index 8375743..f85e801 100644
--- a/linker/linker_gnu_hash.h
+++ b/linker/linker_gnu_hash.h
@@ -32,7 +32,18 @@
#include <utility>
-static inline std::pair<uint32_t, uint32_t> calculate_gnu_hash(const char* name) {
+#if defined(__arm__) || defined(__aarch64__)  // 32-bit or 64-bit ARM target
+#define USE_GNU_HASH_NEON 1  // calculate_gnu_hash() will call calculate_gnu_hash_neon()
+#else
+#define USE_GNU_HASH_NEON 0  // calculate_gnu_hash() will call calculate_gnu_hash_simple()
+#endif
+
+#if USE_GNU_HASH_NEON  // always defined (0 or 1) by the block above, so #if rather than #ifdef
+#include "arch/arm_neon/linker_gnu_hash_neon.h"  // presumably declares calculate_gnu_hash_neon() -- confirm
+#endif
+
+__attribute__((unused))
+static std::pair<uint32_t, uint32_t> calculate_gnu_hash_simple(const char* name) {
uint32_t h = 5381;
const uint8_t* name_bytes = reinterpret_cast<const uint8_t*>(name);
#pragma unroll 8
@@ -41,3 +52,11 @@
}
return { h, reinterpret_cast<const char*>(name_bytes) - name };
}
+
+static inline std::pair<uint32_t, uint32_t> calculate_gnu_hash(const char* name) {  // Forwards `name` to the implementation chosen at compile time and returns its pair unchanged.
+#if USE_GNU_HASH_NEON  // set to 1 when __arm__ or __aarch64__ is defined, 0 otherwise
+ return calculate_gnu_hash_neon(name);  // not defined in this header; presumably provided by linker_gnu_hash_neon.h -- confirm
+#else
+ return calculate_gnu_hash_simple(name);  // implementation defined earlier in this header
+#endif
+}