Add exp and exp2 benchmark

This patch adds two benchmarks for each of exp and exp2: one that measures
throughput and one that measures latency. The latency benchmark works by
creating a dependency on the previous iteration's result, using a multiply
by zero to avoid changing the input value.

The input is based on the expf/exp2f benchmarks (a reduced trace based on 2.4
billion samples extracted from the SPEC CPU2017 521.wrf_r benchmark).

Test: ran 32-bit and 64-bit x86 tests on host
diff --git a/benchmarks/math_benchmark.cpp b/benchmarks/math_benchmark.cpp
index d64830a..8c758ac 100644
--- a/benchmarks/math_benchmark.cpp
+++ b/benchmarks/math_benchmark.cpp
@@ -33,6 +33,7 @@
 volatile float f;
 
 static float zero = 0.0f;
+static double zerod = 0.0;  // Double zero; the exp latency benchmark multiplies the previous result by it.
 
 static void BM_math_sqrt(benchmark::State& state) {
   d = 0.0;
@@ -257,6 +258,33 @@
 }
 BIONIC_BENCHMARK(BM_math_expf_speccpu2017_latency);
 
+// Double-precision copy of expf_input, built once at static-init time so the
+// exp/exp2 loops do not pay a float-to-double conversion on every iteration.
+static const std::vector<double> exp_input (expf_input.begin(),
+                                            expf_input.end());
+
+static void BM_math_exp_speccpu2017(benchmark::State& state) {  // exp(double) throughput.
+  d = 0.0;
+  auto cin = exp_input.cbegin();
+  for (auto _ : state) {
+    d = exp(*cin);  // Argument comes only from the trace, not from the previous result.
+    if (++cin == exp_input.cend())  // Replay the input trace from the start.
+      cin = exp_input.cbegin();
+  }
+}
+BIONIC_BENCHMARK(BM_math_exp_speccpu2017);
+
+static void BM_math_exp_speccpu2017_latency(benchmark::State& state) {  // exp(double) latency.
+  d = 0.0;
+  auto cin = exp_input.cbegin();
+  for (auto _ : state) {
+    d = exp(d * zerod + *cin);  // d * zerod chains on the previous result; input unchanged.
+    if (++cin == exp_input.cend())  // Replay the input trace from the start.
+      cin = exp_input.cbegin();
+  }
+}
+BIONIC_BENCHMARK(BM_math_exp_speccpu2017_latency);
+
 static void BM_math_exp2f_speccpu2017(benchmark::State& state) {
   f = 0.0;
   auto cin = expf_input.cbegin();
@@ -279,6 +307,28 @@
 }
 BIONIC_BENCHMARK(BM_math_exp2f_speccpu2017_latency);
 
+static void BM_math_exp2_speccpu2017(benchmark::State& state) {  // exp2(double) throughput.
+  d = 0.0;
+  auto cin = exp_input.cbegin();
+  for (auto _ : state) {
+    d = exp2(*cin);  // Store to d like the exp benchmark; float f would add a narrowing.
+    if (++cin == exp_input.cend())  // Replay the input trace from the start.
+      cin = exp_input.cbegin();
+  }
+}
+BIONIC_BENCHMARK(BM_math_exp2_speccpu2017);
+
+static void BM_math_exp2_speccpu2017_latency(benchmark::State& state) {  // exp2(double) latency.
+  d = 0.0;
+  auto cin = exp_input.cbegin();
+  for (auto _ : state) {
+    d = exp2(d * zerod + *cin);  // d * zerod chains on the previous result; input unchanged.
+    if (++cin == exp_input.cend())  // Replay the input trace from the start.
+      cin = exp_input.cbegin();
+  }
+}
+BIONIC_BENCHMARK(BM_math_exp2_speccpu2017_latency);
+
 #include "powf_input.cpp"
 
 static void BM_math_powf_speccpu2006(benchmark::State& state) {