Improve the <ctype.h> (and <wctype.h>) benchmarks.
From trying to optimize <ctype.h> I've realized that our current
benchmarks -- by reusing the same single input repeatedly -- are
basically just testing the CPU's branch predictor. And modern branch
predictors are pretty good. Subvert this by using randomized data
instead.
Added macros to generate the different ctype/wctype versions of the
benchmarks.
Test: Ran benchmarks and verified they appear to benchmark the calls.
Change-Id: I262181ab43e041ce39d9b684071bc945cf996a1c
diff --git a/benchmarks/ctype_benchmark.cpp b/benchmarks/ctype_benchmark.cpp
index c6c23b0..b162ea7 100644
--- a/benchmarks/ctype_benchmark.cpp
+++ b/benchmarks/ctype_benchmark.cpp
@@ -16,81 +16,47 @@
#include <ctype.h>
+#include <array>
+#include <numeric>
+#include <random>
+
#include <benchmark/benchmark.h>
#include "util.h"
-// Avoid optimization.
-volatile int A = 'A';
-volatile int a = 'a';
-volatile int X = 'X';
-volatile int x = 'x';
-volatile int backspace = '\b';
-volatile int del = '\x7f';
-volatile int space = ' ';
-volatile int tab = '\t';
-volatile int zero = '0';
-volatile int underscore = '_';
-volatile int top_bit_set = 0x88;
+static std::array<int, 128> RandomAscii() {
+ std::array<int, 128> result;
+ std::iota(result.begin(), result.end(), 0);
+ std::shuffle(result.begin(), result.end(), std::mt19937{std::random_device{}()});
+ return result;
+}
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y1, isalnum(A));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y2, isalnum(a));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y3, isalnum(zero));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_n, isalnum(underscore));
+#define CTYPE_BENCHMARK(__benchmark, fn) \
+ static void __benchmark##_##fn(benchmark::State& state) { \
+ auto chars = RandomAscii(); \
+ for (auto _ : state) { \
+ for (char ch : chars) { \
+ benchmark::DoNotOptimize(fn(ch)); \
+ } \
+ } \
+ state.SetBytesProcessed(state.iterations() * chars.size()); \
+ } \
+ BIONIC_BENCHMARK(__benchmark##_##fn)
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_y1, isalpha(A));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_y2, isalpha(a));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_n, isalpha(underscore));
+CTYPE_BENCHMARK(BM_ctype, isalpha);
+CTYPE_BENCHMARK(BM_ctype, isalnum);
+CTYPE_BENCHMARK(BM_ctype, isascii);
+CTYPE_BENCHMARK(BM_ctype, isblank);
+CTYPE_BENCHMARK(BM_ctype, iscntrl);
+CTYPE_BENCHMARK(BM_ctype, isgraph);
+CTYPE_BENCHMARK(BM_ctype, islower);
+CTYPE_BENCHMARK(BM_ctype, isprint);
+CTYPE_BENCHMARK(BM_ctype, ispunct);
+CTYPE_BENCHMARK(BM_ctype, isspace);
+CTYPE_BENCHMARK(BM_ctype, isupper);
+CTYPE_BENCHMARK(BM_ctype, isxdigit);
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isascii_y, isascii(x));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isascii_n, isascii(top_bit_set));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_y1, isblank(space));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_y2, isblank(tab));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_n, isblank(underscore));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_y1, iscntrl(backspace));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_y2, iscntrl(del));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_n, iscntrl(underscore));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isdigit_y, iscntrl(zero));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isdigit_n, iscntrl(underscore));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y1, isgraph(A));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y2, isgraph(a));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y3, isgraph(zero));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y4, isgraph(underscore));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_n, isgraph(space));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_islower_y, islower(x));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_islower_n, islower(X));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y1, isprint(A));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y2, isprint(a));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y3, isprint(zero));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y4, isprint(underscore));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y5, isprint(space));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_n, isprint(backspace));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_ispunct_y, ispunct(underscore));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_ispunct_n, ispunct(A));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_y1, isspace(space));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_y2, isspace(tab));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_n, isspace(A));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isupper_y, isupper(X));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isupper_n, isupper(x));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y1, isxdigit(zero));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y2, isxdigit(a));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y3, isxdigit(A));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_n, isxdigit(underscore));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_toascii_y, isascii(x));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_toascii_n, isascii(top_bit_set));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_tolower_y, tolower(X));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_tolower_n, tolower(x));
-
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_toupper_y, toupper(x));
-BIONIC_TRIVIAL_BENCHMARK(BM_ctype_toupper_n, toupper(X));
+CTYPE_BENCHMARK(BM_ctype, toascii);
+CTYPE_BENCHMARK(BM_ctype, tolower);
+CTYPE_BENCHMARK(BM_ctype, _tolower);
+CTYPE_BENCHMARK(BM_ctype, toupper);
+CTYPE_BENCHMARK(BM_ctype, _toupper);
diff --git a/benchmarks/wctype_benchmark.cpp b/benchmarks/wctype_benchmark.cpp
index cdf5568..cf96057 100644
--- a/benchmarks/wctype_benchmark.cpp
+++ b/benchmarks/wctype_benchmark.cpp
@@ -16,17 +16,73 @@
#include <wctype.h>
+#include <numeric>
+#include <random>
+#include <vector>
+
#include <benchmark/benchmark.h>
#include "util.h"
-BIONIC_TRIVIAL_BENCHMARK(BM_wctype_towlower_ascii_y, towlower('X'));
-BIONIC_TRIVIAL_BENCHMARK(BM_wctype_towlower_ascii_n, towlower('x'));
+static std::vector<wint_t> RandomAscii() {
+ std::vector<wint_t> result(128);
+ std::iota(result.begin(), result.end(), 0);
+ std::shuffle(result.begin(), result.end(), std::mt19937{std::random_device{}()});
+ return result;
+}
-BIONIC_TRIVIAL_BENCHMARK(BM_wctype_towlower_unicode_y, towlower(0x0391));
-BIONIC_TRIVIAL_BENCHMARK(BM_wctype_towlower_unicode_n, towlower(0x03b1));
+static std::vector<wint_t> RandomNonAscii() {
+ std::vector<wint_t> result;
+ std::mt19937 rng{std::random_device{}()};
+ std::uniform_int_distribution<> d(0x80, 0xffff);
+ for (size_t i = 0; i < 128; i++) result.push_back(d(rng));
+ return result;
+}
-BIONIC_TRIVIAL_BENCHMARK(BM_wctype_towupper_ascii_y, towupper('x'));
-BIONIC_TRIVIAL_BENCHMARK(BM_wctype_towupper_ascii_n, towupper('X'));
+#define WCTYPE_BENCHMARK(__benchmark, fn, random_fn) \
+ static void __benchmark##_##fn(benchmark::State& state) { \
+ auto chars = random_fn(); \
+ for (auto _ : state) { \
+ for (char ch : chars) { \
+ benchmark::DoNotOptimize(fn(ch)); \
+ } \
+ } \
+ state.SetBytesProcessed(state.iterations() * chars.size()); \
+ } \
+ BIONIC_BENCHMARK(__benchmark##_##fn)
-BIONIC_TRIVIAL_BENCHMARK(BM_wctype_towupper_unicode_y, towupper(0x03b1));
-BIONIC_TRIVIAL_BENCHMARK(BM_wctype_towupper_unicode_n, towupper(0x0391));
+#define WCTYPE_BENCHMARK_ASCII(__benchmark, fn) WCTYPE_BENCHMARK(__benchmark, fn, RandomAscii)
+
+#define WCTYPE_BENCHMARK_NON_ASCII(__benchmark, fn) \
+ WCTYPE_BENCHMARK(__benchmark, fn, RandomNonAscii)
+
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswalnum);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswalpha);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswblank);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswcntrl);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswdigit);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswgraph);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswlower);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswprint);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswpunct);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswspace);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswupper);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii, iswxdigit);
+
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii_transform, towlower);
+WCTYPE_BENCHMARK_ASCII(BM_wctype_ascii_transform, towupper);
+
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswalnum);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswalpha);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswblank);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswcntrl);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswdigit);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswgraph);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswlower);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswprint);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswpunct);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswspace);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswupper);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii, iswxdigit);
+
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii_transform, towlower);
+WCTYPE_BENCHMARK_NON_ASCII(BM_wctype_non_ascii_transform, towupper);