Support memory alignment tests for string_benchmark.cpp

Test: Manually verify that benchmark times are similar and add a unit test.
Change-Id: Idaaeb4c8c3629f797ffd8d0c86c8d7b7b9653181
diff --git a/benchmarks/Android.bp b/benchmarks/Android.bp
index 3f95aa1..0edba65 100644
--- a/benchmarks/Android.bp
+++ b/benchmarks/Android.bp
@@ -35,6 +35,17 @@
         "time_benchmark.cpp",
         "unistd_benchmark.cpp",
     ],
+    static_libs: ["libBionicBenchmarksUtils"],
+}
+
+cc_defaults {
+    name: "bionic-benchmarks-extras-defaults",
+    cflags: [
+        "-Wall",
+        "-Wextra",
+        "-Werror",
+        "-Wunused",
+    ],
 }
 
 // Build benchmarks for the device (with bionic's .so). Run with:
@@ -63,3 +74,19 @@
         },
     },
 }
+
+cc_library_static {
+    name: "libBionicBenchmarksUtils",
+    defaults: ["bionic-benchmarks-extras-defaults"],
+    srcs: ["util.cpp"],
+    host_supported: true,
+}
+
+cc_test {
+    name: "bionic-benchmarks-tests",
+    defaults: ["bionic-benchmarks-extras-defaults"],
+    srcs: [
+        "tests/benchmark_test.cpp",
+    ],
+    static_libs: ["libBionicBenchmarksUtils"],
+}
diff --git a/benchmarks/string_benchmark.cpp b/benchmarks/string_benchmark.cpp
index 86a7c35..2ab65a8 100644
--- a/benchmarks/string_benchmark.cpp
+++ b/benchmarks/string_benchmark.cpp
@@ -18,188 +18,246 @@
 #include <string.h>
 
 #include <benchmark/benchmark.h>
+#include "util.h"
 
 constexpr auto KB = 1024;
 
-#define AT_COMMON_SIZES \
-    Arg(8)->Arg(64)->Arg(512)->Arg(1*KB)->Arg(8*KB)->Arg(16*KB)->Arg(32*KB)->Arg(64*KB)
+// NOTE: these constants are temporary replacements for AT_COMMON_SIZES until
+// the new interface for Bionic benchmarks is implemented.
 
-// TODO: test unaligned operation too? (currently everything will be 8-byte aligned by malloc.)
+// Set all four to 0 to test normal alignment.
+#define AT_SRC_ALIGN 0
+#define AT_DST_ALIGN 0
+
+#define AT_ALIGNED_TWOBUF \
+    Args({(8), AT_SRC_ALIGN, AT_DST_ALIGN})->Args({(64), AT_SRC_ALIGN, AT_DST_ALIGN})-> \
+    Args({(512), AT_SRC_ALIGN, AT_DST_ALIGN})->Args({(1*KB), AT_SRC_ALIGN, AT_DST_ALIGN})-> \
+    Args({(8*KB), AT_SRC_ALIGN, AT_DST_ALIGN})->Args({(16*KB), AT_SRC_ALIGN, AT_DST_ALIGN})-> \
+    Args({(32*KB), AT_SRC_ALIGN, AT_DST_ALIGN})->Args({(64*KB), AT_SRC_ALIGN, AT_DST_ALIGN})
+
+#define AT_ALIGNED_ONEBUF \
+    Args({(8), AT_SRC_ALIGN})->Args({(64), AT_SRC_ALIGN})->Args({(512), AT_SRC_ALIGN})-> \
+    Args({(1*KB), AT_SRC_ALIGN})->Args({(8*KB), AT_SRC_ALIGN})->Args({(16*KB), AT_SRC_ALIGN})-> \
+    Args({(32*KB), AT_SRC_ALIGN})->Args({(64*KB), AT_SRC_ALIGN})
 
 static void BM_string_memcmp(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  char* src = new char[nbytes]; char* dst = new char[nbytes];
-  memset(src, 'x', nbytes);
-  memset(dst, 'x', nbytes);
+  const size_t src_alignment = state.range(1);
+  const size_t dst_alignment = state.range(2);
+
+  std::vector<char> src;
+  std::vector<char> dst;
+  char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
+  char* dst_aligned = GetAlignedPtrFilled(&dst, dst_alignment, nbytes, 'x');
 
   volatile int c __attribute__((unused)) = 0;
   while (state.KeepRunning()) {
-    c += memcmp(dst, src, nbytes);
+    c += memcmp(dst_aligned, src_aligned, nbytes);
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
-  delete[] src;
-  delete[] dst;
 }
-BENCHMARK(BM_string_memcmp)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_memcmp)->AT_ALIGNED_TWOBUF;
 
 static void BM_string_memcpy(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  char* src = new char[nbytes]; char* dst = new char[nbytes];
-  memset(src, 'x', nbytes);
+  const size_t src_alignment = state.range(1);
+  const size_t dst_alignment = state.range(2);
+
+  std::vector<char> src;
+  std::vector<char> dst;
+  char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
+  char* dst_aligned = GetAlignedPtr(&dst, dst_alignment, nbytes);
 
   while (state.KeepRunning()) {
-    memcpy(dst, src, nbytes);
+    memcpy(dst_aligned, src_aligned, nbytes);
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
-  delete[] src;
-  delete[] dst;
 }
-BENCHMARK(BM_string_memcpy)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_memcpy)->AT_ALIGNED_TWOBUF;
 
 static void BM_string_memmove_non_overlapping(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  std::vector<char> src(nbytes, 'x');
-  std::vector<char> dst(nbytes, 'x');
+  const size_t src_alignment = state.range(1);
+  const size_t dst_alignment = state.range(2);
+
+  std::vector<char> src;
+  std::vector<char> dst;
+  char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
+  char* dst_aligned = GetAlignedPtrFilled(&dst, dst_alignment, nbytes, 'y');
 
   while (state.KeepRunning()) {
-    memmove(dst.data(), src.data(), nbytes);
+    memmove(dst_aligned, src_aligned, nbytes);
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
 }
-BENCHMARK(BM_string_memmove_non_overlapping)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_memmove_non_overlapping)->AT_ALIGNED_TWOBUF;
 
 static void BM_string_memmove_overlap_dst_before_src(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  std::vector<char> buf(nbytes + 1, 'x');
+  const size_t alignment = state.range(1);
+
+  std::vector<char> buf(3 * alignment + nbytes + 1, 'x');
+  char* buf_aligned = GetAlignedPtrFilled(&buf, alignment, nbytes + 1, 'x');
 
   while (state.KeepRunning()) {
-    memmove(buf.data(), buf.data() + 1, nbytes); // Worst-case overlap.
+    memmove(buf_aligned, buf_aligned + 1, nbytes);  // Worst-case overlap.
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
 }
-BENCHMARK(BM_string_memmove_overlap_dst_before_src)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_memmove_overlap_dst_before_src)->AT_ALIGNED_ONEBUF;
 
 static void BM_string_memmove_overlap_src_before_dst(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  std::vector<char> buf(nbytes + 1, 'x');
+  const size_t alignment = state.range(1);
+
+  std::vector<char> buf;
+  char* buf_aligned = GetAlignedPtrFilled(&buf, alignment, nbytes + 1, 'x');
 
   while (state.KeepRunning()) {
-    memmove(buf.data() + 1, buf.data(), nbytes); // Worst-case overlap.
+    memmove(buf_aligned + 1, buf_aligned, nbytes);  // Worst-case overlap.
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
 }
-BENCHMARK(BM_string_memmove_overlap_src_before_dst)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_memmove_overlap_src_before_dst)->AT_ALIGNED_ONEBUF;
 
 static void BM_string_memset(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  char* dst = new char[nbytes];
+  const size_t alignment = state.range(1);
+
+  std::vector<char> buf;
+  char* buf_aligned = GetAlignedPtr(&buf, alignment, nbytes + 1);
 
   while (state.KeepRunning()) {
-    memset(dst, 0, nbytes);
+    memset(buf_aligned, 0, nbytes);
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
-  delete[] dst;
 }
-BENCHMARK(BM_string_memset)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_memset)->AT_ALIGNED_ONEBUF;
 
 static void BM_string_strlen(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  char* s = new char[nbytes];
-  memset(s, 'x', nbytes);
-  s[nbytes - 1] = 0;
+  const size_t alignment = state.range(1);
+
+  std::vector<char> buf;
+  char* buf_aligned = GetAlignedPtrFilled(&buf, alignment, nbytes + 1, 'x');
+  buf_aligned[nbytes - 1] = '\0';
 
   volatile int c __attribute__((unused)) = 0;
   while (state.KeepRunning()) {
-    c += strlen(s);
+    c += strlen(buf_aligned);
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
-  delete[] s;
 }
-BENCHMARK(BM_string_strlen)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_strlen)->AT_ALIGNED_ONEBUF;
 
 static void BM_string_strcat_copy_only(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  std::vector<char> src(nbytes, 'x');
-  std::vector<char> dst(nbytes + 2);
-  src[nbytes - 1] = '\0';
-  dst[0] = 'y';
-  dst[1] = 'y';
-  dst[2] = '\0';
+  const size_t src_alignment = state.range(1);
+  const size_t dst_alignment = state.range(2);
+
+  std::vector<char> src;
+  std::vector<char> dst;
+  char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
+  char* dst_aligned = GetAlignedPtr(&dst, dst_alignment, nbytes + 2);
+  src_aligned[nbytes - 1] = '\0';
+  dst_aligned[0] = 'y';
+  dst_aligned[1] = 'y';
+  dst_aligned[2] = '\0';
 
   while (state.KeepRunning()) {
-    strcat(dst.data(), src.data());
-    dst[2] = '\0';
+    strcat(dst_aligned, src_aligned);
+    dst_aligned[2] = '\0';
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
 }
-BENCHMARK(BM_string_strcat_copy_only)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_strcat_copy_only)->AT_ALIGNED_TWOBUF;
 
 static void BM_string_strcat_seek_only(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  std::vector<char> src(3, 'x');
-  std::vector<char> dst(nbytes + 2, 'y');
-  src[2] = '\0';
-  dst[nbytes - 1] = '\0';
+  const size_t src_alignment = state.range(1);
+  const size_t dst_alignment = state.range(2);
+
+  std::vector<char> src;
+  std::vector<char> dst;
+  char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, 3, 'x');
+  char* dst_aligned = GetAlignedPtrFilled(&dst, dst_alignment, nbytes + 2, 'y');
+  src_aligned[2] = '\0';
+  dst_aligned[nbytes - 1] = '\0';
 
   while (state.KeepRunning()) {
-    strcat(dst.data(), src.data());
-    dst[nbytes - 1] = '\0';
+    strcat(dst_aligned, src_aligned);
+    dst_aligned[nbytes - 1] = '\0';
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
 }
-BENCHMARK(BM_string_strcat_seek_only)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_strcat_seek_only)->AT_ALIGNED_TWOBUF;
 
 static void BM_string_strcat_half_copy_half_seek(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  std::vector<char> src(nbytes / 2, 'x');
-  std::vector<char> dst(nbytes / 2, 'y');
-  src[nbytes / 2 - 1] = '\0';
-  dst[nbytes / 2 - 1] = '\0';
+  const size_t src_alignment = state.range(1);
+  const size_t dst_alignment = state.range(2);
+
+  std::vector<char> src;
+  std::vector<char> dst;
+  char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes / 2, 'x');
+  char* dst_aligned = GetAlignedPtrFilled(&dst, dst_alignment, nbytes, 'y');
+  src_aligned[nbytes / 2 - 1] = '\0';
+  dst_aligned[nbytes / 2 - 1] = '\0';
 
   while (state.KeepRunning()) {
-    strcat(dst.data(), src.data());
-    dst[nbytes / 2 - 1] = '\0';
+    strcat(dst_aligned, src_aligned);
+    dst_aligned[nbytes / 2 - 1] = '\0';
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
 }
-BENCHMARK(BM_string_strcat_half_copy_half_seek)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_strcat_half_copy_half_seek)->AT_ALIGNED_TWOBUF;
 
 static void BM_string_strcpy(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  std::vector<char> src(nbytes, 'x');
-  std::vector<char> dst(nbytes);
-  src[nbytes - 1] = '\0';
+  const size_t src_alignment = state.range(1);
+  const size_t dst_alignment = state.range(2);
+
+  std::vector<char> src;
+  std::vector<char> dst;
+  char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
+  char* dst_aligned = GetAlignedPtr(&dst, dst_alignment, nbytes);
+  src_aligned[nbytes - 1] = '\0';
 
   while (state.KeepRunning()) {
-    strcpy(dst.data(), src.data());
+    strcpy(dst_aligned, src_aligned);
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
 }
-BENCHMARK(BM_string_strcpy)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_strcpy)->AT_ALIGNED_TWOBUF;
 
 static void BM_string_strcmp(benchmark::State& state) {
   const size_t nbytes = state.range(0);
-  std::vector<char> s1(nbytes, 'x');
-  std::vector<char> s2(nbytes, 'x');
-  s1[nbytes - 1] = '\0';
-  s2[nbytes - 1] = '\0';
+  const size_t s1_alignment = state.range(1);
+  const size_t s2_alignment = state.range(2);
+
+  std::vector<char> s1;
+  std::vector<char> s2;
+  char* s1_aligned = GetAlignedPtrFilled(&s1, s1_alignment, nbytes, 'x');
+  char* s2_aligned = GetAlignedPtrFilled(&s2, s2_alignment, nbytes, 'x');
+  s1_aligned[nbytes - 1] = '\0';
+  s2_aligned[nbytes - 1] = '\0';
 
   volatile int c __attribute__((unused));
   while (state.KeepRunning()) {
-    c = strcmp(s1.data(), s2.data());
+    c = strcmp(s1_aligned, s2_aligned);
   }
 
   state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
 }
-BENCHMARK(BM_string_strcmp)->AT_COMMON_SIZES;
+BENCHMARK(BM_string_strcmp)->AT_ALIGNED_TWOBUF;
diff --git a/benchmarks/tests/benchmark_test.cpp b/benchmarks/tests/benchmark_test.cpp
new file mode 100644
index 0000000..df7b686
--- /dev/null
+++ b/benchmarks/tests/benchmark_test.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <util.h>
+
+TEST(benchmark, memory_align) {
+  std::vector<char> buf(100);
+  for (size_t alignment = 1; alignment <= 32; alignment *= 2) {
+    for (size_t or_mask = 0; or_mask < alignment; ++or_mask) {
+      uintptr_t aligned_ptr = reinterpret_cast<uintptr_t>(GetAlignedMemory(buf.data(), alignment,
+                                                                           or_mask));
+      ASSERT_EQ(aligned_ptr % alignment, or_mask);
+      ASSERT_EQ(aligned_ptr & alignment, alignment);
+    }
+  }
+}
+
+TEST(benchmark, ptr_align) {
+  std::vector<char> buf;
+  for (size_t alignment = 1; alignment <= 2048; alignment *= 2) {
+    uintptr_t aligned_ptr = reinterpret_cast<uintptr_t>(GetAlignedPtr(&buf, alignment, 100));
+    ASSERT_EQ(aligned_ptr & alignment, alignment);
+    ASSERT_EQ(aligned_ptr & (alignment - 1), 0u);
+  }
+}
diff --git a/benchmarks/util.cpp b/benchmarks/util.cpp
new file mode 100644
index 0000000..31d7e04
--- /dev/null
+++ b/benchmarks/util.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util.h"
+
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <cstdlib>
+#include <vector>
+
+// This function returns a pointer less than 2 * alignment + or_mask bytes into the array.
+char *GetAlignedMemory(char *orig_ptr, size_t alignment, size_t or_mask) {
+  if ((alignment & (alignment - 1)) != 0) {
+    fprintf(stderr, "warning: alignment passed into GetAlignedMemory is not a power of two.\n");
+    std::abort();
+  }
+  if (or_mask > alignment) {
+    fprintf(stderr, "warning: or_mask passed into GetAlignedMemory is too high.\n");
+    std::abort();
+  }
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(orig_ptr);
+  if (alignment > 0) {
+    // When setting the alignment, set it to exactly the alignment chosen.
+    // The pointer returned will be guaranteed not to be aligned to anything
+    // more than that.
+    ptr += alignment - (ptr & (alignment - 1));
+    ptr |= alignment | or_mask;
+  }
+
+  return reinterpret_cast<char*>(ptr);
+}
+
+char *GetAlignedPtr(std::vector<char>* buf, size_t alignment, size_t nbytes) {
+  buf->resize(nbytes + 3 * alignment);
+  return GetAlignedMemory(buf->data(), alignment, 0);
+}
+
+char *GetAlignedPtrFilled(std::vector<char>* buf, size_t alignment, size_t nbytes, char fill_byte) {
+  char* buf_aligned = GetAlignedPtr(buf, alignment, nbytes);
+  memset(buf_aligned, fill_byte, nbytes);
+  return buf_aligned;
+}
+
+bool LockToCPU(int cpu_to_lock) {
+  cpu_set_t cpuset;
+
+  CPU_ZERO(&cpuset);
+  if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+    perror("sched_getaffinity failed");
+    return false;
+  }
+
+  if (cpu_to_lock < 0) {
+    // Lock to the last active core we find.
+    for (int i = 0; i < CPU_SETSIZE; i++) {
+      if (CPU_ISSET(i, &cpuset)) {
+        cpu_to_lock = i;
+      }
+    }
+  } else if (!CPU_ISSET(cpu_to_lock, &cpuset)) {
+    printf("Cpu %d does not exist.\n", cpu_to_lock);
+    return false;
+  }
+
+  if (cpu_to_lock < 0) {
+    printf("Cannot find any valid cpu to lock.\n");
+    return false;
+  }
+
+  CPU_ZERO(&cpuset);
+  CPU_SET(cpu_to_lock, &cpuset);
+  if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+    perror("sched_setaffinity failed");
+    return false;
+  }
+
+  return true;
+}
diff --git a/benchmarks/util.h b/benchmarks/util.h
new file mode 100644
index 0000000..bd3d515
--- /dev/null
+++ b/benchmarks/util.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _BIONIC_BENCHMARKS_UTIL_H_
+#define _BIONIC_BENCHMARKS_UTIL_H_
+
+#include <vector>
+
+// This function returns a pointer less than 2 * alignment + or_mask bytes into the array.
+char *GetAlignedMemory(char *orig_ptr, size_t alignment, size_t or_mask);
+
+char *GetAlignedPtr(std::vector<char>* buf, size_t alignment, size_t nbytes);
+
+char *GetAlignedPtrFilled(std::vector<char>* buf, size_t alignment, size_t nbytes, char fill_byte);
+
+bool LockToCPU(int cpu_to_lock);
+
+#endif // _BIONIC_BENCHMARKS_UTIL_H