/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Our goal is to measure the cost of various C++ atomic operations.
// Android doesn't really control those. But since some of these operations can be quite
// expensive, this may be useful input for development of higher level code.
// Expected mappings from C++ atomics to hardware primitives can be found at
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .

#include <atomic>
#include <mutex>

#include <benchmark/benchmark.h>
#include "util.h"

// We time atomic operations separated by a volatile (not atomic!) increment. This ensures
// that the compiler emits memory instructions (e.g. load or store) prior to any fence or the
// like. That in turn ensures that the CPU has outstanding memory operations when the fence
// is executed.

// In most respects, we compute best case values. Since there is only one thread, there are no
// coherence misses.

// We assume that the compiler is not smart enough to optimize away fences in a single-threaded
// program. If that changes, we'll need to add a second thread.

// Plain volatile (deliberately not atomic) counter. Every benchmark loop in this file
// increments it next to the operation being timed.
static volatile unsigned counter;

// The atomic location that the load, store, fetch_add and fence benchmarks operate on.
std::atomic<int> test_loc(0);

// Volatile destination that the benchmarks write their accumulated result to after the
// timed loop finishes.
static volatile unsigned sink;

// Mutex taken by the critical-section comparison benchmark.
static std::mutex mtx;

Christopher Ferris858e3362017-11-30 08:53:15 -080048void BM_atomic_empty(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -080049 while (state.KeepRunning()) {
50 ++counter;
51 }
52}
Christopher Ferris858e3362017-11-30 08:53:15 -080053BIONIC_BENCHMARK(BM_atomic_empty);
Hans Boehm3f557872017-01-23 17:30:44 -080054
Christopher Ferris858e3362017-11-30 08:53:15 -080055static void BM_atomic_load_relaxed(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -080056 unsigned result = 0;
57 while (state.KeepRunning()) {
58 result += test_loc.load(std::memory_order_relaxed);
59 ++counter;
60 }
61 sink = result;
62}
Christopher Ferris858e3362017-11-30 08:53:15 -080063BIONIC_BENCHMARK(BM_atomic_load_relaxed);
Hans Boehm3f557872017-01-23 17:30:44 -080064
Christopher Ferris858e3362017-11-30 08:53:15 -080065static void BM_atomic_load_acquire(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -080066 unsigned result = 0;
67 while (state.KeepRunning()) {
68 result += test_loc.load(std::memory_order_acquire);
69 ++counter;
70 }
71 sink = result;
72}
Christopher Ferris858e3362017-11-30 08:53:15 -080073BIONIC_BENCHMARK(BM_atomic_load_acquire);
Hans Boehm3f557872017-01-23 17:30:44 -080074
Christopher Ferris858e3362017-11-30 08:53:15 -080075static void BM_atomic_store_release(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -080076 int i = counter;
77 while (state.KeepRunning()) {
78 test_loc.store(++i, std::memory_order_release);
79 ++counter;
80 }
81}
Christopher Ferris858e3362017-11-30 08:53:15 -080082BIONIC_BENCHMARK(BM_atomic_store_release);
Hans Boehm3f557872017-01-23 17:30:44 -080083
Christopher Ferris858e3362017-11-30 08:53:15 -080084static void BM_atomic_store_seq_cst(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -080085 int i = counter;
86 while (state.KeepRunning()) {
87 test_loc.store(++i, std::memory_order_seq_cst);
88 ++counter;
89 }
90}
Christopher Ferris858e3362017-11-30 08:53:15 -080091BIONIC_BENCHMARK(BM_atomic_store_seq_cst);
Hans Boehm3f557872017-01-23 17:30:44 -080092
Christopher Ferris858e3362017-11-30 08:53:15 -080093static void BM_atomic_fetch_add_relaxed(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -080094 unsigned result = 0;
95 while (state.KeepRunning()) {
96 result += test_loc.fetch_add(1, std::memory_order_relaxed);
97 ++counter;
98 }
99 sink = result;
100}
Christopher Ferris858e3362017-11-30 08:53:15 -0800101BIONIC_BENCHMARK(BM_atomic_fetch_add_relaxed);
Hans Boehm3f557872017-01-23 17:30:44 -0800102
Christopher Ferris858e3362017-11-30 08:53:15 -0800103static void BM_atomic_fetch_add_seq_cst(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -0800104 unsigned result = 0;
105 while (state.KeepRunning()) {
106 result += test_loc.fetch_add(1, std::memory_order_seq_cst);
107 ++counter;
108 }
109 sink = result;
110}
Christopher Ferris858e3362017-11-30 08:53:15 -0800111BIONIC_BENCHMARK(BM_atomic_fetch_add_seq_cst);
Hans Boehm3f557872017-01-23 17:30:44 -0800112
// The fence benchmarks include a relaxed load to make it much harder to optimize away
// the fence.

Christopher Ferris858e3362017-11-30 08:53:15 -0800116static void BM_atomic_acquire_fence(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -0800117 unsigned result = 0;
118 while (state.KeepRunning()) {
119 result += test_loc.load(std::memory_order_relaxed);
120 std::atomic_thread_fence(std::memory_order_acquire);
121 ++counter;
122 }
123 sink = result;
124}
Christopher Ferris858e3362017-11-30 08:53:15 -0800125BIONIC_BENCHMARK(BM_atomic_acquire_fence);
Hans Boehm3f557872017-01-23 17:30:44 -0800126
Christopher Ferris858e3362017-11-30 08:53:15 -0800127static void BM_atomic_seq_cst_fence(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -0800128 unsigned result = 0;
129 while (state.KeepRunning()) {
130 result += test_loc.load(std::memory_order_relaxed);
131 std::atomic_thread_fence(std::memory_order_seq_cst);
132 ++counter;
133 }
134 sink = result;
135}
Christopher Ferris858e3362017-11-30 08:53:15 -0800136BIONIC_BENCHMARK(BM_atomic_seq_cst_fence);
Hans Boehm3f557872017-01-23 17:30:44 -0800137
// For comparison, also throw in a critical section version:

Christopher Ferris858e3362017-11-30 08:53:15 -0800140static void BM_atomic_fetch_add_cs(benchmark::State& state) {
Hans Boehm3f557872017-01-23 17:30:44 -0800141 unsigned result = 0;
142 while (state.KeepRunning()) {
143 {
144 std::lock_guard<std::mutex> _(mtx);
145 result += ++counter;
146 }
147 }
148 sink = result;
149}
Christopher Ferris858e3362017-11-30 08:53:15 -0800150BIONIC_BENCHMARK(BM_atomic_fetch_add_cs);