/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
// Our goal is to measure the cost of various C++ atomic operations.
// Android doesn't really control those. But since some of these operations can be quite
// expensive, this may be useful input for development of higher level code.
// Expected mappings from C++ atomics to hardware primitives can be found at
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .

#include <benchmark/benchmark.h>
#include <atomic>
#include <mutex>

// We time atomic operations separated by a volatile (not atomic!) increment. This ensures
// that the compiler emits memory instructions (e.g. load or store) prior to any fence or the
// like. That in turn ensures that the CPU has outstanding memory operations when the fence
// is executed.

// In most respects, we compute best case values. Since there is only one thread, there are no
// coherence misses.

// We assume that the compiler is not smart enough to optimize away fences in a single-threaded
// program. If that changes, we'll need to add a second thread.

// Volatile (non-atomic) counter bumped between timed operations so the
// compiler must emit a real memory access alongside each atomic op or fence.
volatile unsigned counter;

// The atomic location every benchmark below operates on.
std::atomic<int> test_loc(0);

// Results are published here so the compiler cannot discard the timed work.
volatile unsigned sink;

// Lock used only by the critical-section comparison benchmark.
std::mutex mtx;

45
46void BM_empty(benchmark::State& state) {
47 while (state.KeepRunning()) {
48 ++counter;
49 }
50}
51BENCHMARK(BM_empty);
52
53static void BM_load_relaxed(benchmark::State& state) {
54 unsigned result = 0;
55 while (state.KeepRunning()) {
56 result += test_loc.load(std::memory_order_relaxed);
57 ++counter;
58 }
59 sink = result;
60}
61BENCHMARK(BM_load_relaxed);
62
63static void BM_load_acquire(benchmark::State& state) {
64 unsigned result = 0;
65 while (state.KeepRunning()) {
66 result += test_loc.load(std::memory_order_acquire);
67 ++counter;
68 }
69 sink = result;
70}
71BENCHMARK(BM_load_acquire);
72
73static void BM_store_release(benchmark::State& state) {
74 int i = counter;
75 while (state.KeepRunning()) {
76 test_loc.store(++i, std::memory_order_release);
77 ++counter;
78 }
79}
80BENCHMARK(BM_store_release);
81
82static void BM_store_seq_cst(benchmark::State& state) {
83 int i = counter;
84 while (state.KeepRunning()) {
85 test_loc.store(++i, std::memory_order_seq_cst);
86 ++counter;
87 }
88}
89BENCHMARK(BM_store_seq_cst);
90
91static void BM_fetch_add_relaxed(benchmark::State& state) {
92 unsigned result = 0;
93 while (state.KeepRunning()) {
94 result += test_loc.fetch_add(1, std::memory_order_relaxed);
95 ++counter;
96 }
97 sink = result;
98}
99BENCHMARK(BM_fetch_add_relaxed);
100
101static void BM_fetch_add_seq_cst(benchmark::State& state) {
102 unsigned result = 0;
103 while (state.KeepRunning()) {
104 result += test_loc.fetch_add(1, std::memory_order_seq_cst);
105 ++counter;
106 }
107 sink = result;
108}
109BENCHMARK(BM_fetch_add_seq_cst);
110
// The fence benchmarks include a relaxed load to make it much harder to optimize away
// the fence.

114static void BM_acquire_fence(benchmark::State& state) {
115 unsigned result = 0;
116 while (state.KeepRunning()) {
117 result += test_loc.load(std::memory_order_relaxed);
118 std::atomic_thread_fence(std::memory_order_acquire);
119 ++counter;
120 }
121 sink = result;
122}
123BENCHMARK(BM_acquire_fence);
124
125static void BM_seq_cst_fence(benchmark::State& state) {
126 unsigned result = 0;
127 while (state.KeepRunning()) {
128 result += test_loc.load(std::memory_order_relaxed);
129 std::atomic_thread_fence(std::memory_order_seq_cst);
130 ++counter;
131 }
132 sink = result;
133}
134BENCHMARK(BM_seq_cst_fence);
135
// For comparison, also throw in a critical section version:

138static void BM_fetch_add_cs(benchmark::State& state) {
139 unsigned result = 0;
140 while (state.KeepRunning()) {
141 {
142 std::lock_guard<std::mutex> _(mtx);
143 result += ++counter;
144 }
145 }
146 sink = result;
147}
148BENCHMARK(BM_fetch_add_cs);