/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Our goal is to measure the cost of various C++ atomic operations.
// Android doesn't really control those. But since some of these operations can be quite
// expensive, this may be useful input for development of higher level code.
// Expected mappings from C++ atomics to hardware primitives can be found at
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .
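// For example, on AArch64 these operations typically map to: plain ldr/str for relaxed
// loads and stores, ldar/stlr for acquire loads and for release or seq_cst stores, an
// ldxr/stxr loop (or an LSE ldadd) for fetch_add, and dmb barriers for the fences.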

#include <atomic>
#include <mutex>

#include <benchmark/benchmark.h>
#include "util.h"

// We time atomic operations separated by a volatile (not atomic!) increment. This ensures
// that the compiler emits memory instructions (e.g. load or store) prior to any fence or the
// like. That in turn ensures that the CPU has outstanding memory operations when the fence
// is executed.

// In most respects, we compute best-case values. Since there is only one thread, there are no
// coherence misses.

// We assume that the compiler is not smart enough to optimize away fences in a single-threaded
// program. If that changes, we'll need to add a second thread.

// We're going to use `++` on this volatile in all the tests. This is
// fine, because we're only using `volatile` in the "don't optimize this out"
// sense, and don't care whether the increment is atomic or not.
#pragma clang diagnostic ignored "-Wdeprecated-volatile"
static volatile unsigned counter;

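// The atomic location that all of the benchmarks below load from, store to, or update.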
std::atomic<int> test_loc(0);

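// Results are published here so that the computation of `result` isn't optimized away.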
static volatile unsigned sink;

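// Used only by the critical section benchmark (BM_atomic_fetch_add_cs) below.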
static std::mutex mtx;

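// Baseline: measures only the benchmark loop and the volatile increment that every other
// benchmark below also performs.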
void BM_atomic_empty(benchmark::State& state) {
  while (state.KeepRunning()) {
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_atomic_empty);

static void BM_atomic_load_relaxed(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.load(std::memory_order_relaxed);
    ++counter;
  }
  sink = result;
}
BIONIC_BENCHMARK(BM_atomic_load_relaxed);

static void BM_atomic_load_acquire(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.load(std::memory_order_acquire);
    ++counter;
  }
  sink = result;
}
BIONIC_BENCHMARK(BM_atomic_load_acquire);

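// The store benchmarks write a different value on each iteration, which helps keep the
// stores from being coalesced or dropped.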
static void BM_atomic_store_release(benchmark::State& state) {
  int i = counter;
  while (state.KeepRunning()) {
    test_loc.store(++i, std::memory_order_release);
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_atomic_store_release);

static void BM_atomic_store_seq_cst(benchmark::State& state) {
  int i = counter;
  while (state.KeepRunning()) {
    test_loc.store(++i, std::memory_order_seq_cst);
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_atomic_store_seq_cst);

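// fetch_add is a full read-modify-write; comparing the relaxed and seq_cst versions shows
// the additional cost of the ordering constraint.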
static void BM_atomic_fetch_add_relaxed(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.fetch_add(1, std::memory_order_relaxed);
    ++counter;
  }
  sink = result;
}
BIONIC_BENCHMARK(BM_atomic_fetch_add_relaxed);

static void BM_atomic_fetch_add_seq_cst(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.fetch_add(1, std::memory_order_seq_cst);
    ++counter;
  }
  sink = result;
}
BIONIC_BENCHMARK(BM_atomic_fetch_add_seq_cst);

// The fence benchmarks include a relaxed load to make it much harder to optimize away
// the fence.

static void BM_atomic_acquire_fence(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_acquire);
    ++counter;
  }
  sink = result;
}
BIONIC_BENCHMARK(BM_atomic_acquire_fence);

static void BM_atomic_seq_cst_fence(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst);
    ++counter;
  }
  sink = result;
}
BIONIC_BENCHMARK(BM_atomic_seq_cst_fence);

// For comparison, also throw in a critical section version:

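// Since there is only one thread, this measures an uncontended lock/unlock pair around the
// increment, not lock contention.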
static void BM_atomic_fetch_add_cs(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    {
      std::lock_guard<std::mutex> _(mtx);
      result += ++counter;
    }
  }
  sink = result;
}
BIONIC_BENCHMARK(BM_atomic_fetch_add_cs);