/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Our goal is to measure the cost of various C++ atomic operations.
// Android doesn't really control those. But since some of these operations can be quite
// expensive, this may be useful input for development of higher-level code.
// Expected mappings from C++ atomics to hardware primitives can be found at
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .
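//
// For example, on ARMv8-A one would typically expect (our reading of those mappings,
// not something this file verifies): a relaxed load compiles to a plain ldr, an
// acquire load to ldar, release and seq_cst stores both to stlr, and a seq_cst
// fence to dmb ish.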

#include <benchmark/benchmark.h>
#include <atomic>
#include <mutex>

// We time atomic operations separated by a volatile (not atomic!) increment. This ensures
// that the compiler emits memory instructions (e.g. a load or store) prior to any fence or
// the like. That in turn ensures that the CPU has outstanding memory operations when the
// fence is executed.

// In most respects, we compute best-case values. Since there is only one thread, there are
// no coherence misses. (A contended sketch appears after the fetch_add benchmarks below.)

// We assume that the compiler is not smart enough to optimize away fences in a single-threaded
// program. If that changes, we'll need to add a second thread.

// Incremented between atomic operations to keep ordinary memory traffic in flight.
volatile unsigned counter;

// The shared atomic location that every benchmark operates on.
std::atomic<int> test_loc(0);

// Results are published here so the accumulating loads can't be optimized away.
volatile unsigned sink;

// Protects the critical-section benchmark at the end of the file.
std::mutex mtx;

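// Baseline: measures only the loop plus the volatile increment. Subtracting its time
// from the results below isolates the approximate cost of each atomic operation.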
static void BM_empty(benchmark::State& state) {
  while (state.KeepRunning()) {
    ++counter;
  }
}
BENCHMARK(BM_empty);

static void BM_load_relaxed(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.load(std::memory_order_relaxed);
    ++counter;
  }
  sink = result;
}
BENCHMARK(BM_load_relaxed);

static void BM_load_acquire(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.load(std::memory_order_acquire);
    ++counter;
  }
  sink = result;
}
BENCHMARK(BM_load_acquire);

static void BM_store_release(benchmark::State& state) {
  int i = counter;
  while (state.KeepRunning()) {
    test_loc.store(++i, std::memory_order_release);
    ++counter;
  }
}
BENCHMARK(BM_store_release);

static void BM_store_seq_cst(benchmark::State& state) {
  int i = counter;
  while (state.KeepRunning()) {
    test_loc.store(++i, std::memory_order_seq_cst);
    ++counter;
  }
}
BENCHMARK(BM_store_seq_cst);

static void BM_fetch_add_relaxed(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.fetch_add(1, std::memory_order_relaxed);
    ++counter;
  }
  sink = result;
}
BENCHMARK(BM_fetch_add_relaxed);

static void BM_fetch_add_seq_cst(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.fetch_add(1, std::memory_order_seq_cst);
    ++counter;
  }
  sink = result;
}
BENCHMARK(BM_fetch_add_seq_cst);

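// A contended variant (our own sketch, not part of the original measurements): the
// benchmark library's Threads() option runs the same function from several threads,
// reintroducing the coherence misses deliberately excluded above. DoNotOptimize and
// Threads are standard google-benchmark facilities; the thread count is arbitrary.
static void BM_fetch_add_contended(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.fetch_add(1, std::memory_order_seq_cst);
  }
  // Avoid the volatile counter/sink here: writing them from several threads would race.
  benchmark::DoNotOptimize(result);
}
BENCHMARK(BM_fetch_add_contended)->Threads(2);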

// The fence benchmarks include a relaxed load to make it much harder to optimize away
// the fence.

static void BM_acquire_fence(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_acquire);
    ++counter;
  }
  sink = result;
}
BENCHMARK(BM_acquire_fence);

static void BM_seq_cst_fence(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    result += test_loc.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst);
    ++counter;
  }
  sink = result;
}
BENCHMARK(BM_seq_cst_fence);

// For comparison, we also include a critical-section version: an uncontended
// std::mutex lock/unlock pair around the increment.

static void BM_fetch_add_cs(benchmark::State& state) {
  unsigned result = 0;
  while (state.KeepRunning()) {
    std::lock_guard<std::mutex> _(mtx);
    result += ++counter;
  }
  sink = result;
}
BENCHMARK(BM_fetch_add_cs);
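
// In the Android tree the benchmark harness is expected to supply main(). For a
// standalone build against the google-benchmark library, one would add the standard
// entry-point macro below (an assumption about the build setup, not part of the
// original file):
BENCHMARK_MAIN();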