Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 1 | /* |
Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 2 | * Copyright (C) 2019 The Android Open Source Project |
Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 3 | * All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * * Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * * Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in |
| 12 | * the documentation and/or other materials provided with the |
| 13 | * distribution. |
| 14 | * |
| 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 18 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 19 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS |
| 22 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| 23 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 24 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| 25 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 26 | * SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include <malloc.h> |
Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 30 | #include <unistd.h> |
| 31 | |
Chia-hung Duan | 4414844 | 2023-06-29 21:16:42 +0000 | [diff] [blame^] | 32 | #include <condition_variable> |
| 33 | #include <mutex> |
| 34 | #include <random> |
| 35 | #include <thread> |
Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 36 | #include <vector> |
Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 37 | |
| 38 | #include <benchmark/benchmark.h> |
| 39 | #include "util.h" |
| 40 | |
| 41 | #if defined(__BIONIC__) |
| 42 | |
Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 43 | static void RunMalloptPurge(benchmark::State& state, int purge_value) { |
Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 44 | static size_t sizes[] = {8, 16, 32, 64, 128, 1024, 4096, 16384, 65536, 131072, 1048576}; |
| 45 | static int pagesize = getpagesize(); |
Christopher Ferris | 7ec2c8a | 2019-04-05 12:47:39 -0700 | [diff] [blame] | 46 | mallopt(M_DECAY_TIME, 1); |
Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 47 | mallopt(M_PURGE_ALL, 0); |
Christopher Ferris | 7ec2c8a | 2019-04-05 12:47:39 -0700 | [diff] [blame] | 48 | for (auto _ : state) { |
Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 49 | state.PauseTiming(); |
| 50 | std::vector<void*> ptrs; |
| 51 | for (auto size : sizes) { |
| 52 | // Allocate at least two pages worth of the allocations. |
| 53 | for (size_t allocated = 0; allocated < 2 * static_cast<size_t>(pagesize); allocated += size) { |
| 54 | void* ptr = malloc(size); |
| 55 | if (ptr == nullptr) { |
| 56 | state.SkipWithError("Failed to allocate memory"); |
| 57 | } |
| 58 | MakeAllocationResident(ptr, size, pagesize); |
| 59 | ptrs.push_back(ptr); |
| 60 | } |
| 61 | } |
| 62 | // Free the memory, which should leave many of the pages resident until |
| 63 | // the purge call. |
| 64 | for (auto ptr : ptrs) { |
| 65 | free(ptr); |
| 66 | } |
| 67 | ptrs.clear(); |
| 68 | state.ResumeTiming(); |
Christopher Ferris | 7ec2c8a | 2019-04-05 12:47:39 -0700 | [diff] [blame] | 69 | |
Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 70 | mallopt(purge_value, 0); |
Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 71 | } |
Christopher Ferris | 7ec2c8a | 2019-04-05 12:47:39 -0700 | [diff] [blame] | 72 | mallopt(M_DECAY_TIME, 0); |
| 73 | } |
Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 74 | |
Chia-hung Duan | 4414844 | 2023-06-29 21:16:42 +0000 | [diff] [blame^] | 75 | static void RunThreadsThroughput(benchmark::State& state, size_t size, size_t num_threads) { |
| 76 | constexpr size_t kMaxBytes = 1 << 24; |
| 77 | constexpr size_t kMaxThreads = 8; |
| 78 | constexpr size_t kMinRounds = 4; |
| 79 | const size_t MaxAllocCounts = kMaxBytes / size; |
| 80 | std::mutex m; |
| 81 | bool ready = false; |
| 82 | std::condition_variable cv; |
| 83 | std::thread* threads[kMaxThreads]; |
| 84 | |
| 85 | // The goal is to create malloc/free interleaving patterns across threads. |
| 86 | // The bytes processed by each thread will be the same. The difference is the |
| 87 | // patterns. Here's an example: |
| 88 | // |
| 89 | // A: Allocation |
| 90 | // D: Deallocation |
| 91 | // |
| 92 | // T1 T2 T3 |
| 93 | // A A A |
| 94 | // A A D |
| 95 | // A D A |
| 96 | // A D D |
| 97 | // D A A |
| 98 | // D A D |
| 99 | // D D A |
| 100 | // D D D |
| 101 | // |
| 102 | // To do this, `AllocCounts` and `AllocRounds` will be adjusted according to the |
| 103 | // thread id. |
| 104 | auto thread_task = [&](size_t id) { |
| 105 | { |
| 106 | std::unique_lock lock(m); |
| 107 | // Wait until all threads are created. |
| 108 | cv.wait(lock, [&] { return ready; }); |
| 109 | } |
| 110 | |
| 111 | void** MemPool; |
| 112 | const size_t AllocCounts = (MaxAllocCounts >> id); |
| 113 | const size_t AllocRounds = (kMinRounds << id); |
| 114 | MemPool = new void*[AllocCounts]; |
| 115 | |
| 116 | for (size_t i = 0; i < AllocRounds; ++i) { |
| 117 | for (size_t j = 0; j < AllocCounts; ++j) { |
| 118 | void* ptr = malloc(size); |
| 119 | MemPool[j] = ptr; |
| 120 | } |
| 121 | |
| 122 | // Use a fix seed to reduce the noise of different round of benchmark. |
| 123 | const unsigned seed = 33529; |
| 124 | std::shuffle(MemPool, &MemPool[AllocCounts], std::default_random_engine(seed)); |
| 125 | |
| 126 | for (size_t j = 0; j < AllocCounts; ++j) free(MemPool[j]); |
| 127 | } |
| 128 | |
| 129 | delete[] MemPool; |
| 130 | }; |
| 131 | |
| 132 | for (auto _ : state) { |
| 133 | state.PauseTiming(); |
| 134 | // Don't need to acquire the lock because no thread is created. |
| 135 | ready = false; |
| 136 | |
| 137 | for (size_t i = 0; i < num_threads; ++i) threads[i] = new std::thread(thread_task, i); |
| 138 | |
| 139 | state.ResumeTiming(); |
| 140 | |
| 141 | { |
| 142 | std::unique_lock lock(m); |
| 143 | ready = true; |
| 144 | } |
| 145 | |
| 146 | cv.notify_all(); |
| 147 | |
| 148 | for (size_t i = 0; i < num_threads; ++i) { |
| 149 | threads[i]->join(); |
| 150 | delete threads[i]; |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | const size_t ThreadsBytesProcessed = kMaxBytes * kMinRounds * num_threads; |
| 155 | state.SetBytesProcessed(ThreadsBytesProcessed * static_cast<size_t>(state.iterations())); |
| 156 | } |
| 157 | |
Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 158 | static void BM_mallopt_purge(benchmark::State& state) { |
| 159 | RunMalloptPurge(state, M_PURGE); |
| 160 | } |
Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 161 | BIONIC_BENCHMARK(BM_mallopt_purge); |
Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 162 | |
Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 163 | static void BM_mallopt_purge_all(benchmark::State& state) { |
| 164 | RunMalloptPurge(state, M_PURGE_ALL); |
| 165 | } |
| 166 | BIONIC_BENCHMARK(BM_mallopt_purge_all); |
| 167 | |
Chia-hung Duan | 4414844 | 2023-06-29 21:16:42 +0000 | [diff] [blame^] | 168 | // Note that this will only test a single size class at a time so that we can |
| 169 | // observe the impact of contention more often. |
| 170 | #define BM_MALLOC_THREADS_THROUGHPUT(SIZE, NUM_THREADS) \ |
| 171 | static void BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS(benchmark::State& state) { \ |
| 172 | RunThreadsThroughput(state, SIZE, NUM_THREADS); \ |
| 173 | } \ |
| 174 | BIONIC_BENCHMARK(BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS); |
| 175 | |
| 176 | // There are three block categories in Scudo, we choose 1 from each category. |
| 177 | BM_MALLOC_THREADS_THROUGHPUT(64, 2); |
| 178 | BM_MALLOC_THREADS_THROUGHPUT(64, 4); |
| 179 | BM_MALLOC_THREADS_THROUGHPUT(64, 8); |
| 180 | BM_MALLOC_THREADS_THROUGHPUT(512, 2); |
| 181 | BM_MALLOC_THREADS_THROUGHPUT(512, 4); |
| 182 | BM_MALLOC_THREADS_THROUGHPUT(512, 8); |
| 183 | BM_MALLOC_THREADS_THROUGHPUT(8192, 2); |
| 184 | BM_MALLOC_THREADS_THROUGHPUT(8192, 4); |
| 185 | BM_MALLOC_THREADS_THROUGHPUT(8192, 8); |
| 186 | |
Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 187 | #endif |