| Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 1 | /* | 
| Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 2 | * Copyright (C) 2019 The Android Open Source Project | 
| Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 3 | * All rights reserved. | 
|  | 4 | * | 
|  | 5 | * Redistribution and use in source and binary forms, with or without | 
|  | 6 | * modification, are permitted provided that the following conditions | 
|  | 7 | * are met: | 
|  | 8 | *  * Redistributions of source code must retain the above copyright | 
|  | 9 | *    notice, this list of conditions and the following disclaimer. | 
|  | 10 | *  * Redistributions in binary form must reproduce the above copyright | 
|  | 11 | *    notice, this list of conditions and the following disclaimer in | 
|  | 12 | *    the documentation and/or other materials provided with the | 
|  | 13 | *    distribution. | 
|  | 14 | * | 
|  | 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
|  | 16 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
|  | 17 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | 
|  | 18 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | 
|  | 19 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | 
|  | 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | 
|  | 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | 
|  | 22 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | 
|  | 23 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | 
|  | 24 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | 
|  | 25 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 
|  | 26 | * SUCH DAMAGE. | 
|  | 27 | */ | 
|  | 28 |  | 
|  | 29 | #include <malloc.h> | 
| Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 30 | #include <unistd.h> | 
|  | 31 |  | 
| Chia-hung Duan | 4414844 | 2023-06-29 21:16:42 +0000 | [diff] [blame] | 32 | #include <condition_variable> | 
|  | 33 | #include <mutex> | 
|  | 34 | #include <random> | 
|  | 35 | #include <thread> | 
| Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 36 | #include <vector> | 
| Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 37 |  | 
|  | 38 | #include <benchmark/benchmark.h> | 
|  | 39 | #include "util.h" | 
|  | 40 |  | 
|  | 41 | #if defined(__BIONIC__) | 
|  | 42 |  | 
| Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 43 | static void RunMalloptPurge(benchmark::State& state, int purge_value) { | 
| Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 44 | static size_t sizes[] = {8, 16, 32, 64, 128, 1024, 4096, 16384, 65536, 131072, 1048576}; | 
|  | 45 | static int pagesize = getpagesize(); | 
| Christopher Ferris | 7ec2c8a | 2019-04-05 12:47:39 -0700 | [diff] [blame] | 46 | mallopt(M_DECAY_TIME, 1); | 
| Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 47 | mallopt(M_PURGE_ALL, 0); | 
| Christopher Ferris | 7ec2c8a | 2019-04-05 12:47:39 -0700 | [diff] [blame] | 48 | for (auto _ : state) { | 
| Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 49 | state.PauseTiming(); | 
|  | 50 | std::vector<void*> ptrs; | 
|  | 51 | for (auto size : sizes) { | 
|  | 52 | // Allocate at least two pages worth of the allocations. | 
|  | 53 | for (size_t allocated = 0; allocated < 2 * static_cast<size_t>(pagesize); allocated += size) { | 
|  | 54 | void* ptr = malloc(size); | 
|  | 55 | if (ptr == nullptr) { | 
|  | 56 | state.SkipWithError("Failed to allocate memory"); | 
|  | 57 | } | 
|  | 58 | MakeAllocationResident(ptr, size, pagesize); | 
|  | 59 | ptrs.push_back(ptr); | 
|  | 60 | } | 
|  | 61 | } | 
|  | 62 | // Free the memory, which should leave many of the pages resident until | 
|  | 63 | // the purge call. | 
|  | 64 | for (auto ptr : ptrs) { | 
|  | 65 | free(ptr); | 
|  | 66 | } | 
|  | 67 | ptrs.clear(); | 
|  | 68 | state.ResumeTiming(); | 
| Christopher Ferris | 7ec2c8a | 2019-04-05 12:47:39 -0700 | [diff] [blame] | 69 |  | 
| Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 70 | mallopt(purge_value, 0); | 
| Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 71 | } | 
| Christopher Ferris | 7ec2c8a | 2019-04-05 12:47:39 -0700 | [diff] [blame] | 72 | mallopt(M_DECAY_TIME, 0); | 
|  | 73 | } | 
| Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 74 |  | 
| Chia-hung Duan | 4414844 | 2023-06-29 21:16:42 +0000 | [diff] [blame] | 75 | static void RunThreadsThroughput(benchmark::State& state, size_t size, size_t num_threads) { | 
|  | 76 | constexpr size_t kMaxBytes = 1 << 24; | 
|  | 77 | constexpr size_t kMaxThreads = 8; | 
|  | 78 | constexpr size_t kMinRounds = 4; | 
|  | 79 | const size_t MaxAllocCounts = kMaxBytes / size; | 
|  | 80 | std::mutex m; | 
|  | 81 | bool ready = false; | 
|  | 82 | std::condition_variable cv; | 
|  | 83 | std::thread* threads[kMaxThreads]; | 
|  | 84 |  | 
|  | 85 | // The goal is to create malloc/free interleaving patterns across threads. | 
|  | 86 | // The bytes processed by each thread will be the same. The difference is the | 
|  | 87 | // patterns. Here's an example: | 
|  | 88 | // | 
|  | 89 | // A: Allocation | 
|  | 90 | // D: Deallocation | 
|  | 91 | // | 
|  | 92 | //   T1    T2    T3 | 
|  | 93 | //   A     A     A | 
|  | 94 | //   A     A     D | 
|  | 95 | //   A     D     A | 
|  | 96 | //   A     D     D | 
|  | 97 | //   D     A     A | 
|  | 98 | //   D     A     D | 
|  | 99 | //   D     D     A | 
|  | 100 | //   D     D     D | 
|  | 101 | // | 
|  | 102 | // To do this, `AllocCounts` and `AllocRounds` will be adjusted according to the | 
|  | 103 | // thread id. | 
|  | 104 | auto thread_task = [&](size_t id) { | 
|  | 105 | { | 
|  | 106 | std::unique_lock lock(m); | 
|  | 107 | // Wait until all threads are created. | 
|  | 108 | cv.wait(lock, [&] { return ready; }); | 
|  | 109 | } | 
|  | 110 |  | 
|  | 111 | void** MemPool; | 
|  | 112 | const size_t AllocCounts = (MaxAllocCounts >> id); | 
|  | 113 | const size_t AllocRounds = (kMinRounds << id); | 
|  | 114 | MemPool = new void*[AllocCounts]; | 
|  | 115 |  | 
|  | 116 | for (size_t i = 0; i < AllocRounds; ++i) { | 
|  | 117 | for (size_t j = 0; j < AllocCounts; ++j) { | 
|  | 118 | void* ptr = malloc(size); | 
|  | 119 | MemPool[j] = ptr; | 
|  | 120 | } | 
|  | 121 |  | 
|  | 122 | // Use a fix seed to reduce the noise of different round of benchmark. | 
|  | 123 | const unsigned seed = 33529; | 
|  | 124 | std::shuffle(MemPool, &MemPool[AllocCounts], std::default_random_engine(seed)); | 
|  | 125 |  | 
|  | 126 | for (size_t j = 0; j < AllocCounts; ++j) free(MemPool[j]); | 
|  | 127 | } | 
|  | 128 |  | 
|  | 129 | delete[] MemPool; | 
|  | 130 | }; | 
|  | 131 |  | 
|  | 132 | for (auto _ : state) { | 
|  | 133 | state.PauseTiming(); | 
|  | 134 | // Don't need to acquire the lock because no thread is created. | 
|  | 135 | ready = false; | 
|  | 136 |  | 
|  | 137 | for (size_t i = 0; i < num_threads; ++i) threads[i] = new std::thread(thread_task, i); | 
|  | 138 |  | 
|  | 139 | state.ResumeTiming(); | 
|  | 140 |  | 
|  | 141 | { | 
|  | 142 | std::unique_lock lock(m); | 
|  | 143 | ready = true; | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 | cv.notify_all(); | 
|  | 147 |  | 
|  | 148 | for (size_t i = 0; i < num_threads; ++i) { | 
|  | 149 | threads[i]->join(); | 
|  | 150 | delete threads[i]; | 
|  | 151 | } | 
|  | 152 | } | 
|  | 153 |  | 
|  | 154 | const size_t ThreadsBytesProcessed = kMaxBytes * kMinRounds * num_threads; | 
|  | 155 | state.SetBytesProcessed(ThreadsBytesProcessed * static_cast<size_t>(state.iterations())); | 
|  | 156 | } | 
|  | 157 |  | 
| Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 158 | static void BM_mallopt_purge(benchmark::State& state) { | 
|  | 159 | RunMalloptPurge(state, M_PURGE); | 
|  | 160 | } | 
| Christopher Ferris | 5a3c920 | 2019-12-04 15:57:07 -0800 | [diff] [blame] | 161 | BIONIC_BENCHMARK(BM_mallopt_purge); | 
| Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 162 |  | 
| Christopher Ferris | d86eb86 | 2023-02-28 12:45:54 -0800 | [diff] [blame] | 163 | static void BM_mallopt_purge_all(benchmark::State& state) { | 
|  | 164 | RunMalloptPurge(state, M_PURGE_ALL); | 
|  | 165 | } | 
|  | 166 | BIONIC_BENCHMARK(BM_mallopt_purge_all); | 
|  | 167 |  | 
| Chia-hung Duan | 4414844 | 2023-06-29 21:16:42 +0000 | [diff] [blame] | 168 | // Note that this will only test a single size class at a time so that we can | 
|  | 169 | // observe the impact of contention more often. | 
|  | 170 | #define BM_MALLOC_THREADS_THROUGHPUT(SIZE, NUM_THREADS)                                      \ | 
|  | 171 | static void BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS(benchmark::State& state) { \ | 
|  | 172 | RunThreadsThroughput(state, SIZE, NUM_THREADS);                                          \ | 
|  | 173 | }                                                                                          \ | 
|  | 174 | BIONIC_BENCHMARK(BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS); | 
|  | 175 |  | 
|  | 176 | // There are three block categories in Scudo, we choose 1 from each category. | 
|  | 177 | BM_MALLOC_THREADS_THROUGHPUT(64, 2); | 
|  | 178 | BM_MALLOC_THREADS_THROUGHPUT(64, 4); | 
|  | 179 | BM_MALLOC_THREADS_THROUGHPUT(64, 8); | 
|  | 180 | BM_MALLOC_THREADS_THROUGHPUT(512, 2); | 
|  | 181 | BM_MALLOC_THREADS_THROUGHPUT(512, 4); | 
|  | 182 | BM_MALLOC_THREADS_THROUGHPUT(512, 8); | 
|  | 183 | BM_MALLOC_THREADS_THROUGHPUT(8192, 2); | 
|  | 184 | BM_MALLOC_THREADS_THROUGHPUT(8192, 4); | 
|  | 185 | BM_MALLOC_THREADS_THROUGHPUT(8192, 8); | 
|  | 186 |  | 
| Christopher Ferris | 0dc7844 | 2018-08-09 15:19:57 -0700 | [diff] [blame] | 187 | #endif |