| /* |
| * Copyright (C) 2019 The Android Open Source Project |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS |
| * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| #include <malloc.h> |
| #include <unistd.h> |
| |
| #include <condition_variable> |
| #include <mutex> |
| #include <random> |
| #include <thread> |
| #include <vector> |
| |
| #include <benchmark/benchmark.h> |
| #include "ScopedDecayTimeRestorer.h" |
| #include "util.h" |
| |
| #if defined(__BIONIC__) |
| |
| static void RunMalloptPurge(benchmark::State& state, int purge_value) { |
| ScopedDecayTimeRestorer restorer; |
| |
| static size_t sizes[] = {8, 16, 32, 64, 128, 1024, 4096, 16384, 65536, 131072, 1048576}; |
| static int pagesize = getpagesize(); |
| mallopt(M_DECAY_TIME, 1); |
| mallopt(M_PURGE_ALL, 0); |
| for (auto _ : state) { |
| state.PauseTiming(); |
| std::vector<void*> ptrs; |
| for (auto size : sizes) { |
| // Allocate at least two pages worth of the allocations. |
| for (size_t allocated = 0; allocated < 2 * static_cast<size_t>(pagesize); allocated += size) { |
| void* ptr = malloc(size); |
| if (ptr == nullptr) { |
| state.SkipWithError("Failed to allocate memory"); |
| } |
| MakeAllocationResident(ptr, size, pagesize); |
| ptrs.push_back(ptr); |
| } |
| } |
| // Free the memory, which should leave many of the pages resident until |
| // the purge call. |
| for (auto ptr : ptrs) { |
| free(ptr); |
| } |
| ptrs.clear(); |
| state.ResumeTiming(); |
| |
| mallopt(purge_value, 0); |
| } |
| } |
| |
| static void RunThreadsThroughput(benchmark::State& state, size_t size, size_t num_threads) { |
| constexpr size_t kMaxBytes = 1 << 24; |
| constexpr size_t kMaxThreads = 8; |
| constexpr size_t kMinRounds = 4; |
| const size_t MaxAllocCounts = kMaxBytes / size; |
| std::mutex m; |
| bool ready = false; |
| std::condition_variable cv; |
| std::thread* threads[kMaxThreads]; |
| |
| // The goal is to create malloc/free interleaving patterns across threads. |
| // The bytes processed by each thread will be the same. The difference is the |
| // patterns. Here's an example: |
| // |
| // A: Allocation |
| // D: Deallocation |
| // |
| // T1 T2 T3 |
| // A A A |
| // A A D |
| // A D A |
| // A D D |
| // D A A |
| // D A D |
| // D D A |
| // D D D |
| // |
| // To do this, `AllocCounts` and `AllocRounds` will be adjusted according to the |
| // thread id. |
| auto thread_task = [&](size_t id) { |
| { |
| std::unique_lock lock(m); |
| // Wait until all threads are created. |
| cv.wait(lock, [&] { return ready; }); |
| } |
| |
| void** MemPool; |
| const size_t AllocCounts = (MaxAllocCounts >> id); |
| const size_t AllocRounds = (kMinRounds << id); |
| MemPool = new void*[AllocCounts]; |
| |
| for (size_t i = 0; i < AllocRounds; ++i) { |
| for (size_t j = 0; j < AllocCounts; ++j) { |
| void* ptr = malloc(size); |
| MemPool[j] = ptr; |
| } |
| |
| // Use a fix seed to reduce the noise of different round of benchmark. |
| const unsigned seed = 33529; |
| std::shuffle(MemPool, &MemPool[AllocCounts], std::default_random_engine(seed)); |
| |
| for (size_t j = 0; j < AllocCounts; ++j) free(MemPool[j]); |
| } |
| |
| delete[] MemPool; |
| }; |
| |
| for (auto _ : state) { |
| state.PauseTiming(); |
| // Don't need to acquire the lock because no thread is created. |
| ready = false; |
| |
| for (size_t i = 0; i < num_threads; ++i) threads[i] = new std::thread(thread_task, i); |
| |
| state.ResumeTiming(); |
| |
| { |
| std::unique_lock lock(m); |
| ready = true; |
| } |
| |
| cv.notify_all(); |
| |
| for (size_t i = 0; i < num_threads; ++i) { |
| threads[i]->join(); |
| delete threads[i]; |
| } |
| } |
| |
| const size_t ThreadsBytesProcessed = kMaxBytes * kMinRounds * num_threads; |
| state.SetBytesProcessed(ThreadsBytesProcessed * static_cast<size_t>(state.iterations())); |
| } |
| |
| static void BM_mallopt_purge(benchmark::State& state) { |
| RunMalloptPurge(state, M_PURGE); |
| } |
| BIONIC_BENCHMARK(BM_mallopt_purge); |
| |
| static void BM_mallopt_purge_all(benchmark::State& state) { |
| RunMalloptPurge(state, M_PURGE_ALL); |
| } |
| BIONIC_BENCHMARK(BM_mallopt_purge_all); |
| |
| // Note that this will only test a single size class at a time so that we can |
| // observe the impact of contention more often. |
| #define BM_MALLOC_THREADS_THROUGHPUT(SIZE, NUM_THREADS) \ |
| static void BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS(benchmark::State& state) { \ |
| RunThreadsThroughput(state, SIZE, NUM_THREADS); \ |
| } \ |
| BIONIC_BENCHMARK(BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS); |
| |
| // There are three block categories in Scudo, we choose 1 from each category. |
| BM_MALLOC_THREADS_THROUGHPUT(64, 2); |
| BM_MALLOC_THREADS_THROUGHPUT(64, 4); |
| BM_MALLOC_THREADS_THROUGHPUT(64, 8); |
| BM_MALLOC_THREADS_THROUGHPUT(512, 2); |
| BM_MALLOC_THREADS_THROUGHPUT(512, 4); |
| BM_MALLOC_THREADS_THROUGHPUT(512, 8); |
| BM_MALLOC_THREADS_THROUGHPUT(8192, 2); |
| BM_MALLOC_THREADS_THROUGHPUT(8192, 4); |
| BM_MALLOC_THREADS_THROUGHPUT(8192, 8); |
| |
| #endif |