|  | /* | 
|  | * Copyright (C) 2019 The Android Open Source Project | 
|  | * All rights reserved. | 
|  | * | 
|  | * Redistribution and use in source and binary forms, with or without | 
|  | * modification, are permitted provided that the following conditions | 
|  | * are met: | 
|  | *  * Redistributions of source code must retain the above copyright | 
|  | *    notice, this list of conditions and the following disclaimer. | 
|  | *  * Redistributions in binary form must reproduce the above copyright | 
|  | *    notice, this list of conditions and the following disclaimer in | 
|  | *    the documentation and/or other materials provided with the | 
|  | *    distribution. | 
|  | * | 
|  | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
|  | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
|  | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | 
|  | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | 
|  | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | 
|  | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | 
|  | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | 
|  | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | 
|  | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | 
|  | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | 
|  | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 
|  | * SUCH DAMAGE. | 
|  | */ | 
|  |  | 
|  | #include <malloc.h> | 
|  | #include <unistd.h> | 
|  |  | 
|  | #include <condition_variable> | 
|  | #include <mutex> | 
|  | #include <random> | 
|  | #include <thread> | 
|  | #include <vector> | 
|  |  | 
|  | #include <benchmark/benchmark.h> | 
|  | #include "ScopedDecayTimeRestorer.h" | 
|  | #include "util.h" | 
|  |  | 
|  | #if defined(__BIONIC__) | 
|  |  | 
|  | static void RunMalloptPurge(benchmark::State& state, int purge_value) { | 
|  | ScopedDecayTimeRestorer restorer; | 
|  |  | 
|  | static size_t sizes[] = {8, 16, 32, 64, 128, 1024, 4096, 16384, 65536, 131072, 1048576}; | 
|  | static int pagesize = getpagesize(); | 
|  | mallopt(M_DECAY_TIME, 1); | 
|  | mallopt(M_PURGE_ALL, 0); | 
|  | for (auto _ : state) { | 
|  | state.PauseTiming(); | 
|  | std::vector<void*> ptrs; | 
|  | for (auto size : sizes) { | 
|  | // Allocate at least two pages worth of the allocations. | 
|  | for (size_t allocated = 0; allocated < 2 * static_cast<size_t>(pagesize); allocated += size) { | 
|  | void* ptr = malloc(size); | 
|  | if (ptr == nullptr) { | 
|  | state.SkipWithError("Failed to allocate memory"); | 
|  | } | 
|  | MakeAllocationResident(ptr, size, pagesize); | 
|  | ptrs.push_back(ptr); | 
|  | } | 
|  | } | 
|  | // Free the memory, which should leave many of the pages resident until | 
|  | // the purge call. | 
|  | for (auto ptr : ptrs) { | 
|  | free(ptr); | 
|  | } | 
|  | ptrs.clear(); | 
|  | state.ResumeTiming(); | 
|  |  | 
|  | mallopt(purge_value, 0); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void RunThreadsThroughput(benchmark::State& state, size_t size, size_t num_threads) { | 
|  | constexpr size_t kMaxBytes = 1 << 24; | 
|  | constexpr size_t kMaxThreads = 8; | 
|  | constexpr size_t kMinRounds = 4; | 
|  | const size_t MaxAllocCounts = kMaxBytes / size; | 
|  | std::mutex m; | 
|  | bool ready = false; | 
|  | std::condition_variable cv; | 
|  | std::thread* threads[kMaxThreads]; | 
|  |  | 
|  | // The goal is to create malloc/free interleaving patterns across threads. | 
|  | // The bytes processed by each thread will be the same. The difference is the | 
|  | // patterns. Here's an example: | 
|  | // | 
|  | // A: Allocation | 
|  | // D: Deallocation | 
|  | // | 
|  | //   T1    T2    T3 | 
|  | //   A     A     A | 
|  | //   A     A     D | 
|  | //   A     D     A | 
|  | //   A     D     D | 
|  | //   D     A     A | 
|  | //   D     A     D | 
|  | //   D     D     A | 
|  | //   D     D     D | 
|  | // | 
|  | // To do this, `AllocCounts` and `AllocRounds` will be adjusted according to the | 
|  | // thread id. | 
|  | auto thread_task = [&](size_t id) { | 
|  | { | 
|  | std::unique_lock lock(m); | 
|  | // Wait until all threads are created. | 
|  | cv.wait(lock, [&] { return ready; }); | 
|  | } | 
|  |  | 
|  | void** MemPool; | 
|  | const size_t AllocCounts = (MaxAllocCounts >> id); | 
|  | const size_t AllocRounds = (kMinRounds << id); | 
|  | MemPool = new void*[AllocCounts]; | 
|  |  | 
|  | for (size_t i = 0; i < AllocRounds; ++i) { | 
|  | for (size_t j = 0; j < AllocCounts; ++j) { | 
|  | void* ptr = malloc(size); | 
|  | MemPool[j] = ptr; | 
|  | } | 
|  |  | 
|  | // Use a fix seed to reduce the noise of different round of benchmark. | 
|  | const unsigned seed = 33529; | 
|  | std::shuffle(MemPool, &MemPool[AllocCounts], std::default_random_engine(seed)); | 
|  |  | 
|  | for (size_t j = 0; j < AllocCounts; ++j) free(MemPool[j]); | 
|  | } | 
|  |  | 
|  | delete[] MemPool; | 
|  | }; | 
|  |  | 
|  | for (auto _ : state) { | 
|  | state.PauseTiming(); | 
|  | // Don't need to acquire the lock because no thread is created. | 
|  | ready = false; | 
|  |  | 
|  | for (size_t i = 0; i < num_threads; ++i) threads[i] = new std::thread(thread_task, i); | 
|  |  | 
|  | state.ResumeTiming(); | 
|  |  | 
|  | { | 
|  | std::unique_lock lock(m); | 
|  | ready = true; | 
|  | } | 
|  |  | 
|  | cv.notify_all(); | 
|  |  | 
|  | for (size_t i = 0; i < num_threads; ++i) { | 
|  | threads[i]->join(); | 
|  | delete threads[i]; | 
|  | } | 
|  | } | 
|  |  | 
|  | const size_t ThreadsBytesProcessed = kMaxBytes * kMinRounds * num_threads; | 
|  | state.SetBytesProcessed(ThreadsBytesProcessed * static_cast<size_t>(state.iterations())); | 
|  | } | 
|  |  | 
|  | static void BM_mallopt_purge(benchmark::State& state) { | 
|  | RunMalloptPurge(state, M_PURGE); | 
|  | } | 
|  | BIONIC_BENCHMARK(BM_mallopt_purge); | 
|  |  | 
|  | static void BM_mallopt_purge_all(benchmark::State& state) { | 
|  | RunMalloptPurge(state, M_PURGE_ALL); | 
|  | } | 
|  | BIONIC_BENCHMARK(BM_mallopt_purge_all); | 
|  |  | 
|  | // Note that this will only test a single size class at a time so that we can | 
|  | // observe the impact of contention more often. | 
|  | #define BM_MALLOC_THREADS_THROUGHPUT(SIZE, NUM_THREADS)                                      \ | 
|  | static void BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS(benchmark::State& state) { \ | 
|  | RunThreadsThroughput(state, SIZE, NUM_THREADS);                                          \ | 
|  | }                                                                                          \ | 
|  | BIONIC_BENCHMARK(BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS); | 
|  |  | 
|  | // There are three block categories in Scudo, we choose 1 from each category. | 
|  | BM_MALLOC_THREADS_THROUGHPUT(64, 2); | 
|  | BM_MALLOC_THREADS_THROUGHPUT(64, 4); | 
|  | BM_MALLOC_THREADS_THROUGHPUT(64, 8); | 
|  | BM_MALLOC_THREADS_THROUGHPUT(512, 2); | 
|  | BM_MALLOC_THREADS_THROUGHPUT(512, 4); | 
|  | BM_MALLOC_THREADS_THROUGHPUT(512, 8); | 
|  | BM_MALLOC_THREADS_THROUGHPUT(8192, 2); | 
|  | BM_MALLOC_THREADS_THROUGHPUT(8192, 4); | 
|  | BM_MALLOC_THREADS_THROUGHPUT(8192, 8); | 
|  |  | 
|  | #endif |