blob: 258343fca777ac87cd643d7b3ba7ffd58496cdc9 [file] [log] [blame]
Christopher Ferris0dc78442018-08-09 15:19:57 -07001/*
Christopher Ferris5a3c9202019-12-04 15:57:07 -08002 * Copyright (C) 2019 The Android Open Source Project
Christopher Ferris0dc78442018-08-09 15:19:57 -07003 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
#include <malloc.h>
#include <unistd.h>

#include <algorithm>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <random>
#include <thread>
#include <vector>

#include <benchmark/benchmark.h>
#include "util.h"
40
41#if defined(__BIONIC__)
42
Christopher Ferrisd86eb862023-02-28 12:45:54 -080043static void RunMalloptPurge(benchmark::State& state, int purge_value) {
Christopher Ferris5a3c9202019-12-04 15:57:07 -080044 static size_t sizes[] = {8, 16, 32, 64, 128, 1024, 4096, 16384, 65536, 131072, 1048576};
45 static int pagesize = getpagesize();
Christopher Ferris7ec2c8a2019-04-05 12:47:39 -070046 mallopt(M_DECAY_TIME, 1);
Christopher Ferrisd86eb862023-02-28 12:45:54 -080047 mallopt(M_PURGE_ALL, 0);
Christopher Ferris7ec2c8a2019-04-05 12:47:39 -070048 for (auto _ : state) {
Christopher Ferris5a3c9202019-12-04 15:57:07 -080049 state.PauseTiming();
50 std::vector<void*> ptrs;
51 for (auto size : sizes) {
52 // Allocate at least two pages worth of the allocations.
53 for (size_t allocated = 0; allocated < 2 * static_cast<size_t>(pagesize); allocated += size) {
54 void* ptr = malloc(size);
55 if (ptr == nullptr) {
56 state.SkipWithError("Failed to allocate memory");
57 }
58 MakeAllocationResident(ptr, size, pagesize);
59 ptrs.push_back(ptr);
60 }
61 }
62 // Free the memory, which should leave many of the pages resident until
63 // the purge call.
64 for (auto ptr : ptrs) {
65 free(ptr);
66 }
67 ptrs.clear();
68 state.ResumeTiming();
Christopher Ferris7ec2c8a2019-04-05 12:47:39 -070069
Christopher Ferrisd86eb862023-02-28 12:45:54 -080070 mallopt(purge_value, 0);
Christopher Ferris5a3c9202019-12-04 15:57:07 -080071 }
Christopher Ferris7ec2c8a2019-04-05 12:47:39 -070072 mallopt(M_DECAY_TIME, 0);
73}
Christopher Ferrisd86eb862023-02-28 12:45:54 -080074
Chia-hung Duan44148442023-06-29 21:16:42 +000075static void RunThreadsThroughput(benchmark::State& state, size_t size, size_t num_threads) {
76 constexpr size_t kMaxBytes = 1 << 24;
77 constexpr size_t kMaxThreads = 8;
78 constexpr size_t kMinRounds = 4;
79 const size_t MaxAllocCounts = kMaxBytes / size;
80 std::mutex m;
81 bool ready = false;
82 std::condition_variable cv;
83 std::thread* threads[kMaxThreads];
84
85 // The goal is to create malloc/free interleaving patterns across threads.
86 // The bytes processed by each thread will be the same. The difference is the
87 // patterns. Here's an example:
88 //
89 // A: Allocation
90 // D: Deallocation
91 //
92 // T1 T2 T3
93 // A A A
94 // A A D
95 // A D A
96 // A D D
97 // D A A
98 // D A D
99 // D D A
100 // D D D
101 //
102 // To do this, `AllocCounts` and `AllocRounds` will be adjusted according to the
103 // thread id.
104 auto thread_task = [&](size_t id) {
105 {
106 std::unique_lock lock(m);
107 // Wait until all threads are created.
108 cv.wait(lock, [&] { return ready; });
109 }
110
111 void** MemPool;
112 const size_t AllocCounts = (MaxAllocCounts >> id);
113 const size_t AllocRounds = (kMinRounds << id);
114 MemPool = new void*[AllocCounts];
115
116 for (size_t i = 0; i < AllocRounds; ++i) {
117 for (size_t j = 0; j < AllocCounts; ++j) {
118 void* ptr = malloc(size);
119 MemPool[j] = ptr;
120 }
121
122 // Use a fix seed to reduce the noise of different round of benchmark.
123 const unsigned seed = 33529;
124 std::shuffle(MemPool, &MemPool[AllocCounts], std::default_random_engine(seed));
125
126 for (size_t j = 0; j < AllocCounts; ++j) free(MemPool[j]);
127 }
128
129 delete[] MemPool;
130 };
131
132 for (auto _ : state) {
133 state.PauseTiming();
134 // Don't need to acquire the lock because no thread is created.
135 ready = false;
136
137 for (size_t i = 0; i < num_threads; ++i) threads[i] = new std::thread(thread_task, i);
138
139 state.ResumeTiming();
140
141 {
142 std::unique_lock lock(m);
143 ready = true;
144 }
145
146 cv.notify_all();
147
148 for (size_t i = 0; i < num_threads; ++i) {
149 threads[i]->join();
150 delete threads[i];
151 }
152 }
153
154 const size_t ThreadsBytesProcessed = kMaxBytes * kMinRounds * num_threads;
155 state.SetBytesProcessed(ThreadsBytesProcessed * static_cast<size_t>(state.iterations()));
156}
157
Christopher Ferrisd86eb862023-02-28 12:45:54 -0800158static void BM_mallopt_purge(benchmark::State& state) {
159 RunMalloptPurge(state, M_PURGE);
160}
Christopher Ferris5a3c9202019-12-04 15:57:07 -0800161BIONIC_BENCHMARK(BM_mallopt_purge);
Christopher Ferris0dc78442018-08-09 15:19:57 -0700162
Christopher Ferrisd86eb862023-02-28 12:45:54 -0800163static void BM_mallopt_purge_all(benchmark::State& state) {
164 RunMalloptPurge(state, M_PURGE_ALL);
165}
166BIONIC_BENCHMARK(BM_mallopt_purge_all);
167
// Defines and registers a benchmark named
// BM_malloc_threads_throughput_<SIZE>_<NUM_THREADS> that forwards to
// RunThreadsThroughput(state, SIZE, NUM_THREADS).
//
// Each generated benchmark exercises a single allocation size, so only one
// size class is tested at a time and the impact of contention shows up more
// often.
#define BM_MALLOC_THREADS_THROUGHPUT(SIZE, NUM_THREADS)                                      \
  static void BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS(benchmark::State& state) { \
    RunThreadsThroughput(state, SIZE, NUM_THREADS);                                          \
  }                                                                                          \
  BIONIC_BENCHMARK(BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS);
175
176// There are three block categories in Scudo, we choose 1 from each category.
177BM_MALLOC_THREADS_THROUGHPUT(64, 2);
178BM_MALLOC_THREADS_THROUGHPUT(64, 4);
179BM_MALLOC_THREADS_THROUGHPUT(64, 8);
180BM_MALLOC_THREADS_THROUGHPUT(512, 2);
181BM_MALLOC_THREADS_THROUGHPUT(512, 4);
182BM_MALLOC_THREADS_THROUGHPUT(512, 8);
183BM_MALLOC_THREADS_THROUGHPUT(8192, 2);
184BM_MALLOC_THREADS_THROUGHPUT(8192, 4);
185BM_MALLOC_THREADS_THROUGHPUT(8192, 8);
186
Christopher Ferris0dc78442018-08-09 15:19:57 -0700187#endif