Merge "Update to v6.4 kernel headers." into main
diff --git a/benchmarks/malloc_benchmark.cpp b/benchmarks/malloc_benchmark.cpp
index e733cd0..258343f 100644
--- a/benchmarks/malloc_benchmark.cpp
+++ b/benchmarks/malloc_benchmark.cpp
@@ -29,6 +29,10 @@
#include <malloc.h>
#include <unistd.h>
+#include <condition_variable>
+#include <mutex>
+#include <random>
+#include <thread>
#include <vector>
#include <benchmark/benchmark.h>
@@ -68,6 +72,89 @@
mallopt(M_DECAY_TIME, 0);
}
+static void RunThreadsThroughput(benchmark::State& state, size_t size, size_t num_threads) {
+ constexpr size_t kMaxBytes = 1 << 24;
+ constexpr size_t kMaxThreads = 8;
+ constexpr size_t kMinRounds = 4;
+ const size_t MaxAllocCounts = kMaxBytes / size;
+ std::mutex m;
+ bool ready = false;
+ std::condition_variable cv;
+ std::thread* threads[kMaxThreads];
+
+ // The goal is to create malloc/free interleaving patterns across threads, where every
+ // allocation requests `size` bytes. The bytes processed by each thread will be the same.
+ // The difference is the patterns. Here's an example:
+ //
+ // A: Allocation
+ // D: Deallocation
+ //
+ // T1 T2 T3
+ // A A A
+ // A A D
+ // A D A
+ // A D D
+ // D A A
+ // D A D
+ // D D A
+ // D D D
+ //
+ // To do this, thread `id` runs `AllocRounds` = (kMinRounds << id) rounds of `AllocCounts` =
+ // (MaxAllocCounts >> id) allocations each. NOTE(review): assumes num_threads <= kMaxThreads.
+ auto thread_task = [&](size_t id) {
+ {
+ std::unique_lock lock(m);
+ // Block until the main thread sets `ready`, i.e. until all threads are created.
+ cv.wait(lock, [&] { return ready; });
+ }
+
+ void** MemPool;
+ const size_t AllocCounts = (MaxAllocCounts >> id);
+ const size_t AllocRounds = (kMinRounds << id);
+ MemPool = new void*[AllocCounts];
+
+ for (size_t i = 0; i < AllocRounds; ++i) {
+ for (size_t j = 0; j < AllocCounts; ++j) {
+ void* ptr = malloc(size);
+ MemPool[j] = ptr;
+ }
+
+ // Use a fixed seed to reduce the noise between different rounds of the benchmark.
+ const unsigned seed = 33529;
+ std::shuffle(MemPool, &MemPool[AllocCounts], std::default_random_engine(seed));
+
+ for (size_t j = 0; j < AllocCounts; ++j) free(MemPool[j]);
+ }
+
+ delete[] MemPool;
+ };
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ // No need to acquire the lock: earlier threads were joined and no new thread exists yet.
+ ready = false;
+
+ for (size_t i = 0; i < num_threads; ++i) threads[i] = new std::thread(thread_task, i);
+
+ state.ResumeTiming();
+
+ {
+ std::unique_lock lock(m);
+ ready = true;
+ }
+
+ cv.notify_all();
+
+ for (size_t i = 0; i < num_threads; ++i) {
+ threads[i]->join();
+ delete threads[i];
+ }
+ }
+
+ const size_t ThreadsBytesProcessed = kMaxBytes * kMinRounds * num_threads;
+ state.SetBytesProcessed(ThreadsBytesProcessed * static_cast<size_t>(state.iterations()));
+}
+
static void BM_mallopt_purge(benchmark::State& state) {
RunMalloptPurge(state, M_PURGE);
}
@@ -78,4 +165,23 @@
}
BIONIC_BENCHMARK(BM_mallopt_purge_all);
+// Defines and registers BM_malloc_threads_throughput_<SIZE>_<NUM_THREADS>. Note that each
+// benchmark only tests a single size class at a time so that we can observe contention more often.
+#define BM_MALLOC_THREADS_THROUGHPUT(SIZE, NUM_THREADS) \
+ static void BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS(benchmark::State& state) { \
+ RunThreadsThroughput(state, SIZE, NUM_THREADS); \
+ } \
+ BIONIC_BENCHMARK(BM_malloc_threads_throughput_##SIZE##_##NUM_THREADS);
+
+// There are three block categories in Scudo; we choose one size from each category.
+BM_MALLOC_THREADS_THROUGHPUT(64, 2);
+BM_MALLOC_THREADS_THROUGHPUT(64, 4);
+BM_MALLOC_THREADS_THROUGHPUT(64, 8);
+BM_MALLOC_THREADS_THROUGHPUT(512, 2);
+BM_MALLOC_THREADS_THROUGHPUT(512, 4);
+BM_MALLOC_THREADS_THROUGHPUT(512, 8);
+BM_MALLOC_THREADS_THROUGHPUT(8192, 2);
+BM_MALLOC_THREADS_THROUGHPUT(8192, 4);
+BM_MALLOC_THREADS_THROUGHPUT(8192, 8);
+
#endif
diff --git a/libc/include/sys/ifunc.h b/libc/include/sys/ifunc.h
index 7fbca4a..d35600e 100644
--- a/libc/include/sys/ifunc.h
+++ b/libc/include/sys/ifunc.h
@@ -40,13 +40,15 @@
#if defined(__aarch64__)
/**
- * Provides information about hardware capabilities to ifunc resolvers.
+ * Provides information about hardware capabilities to arm64 ifunc resolvers.
*
- * Starting with API level 30, ifunc resolvers on arm64 are passed two arguments. The first is a
- * uint64_t whose value is equal to getauxval(AT_HWCAP) | _IFUNC_ARG_HWCAP. The second is a pointer
- * to a data structure of this type. Prior to API level 30, no arguments are passed to ifunc
- * resolvers. Code that wishes to be compatible with prior API levels should not accept any
- * arguments in the resolver.
+ * Prior to API level 30, arm64 ifunc resolvers are passed no arguments.
+ *
+ * Starting with API level 30, arm64 ifunc resolvers are passed two arguments.
+ * The first is a uint64_t whose value is equal to getauxval(AT_HWCAP) | _IFUNC_ARG_HWCAP.
+ * The second is a pointer to a data structure of this type.
+ *
+ * Code that wishes to be compatible with API levels before 30 must call getauxval() itself.
*/
typedef struct __ifunc_arg_t {
/** Set to sizeof(__ifunc_arg_t). */
@@ -60,9 +62,14 @@
} __ifunc_arg_t;
/**
- * If this bit is set in the first argument to an ifunc resolver, indicates that the second argument
- * is a pointer to a data structure of type __ifunc_arg_t. This bit is always set on Android
- * starting with API level 30.
+ * If this bit is set in the first argument to an ifunc resolver, the second argument
+ * is a pointer to a data structure of type __ifunc_arg_t.
+ *
+ * This bit is always set on Android starting with API level 30.
+ * This bit is meaningless before API level 30 because ifunc resolvers are not passed any arguments.
+ * This bit has no real use on Android, but is included for glibc source compatibility;
+ * glibc used this bit to distinguish the case where the ifunc resolver received a single argument,
+ * which was an evolutionary stage Android never went through.
*/
#define _IFUNC_ARG_HWCAP (1ULL << 62)
diff --git a/libc/malloc_debug/Android.bp b/libc/malloc_debug/Android.bp
index 373d497..24bb18a 100644
--- a/libc/malloc_debug/Android.bp
+++ b/libc/malloc_debug/Android.bp
@@ -197,4 +197,5 @@
"-O0",
],
test_suites: ["general-tests"],
+ test_config: "tests/AndroidTest.xml",
}
diff --git a/libc/malloc_debug/tests/AndroidTest.xml b/libc/malloc_debug/tests/AndroidTest.xml
new file mode 100644
index 0000000..c89cbb5
--- /dev/null
+++ b/libc/malloc_debug/tests/AndroidTest.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2023 The Android Open Source Project
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration description="Runs malloc_debug_system_tests.">
+ <option name="test-suite-tag" value="apct" />
+ <option name="test-suite-tag" value="apct-native" />
+
+ <!-- cannot be autogenerated: b/153565474 -->
+ <target_preparer class="com.android.tradefed.targetprep.RebootTargetPreparer">
+ <!-- Flake mitigation, in case the device is in a bad state. -->
+ <option name="pre-reboot" value="true" />
+ <!-- This test can become flaky if lots of other tests are
+ running at the same time, so do this in an attempt to run
+ this test as isolated from other tests as
+ possible. -->
+ <option name="post-reboot" value="true" />
+ </target_preparer>
+
+ <target_preparer class="com.android.tradefed.targetprep.RootTargetPreparer"/>
+
+ <target_preparer class="com.android.tradefed.targetprep.PushFilePreparer">
+ <option name="cleanup" value="true" />
+ <option name="push" value="malloc_debug_system_tests->/data/local/tests/unrestricted/malloc_debug_system_tests" />
+ </target_preparer>
+
+ <test class="com.android.tradefed.testtype.GTest" >
+ <option name="native-test-device-path" value="/data/local/tests/unrestricted/" />
+ <option name="module-name" value="malloc_debug_system_tests" />
+ </test>
+</configuration>
diff --git a/libc/private/bionic_constants.h b/libc/private/bionic_constants.h
index d7f4474..05914f4 100644
--- a/libc/private/bionic_constants.h
+++ b/libc/private/bionic_constants.h
@@ -21,7 +21,9 @@
// Size of the shadow call stack. This can be small because these stacks only
// contain return addresses. This must be a power of 2 so the mask trick works.
// See the SCS commentary in pthread_internal.h for more detail.
-#define SCS_SIZE (8 * 1024)
+// SCS_SIZE must be a multiple of page size.
+// We used 8KiB until V but switched to 16KiB in V to experiment with 16KiB pages.
+#define SCS_SIZE (16 * 1024)
#define SCS_MASK (SCS_SIZE - 1)
// The shadow call stack is allocated at an aligned address within a guard region of this size. The
diff --git a/libc/tools/generate_notice.py b/libc/tools/generate_notice.py
index e004d74..505708a 100755
--- a/libc/tools/generate_notice.py
+++ b/libc/tools/generate_notice.py
@@ -33,6 +33,7 @@
".pyc",
".swp",
".txt",
+ ".xml",
]
if path.suffix in uninteresting_extensions:
return False