versioner: use a single work queue.
Previously, each thread was assigned a fixed list of work, and the main
thread would block until every thread was finished, leading to most
cores sitting idle for the last few hundred milliseconds while a few
particularly long running threads would keep working. Use a single work
queue to evenly distribute load across the threads.
Bug: http://b/32748936
Test: python run_tests.py
Change-Id: I80e231ece3b95e2721a32f658905841b89a8dc3b
diff --git a/tools/versioner/src/versioner.cpp b/tools/versioner/src/versioner.cpp
index 86349e1..b46847b 100644
--- a/tools/versioner/src/versioner.cpp
+++ b/tools/versioner/src/versioner.cpp
@@ -22,7 +22,12 @@
#include <sys/types.h>
#include <unistd.h>
+#if defined(__linux__)
+#include <sched.h>
+#endif
+
#include <atomic>
+#include <chrono>
#include <functional>
#include <iostream>
#include <map>
@@ -36,6 +41,7 @@
#include <llvm/ADT/StringRef.h>
+#include <android-base/macros.h>
#include <android-base/parseint.h>
#include "Arch.h"
@@ -48,11 +54,27 @@
#include "versioner.h"
+using namespace std::chrono_literals;
using namespace std::string_literals;
bool add_include;
bool verbose;
-static int max_thread_count = 48;
+
+static int getCpuCount();
+static int max_thread_count = getCpuCount();
+
+static int getCpuCount() {
+#if defined(__linux__)
+ cpu_set_t cpu_set;
+ int rc = sched_getaffinity(getpid(), sizeof(cpu_set), &cpu_set);
+ if (rc != 0) {
+ err(1, "sched_getaffinity failed");
+ }
+ return CPU_COUNT(&cpu_set);
+#else
+ return 1;
+#endif
+}
static CompilationRequirements collectRequirements(const Arch& arch, const std::string& header_dir,
const std::string& dependency_dir) {
@@ -158,6 +180,7 @@
initializeTargetCC1FlagCache(vfs, types, requirements);
std::vector<std::pair<CompilationType, const std::string&>> jobs;
+ std::atomic<size_t> job_index(0);
for (CompilationType type : types) {
CompilationRequirements& req = requirements[type.arch];
for (const std::string& header : req.headers) {
@@ -173,13 +196,17 @@
}
} else {
// Spawn threads.
+ size_t cpu_count = getCpuCount();
for (size_t i = 0; i < thread_count; ++i) {
- threads.emplace_back([&jobs, &result, &header_dir, vfs, thread_count, i]() {
- size_t index = i;
- while (index < jobs.size()) {
- const auto& job = jobs[index];
+ threads.emplace_back([&jobs, &job_index, &result, &header_dir, vfs, cpu_count, i]() {
+ while (true) {
+ size_t idx = job_index++;
+ if (idx >= jobs.size()) {
+ return;
+ }
+
+ const auto& job = jobs[idx];
compileHeader(vfs, result.get(), job.first, job.second);
- index += thread_count;
}
});
}
@@ -572,8 +599,15 @@
symbol_database = parsePlatforms(compilation_types, platform_dir);
}
+ auto start = std::chrono::high_resolution_clock::now();
std::unique_ptr<HeaderDatabase> declaration_database =
compileHeaders(compilation_types, header_dir, dependency_dir);
+ auto end = std::chrono::high_resolution_clock::now();
+
+ if (verbose) {
+ auto diff = (end - start) / 1.0ms;
+ printf("Compiled headers for %zu targets in %0.2LFms\n", compilation_types.size(), diff);
+ }
bool failed = false;
if (dump) {