versioner: use a single work queue.

Previously, each thread was assigned a fixed list of work, and the main
thread would block until every thread was finished. This left most
cores sitting idle for the last few hundred milliseconds while a few
particularly long-running threads kept working. Use a single work queue
to distribute load evenly across the threads.
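
The pattern is just a shared atomic cursor that every worker bumps
until the job list is exhausted; a minimal standalone sketch follows
(placeholder job type and processing, not the versioner code itself):

  #include <algorithm>
  #include <atomic>
  #include <cstdio>
  #include <thread>
  #include <vector>

  int main() {
    std::vector<int> jobs(100, 1);      // placeholder work items
    std::atomic<size_t> job_index(0);   // shared cursor into the queue
    std::vector<std::thread> threads;

    size_t thread_count = std::max(1u, std::thread::hardware_concurrency());
    for (size_t i = 0; i < thread_count; ++i) {
      threads.emplace_back([&jobs, &job_index]() {
        while (true) {
          size_t idx = job_index++;     // claim the next unclaimed job
          if (idx >= jobs.size()) {
            return;                     // queue drained, worker exits
          }
          printf("job %zu -> %d\n", idx, jobs[idx]);
        }
      });
    }

    for (auto& thread : threads) {
      thread.join();
    }
  }

Faster workers naturally claim more jobs, so no core idles while a
straggler finishes off a fixed slice.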

Bug: http://b/32748936
Test: python run_tests.py
Change-Id: I80e231ece3b95e2721a32f658905841b89a8dc3b
diff --git a/tools/versioner/src/versioner.cpp b/tools/versioner/src/versioner.cpp
index 86349e1..b46847b 100644
--- a/tools/versioner/src/versioner.cpp
+++ b/tools/versioner/src/versioner.cpp
@@ -22,7 +22,12 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#if defined(__linux__)
+#include <sched.h>
+#endif
+
 #include <atomic>
+#include <chrono>
 #include <functional>
 #include <iostream>
 #include <map>
@@ -36,6 +41,7 @@
 
 #include <llvm/ADT/StringRef.h>
 
+#include <android-base/macros.h>
 #include <android-base/parseint.h>
 
 #include "Arch.h"
@@ -48,11 +54,27 @@
 
 #include "versioner.h"
 
+using namespace std::chrono_literals;
 using namespace std::string_literals;
 
 bool add_include;
 bool verbose;
-static int max_thread_count = 48;
+
+static int getCpuCount();
+static int max_thread_count = getCpuCount();
+
+static int getCpuCount() {
+#if defined(__linux__)
+  cpu_set_t cpu_set;
+  int rc = sched_getaffinity(getpid(), sizeof(cpu_set), &cpu_set);
+  if (rc != 0) {
+    err(1, "sched_getaffinity failed");
+  }
+  return CPU_COUNT(&cpu_set);
+#else
+  return 1;
+#endif
+}
 
 static CompilationRequirements collectRequirements(const Arch& arch, const std::string& header_dir,
                                                    const std::string& dependency_dir) {
@@ -158,6 +180,7 @@
   initializeTargetCC1FlagCache(vfs, types, requirements);
 
   std::vector<std::pair<CompilationType, const std::string&>> jobs;
+  std::atomic<size_t> job_index(0);
   for (CompilationType type : types) {
     CompilationRequirements& req = requirements[type.arch];
     for (const std::string& header : req.headers) {
@@ -173,13 +196,17 @@
     }
   } else {
     // Spawn threads.
+    size_t cpu_count = getCpuCount();
     for (size_t i = 0; i < thread_count; ++i) {
-      threads.emplace_back([&jobs, &result, &header_dir, vfs, thread_count, i]() {
-        size_t index = i;
-        while (index < jobs.size()) {
-          const auto& job = jobs[index];
+      threads.emplace_back([&jobs, &job_index, &result, &header_dir, vfs, cpu_count, i]() {
+        while (true) {
+          size_t idx = job_index++;
+          if (idx >= jobs.size()) {
+            return;
+          }
+
+          const auto& job = jobs[idx];
           compileHeader(vfs, result.get(), job.first, job.second);
-          index += thread_count;
         }
       });
     }
@@ -572,8 +599,15 @@
     symbol_database = parsePlatforms(compilation_types, platform_dir);
   }
 
+  auto start = std::chrono::high_resolution_clock::now();
   std::unique_ptr<HeaderDatabase> declaration_database =
       compileHeaders(compilation_types, header_dir, dependency_dir);
+  auto end = std::chrono::high_resolution_clock::now();
+
+  if (verbose) {
+    auto diff = (end - start) / 1.0ms;
+    printf("Compiled headers for %zu targets in %0.2LFms\n", compilation_types.size(), diff);
+  }
 
   bool failed = false;
   if (dump) {