Create one decode thread for each CPU

We can start creating more decoding threads now that we handle
rect ordering properly. No point in creating more threads than
there are CPUs though.
diff --git a/common/os/Thread.cxx b/common/os/Thread.cxx
index 7150a7f..f38a10b 100644
--- a/common/os/Thread.cxx
+++ b/common/os/Thread.cxx
@@ -20,6 +20,7 @@
 #include <windows.h>
 #else
 #include <pthread.h>
+#include <unistd.h>
 #endif
 
 #include <rdr/Exception.h>
@@ -99,6 +100,36 @@
   return running;
 }
 
+// Number of CPUs currently usable by the system; 0 if sysconf() fails.
+size_t Thread::getSystemCPUCount()
+{
+#ifdef WIN32
+  SYSTEM_INFO si;
+  size_t count = 0;
+  // dwActiveProcessorMask is a DWORD_PTR (64 bits wide on Win64); a
+  // plain DWORD would silently drop every processor above bit 31.
+  DWORD_PTR mask;
+
+  GetSystemInfo(&si);
+  // Each set bit in the mask is one processor configured into the system
+  for (mask = si.dwActiveProcessorMask;mask != 0;mask >>= 1) {
+    if (mask & 0x1)
+      count++;
+  }
+
+  // Never report more processors than dwNumberOfProcessors states
+  if (count > si.dwNumberOfProcessors)
+    count = si.dwNumberOfProcessors;
+  return count;
+#else
+  // sysconf() returns -1 on failure; treat any non-positive result as 0
+  const long ret = sysconf(_SC_NPROCESSORS_ONLN);
+  if (ret < 1)
+    return 0;
+  return static_cast<size_t>(ret);
+#endif
+}
+
 #ifdef WIN32
 long unsigned __stdcall Thread::startRoutine(void* data)
 #else