Create one decode thread for each CPU

We can start creating more decoding threads now that we handle
rect ordering properly. There is no point in creating more threads
than there are CPUs, though.
diff --git a/common/os/Thread.h b/common/os/Thread.h
index 41d9486..1a9aa54 100644
--- a/common/os/Thread.h
+++ b/common/os/Thread.h
@@ -32,6 +32,9 @@
 
     bool isRunning();
 
+  public:
+    static size_t getSystemCPUCount();
+
   protected:
     virtual void worker() = 0;