Use lookup tables for pixel upconversion

Bit replication has to loop many times when converting e.g. 2 bits
to 8, so use a set of lookup tables to speed things up. We still
use bit replication to get from 8 bits to 16 bits though.
diff --git a/common/rfb/PixelFormat.cxx b/common/rfb/PixelFormat.cxx
index 9e762d9..366c0a3 100644
--- a/common/rfb/PixelFormat.cxx
+++ b/common/rfb/PixelFormat.cxx
@@ -32,6 +32,42 @@
 
 using namespace rfb;
 
+rdr::U8 PixelFormat::upconvTable[256*8];  // sub-table for N-bit values starts at (N-1)*256
+
+class PixelFormat::Init {  // its only member is the constructor that fills upconvTable
+public:
+  Init();
+};
+// NOTE(review): the table is all zeros until _init is constructed -- confirm nothing converts pixels earlier
+PixelFormat::Init PixelFormat::_init;
+
+
+PixelFormat::Init::Init()
+{
+  int bits;
+
+  // Fill upconvTable: one 256-entry sub-table per source depth of
+  // 1 to 8 bits. Bit replication is almost perfect, but not quite,
+  // and a lookup is quicker when the depths differ a lot.
+
+  for (bits = 1;bits <= 8;bits++) {
+    int i, maxVal;
+    rdr::U8 *subTable;
+
+    maxVal = (1 << bits) - 1;  // largest value that fits in 'bits' bits
+    subTable = &upconvTable[(bits-1)*256];
+
+    for (i = 0;i <= maxVal;i++)
+      subTable[i] = i * 255 / maxVal;  // linear scale: 0 -> 0, maxVal -> 255
+
+    // Repeat the first maxVal+1 entries through the rest of the
+    // sub-table so lookups needn't mask off the upper index bits
+    for (;i < 256;i += maxVal+1)
+      memcpy(&subTable[i], &subTable[0], maxVal+1);
+  }
+}
+
+
 PixelFormat::PixelFormat(int b, int d, bool e, bool t,
                          int rm, int gm, int bm, int rs, int gs, int bs)
   : bpp(b), depth(d), trueColour(t), bigEndian(e),
diff --git a/common/rfb/PixelFormat.h b/common/rfb/PixelFormat.h
index c3db878..db12988 100644
--- a/common/rfb/PixelFormat.h
+++ b/common/rfb/PixelFormat.h
@@ -105,6 +105,12 @@
     int redBits, greenBits, blueBits;
     int maxBits, minBits;
     bool endianMismatch;
+
+    static rdr::U8 upconvTable[256*8];  // 8 sub-tables x 256 entries, for channel depths of 1..8 bits
+
+    class Init;  // defined in PixelFormat.cxx; its constructor fills upconvTable
+    friend class Init;
+    static Init _init;  // the one static Init instance
   };
 }
 
diff --git a/common/rfb/PixelFormat.inl b/common/rfb/PixelFormat.inl
index 4bc4a70..f9fb125 100644
--- a/common/rfb/PixelFormat.inl
+++ b/common/rfb/PixelFormat.inl
@@ -102,55 +102,33 @@
 
 inline void PixelFormat::rgbFromPixel(Pixel p, rdr::U16 *r, rdr::U16 *g, rdr::U16 *b) const
 {
-  int mb, rb, gb, bb;
+  rdr::U8 _r, _g, _b;  // hold the raw channel bits, then the 8-bit table results
 
-  /* Bit replication is much cheaper than multiplication and division */
+  _r = p >> redShift;  // stored in a U8; stray upper bits are harmless for the lookup below
+  _g = p >> greenShift;
+  _b = p >> blueShift;
 
-  mb = minBits;
-  rb = redBits;
-  gb = greenBits;
-  bb = blueBits;
+  _r = upconvTable[(redBits-1)*256 + _r];  // sub-table for this channel depth yields 0..255
+  _g = upconvTable[(greenBits-1)*256 + _g];
+  _b = upconvTable[(blueBits-1)*256 + _b];  // NOTE(review): in-bounds only for depths 1..8 -- confirm
 
-  *r = (p >> redShift) << (16 - rb);
-  *g = (p >> greenShift) << (16 - gb);
-  *b = (p >> blueShift) << (16 - bb);
-
-  while (mb < 16) {
-    *r = *r | (*r >> rb);
-    *g = *g | (*g >> gb);
-    *b = *b | (*b >> bb);
-    mb <<= 1;
-    rb <<= 1;
-    gb <<= 1;
-    bb <<= 1;
-  }
+  *r = _r << 8 | _r;  // bit replication: copy the byte into both halves for 8 -> 16 bits
+  *g = _g << 8 | _g;
+  *b = _b << 8 | _b;
 }
 
 
 inline void PixelFormat::rgbFromPixel(Pixel p, rdr::U8 *r, rdr::U8 *g, rdr::U8 *b) const
 {
-  int mb, rb, gb, bb;
+  rdr::U8 _r, _g, _b;  // raw channel bits used as table indices
 
-  /* Bit replication is much cheaper than multiplication and division */
+  _r = p >> redShift;  // stored in a U8; stray upper bits are harmless for the lookup below
+  _g = p >> greenShift;
+  _b = p >> blueShift;
 
-  mb = minBits;
-  rb = redBits;
-  gb = greenBits;
-  bb = blueBits;
-
-  *r = (p >> redShift) << (8 - rb);
-  *g = (p >> greenShift) << (8 - gb);
-  *b = (p >> blueShift) << (8 - bb);
-
-  while (mb < 8) {
-    *r = *r | (*r >> rb);
-    *g = *g | (*g >> gb);
-    *b = *b | (*b >> bb);
-    mb <<= 1;
-    rb <<= 1;
-    gb <<= 1;
-    bb <<= 1;
-  }
+  *r = upconvTable[(redBits-1)*256 + _r];  // sub-table for this channel depth yields 0..255
+  *g = upconvTable[(greenBits-1)*256 + _g];
+  *b = upconvTable[(blueBits-1)*256 + _b];  // NOTE(review): in-bounds only for depths 1..8 -- confirm
 }