Merge branch 'multicore' of https://github.com/CendioOssman/tigervnc
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 755cccb..773556a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -80,13 +80,19 @@
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
 endif()
 
+option(ENABLE_TSAN "Enable thread sanitizer support" OFF)
+if(ENABLE_TSAN AND NOT WIN32 AND NOT APPLE AND CMAKE_SIZEOF_VOID_P MATCHES 8)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
+endif()
+
 if(NOT DEFINED BUILD_WINVNC)
   set(BUILD_WINVNC 1)
 endif()
 
-# Minimum version is Windows XP SP2 (5.2)
+# Minimum version is Windows Vista/2008 (6.0)
 if(WIN32)
-  add_definitions(-D_WIN32_IE=0x0502 -D_WIN32_WINNT=0x0502)
+  add_definitions(-D_WIN32_WINNT=0x0600)
 endif()
 
 if(CMAKE_SIZEOF_VOID_P MATCHES 8)
diff --git a/cmake/StaticBuild.cmake b/cmake/StaticBuild.cmake
index 5a3d998..6db0e14 100644
--- a/cmake/StaticBuild.cmake
+++ b/cmake/StaticBuild.cmake
@@ -119,6 +119,13 @@
   if(ENABLE_ASAN AND NOT WIN32 AND NOT APPLE)
     set(STATIC_BASE_LIBRARIES "${STATIC_BASE_LIBRARIES} -Wl,-Bstatic -lasan -Wl,-Bdynamic -ldl -lm -lpthread")
   endif()
+  if(ENABLE_TSAN AND NOT WIN32 AND NOT APPLE AND CMAKE_SIZEOF_VOID_P MATCHES 8)
+    # libtsan redefines some C++ symbols which then conflict with a
+    # statically linked libstdc++. Work around this by allowing multiple
+    # definitions. The linker will pick the first one (i.e. the one
+    # from libtsan).
+    set(STATIC_BASE_LIBRARIES "${STATIC_BASE_LIBRARIES} -Wl,-z -Wl,muldefs -Wl,-Bstatic -ltsan -Wl,-Bdynamic -ldl -lm")
+  endif()
   if(WIN32)
     set(STATIC_BASE_LIBRARIES "${STATIC_BASE_LIBRARIES} -lmingw32 -lgcc_eh -lgcc -lmoldname -lmingwex -lmsvcrt")
     set(STATIC_BASE_LIBRARIES "${STATIC_BASE_LIBRARIES} -luser32 -lkernel32 -ladvapi32 -lshell32")
diff --git a/common/os/CMakeLists.txt b/common/os/CMakeLists.txt
index f082eef..7644341 100644
--- a/common/os/CMakeLists.txt
+++ b/common/os/CMakeLists.txt
@@ -1,9 +1,15 @@
 include_directories(${CMAKE_SOURCE_DIR}/common)
 
 add_library(os STATIC
+  Mutex.cxx
+  Thread.cxx
   w32tiger.c
   os.cxx)
 
 if(UNIX)
+  target_link_libraries(os pthread)
+endif()
+
+if(UNIX)
   libtool_create_control_file(os)
 endif()
diff --git a/common/os/Mutex.cxx b/common/os/Mutex.cxx
new file mode 100644
index 0000000..fcbd0ac
--- /dev/null
+++ b/common/os/Mutex.cxx
@@ -0,0 +1,162 @@
+/* Copyright 2015 Pierre Ossman for Cendio AB
+ * 
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <pthread.h>
+#endif
+
+#include <rdr/Exception.h>
+
+#include <os/Mutex.h>
+
+using namespace os;
+
+Mutex::Mutex()
+{
+#ifdef WIN32
+  systemMutex = new CRITICAL_SECTION;
+  InitializeCriticalSection((CRITICAL_SECTION*)systemMutex);
+#else
+  systemMutex = new pthread_mutex_t;
+  int ret = pthread_mutex_init((pthread_mutex_t*)systemMutex, NULL);
+  if (ret != 0) {
+    delete (pthread_mutex_t*)systemMutex; // failed ctor => dtor never runs
+    throw rdr::SystemException("Failed to create mutex", ret);
+  }
+#endif
+}
+
+Mutex::~Mutex()
+{
+#ifdef WIN32
+  DeleteCriticalSection(static_cast<CRITICAL_SECTION*>(systemMutex));
+  delete static_cast<CRITICAL_SECTION*>(systemMutex);
+#else
+  pthread_mutex_t* native = static_cast<pthread_mutex_t*>(systemMutex);
+  // Free the storage before reporting, so a failure does not leak it
+  const int err = pthread_mutex_destroy(native);
+  delete native;
+  if (err != 0)
+    throw rdr::SystemException("Failed to destroy mutex", err);
+#endif
+}
+
+void Mutex::lock()
+{
+#ifdef WIN32
+  EnterCriticalSection(static_cast<CRITICAL_SECTION*>(systemMutex));
+#else
+  pthread_mutex_t* native = static_cast<pthread_mutex_t*>(systemMutex);
+  // Any non-zero pthread result is reported as a SystemException
+  const int err = pthread_mutex_lock(native);
+  if (err != 0)
+    throw rdr::SystemException("Failed to lock mutex", err);
+#endif
+}
+
+void Mutex::unlock()
+{
+#ifdef WIN32
+  LeaveCriticalSection(static_cast<CRITICAL_SECTION*>(systemMutex));
+#else
+  pthread_mutex_t* native = static_cast<pthread_mutex_t*>(systemMutex);
+  // Any non-zero pthread result is reported as a SystemException
+  const int err = pthread_mutex_unlock(native);
+  if (err != 0)
+    throw rdr::SystemException("Failed to unlock mutex", err);
+#endif
+}
+
+Condition::Condition(Mutex* mutex)
+{
+  this->mutex = mutex; // the mutex handed to the native wait call in wait()
+
+#ifdef WIN32
+  systemCondition = new CONDITION_VARIABLE;
+  InitializeConditionVariable((CONDITION_VARIABLE*)systemCondition);
+#else
+  systemCondition = new pthread_cond_t;
+  int ret = pthread_cond_init((pthread_cond_t*)systemCondition, NULL);
+  if (ret != 0) {
+    delete (pthread_cond_t*)systemCondition; // failed ctor => dtor never runs
+    throw rdr::SystemException("Failed to create condition variable", ret);
+  }
+#endif
+}
+
+Condition::~Condition()
+{
+#ifdef WIN32
+  delete static_cast<CONDITION_VARIABLE*>(systemCondition);
+#else
+  pthread_cond_t* native = static_cast<pthread_cond_t*>(systemCondition);
+  // Free the storage before reporting, so a failure does not leak it
+  const int err = pthread_cond_destroy(native);
+  delete native;
+  if (err != 0)
+    throw rdr::SystemException("Failed to destroy condition variable", err);
+#endif
+}
+
+void Condition::wait()
+{
+#ifdef WIN32
+  CONDITION_VARIABLE* cond = static_cast<CONDITION_VARIABLE*>(systemCondition);
+  CRITICAL_SECTION* section = static_cast<CRITICAL_SECTION*>(mutex->systemMutex);
+
+  // INFINITE: no timeout, sleep until the condition is woken
+  if (!SleepConditionVariableCS(cond, section, INFINITE))
+    throw rdr::SystemException("Failed to wait on condition variable", GetLastError());
+#else
+  pthread_cond_t* cond = static_cast<pthread_cond_t*>(systemCondition);
+  pthread_mutex_t* native = static_cast<pthread_mutex_t*>(mutex->systemMutex);
+
+  // Waits using the mutex this condition was constructed with
+  const int err = pthread_cond_wait(cond, native);
+  if (err != 0)
+    throw rdr::SystemException("Failed to wait on condition variable", err);
+#endif
+}
+
+void Condition::signal()
+{
+#ifdef WIN32
+  WakeConditionVariable(static_cast<CONDITION_VARIABLE*>(systemCondition));
+#else
+  pthread_cond_t* native = static_cast<pthread_cond_t*>(systemCondition);
+  // Any non-zero pthread result is reported as a SystemException
+  const int err = pthread_cond_signal(native);
+  if (err != 0)
+    throw rdr::SystemException("Failed to signal condition variable", err);
+#endif
+}
+
+void Condition::broadcast()
+{
+#ifdef WIN32
+  WakeAllConditionVariable(static_cast<CONDITION_VARIABLE*>(systemCondition));
+#else
+  pthread_cond_t* native = static_cast<pthread_cond_t*>(systemCondition);
+  // Any non-zero pthread result is reported as a SystemException
+  const int err = pthread_cond_broadcast(native);
+  if (err != 0)
+    throw rdr::SystemException("Failed to broadcast condition variable", err);
+#endif
+}
diff --git a/common/os/Mutex.h b/common/os/Mutex.h
new file mode 100644
index 0000000..2a54b20
--- /dev/null
+++ b/common/os/Mutex.h
@@ -0,0 +1,64 @@
+/* Copyright 2015 Pierre Ossman for Cendio AB
+ * 
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifndef __OS_MUTEX_H__
+#define __OS_MUTEX_H__
+
+namespace os {
+  class Condition;
+
+  class Mutex {
+  public:
+    Mutex();
+    ~Mutex();
+
+    void lock();
+    void unlock();
+
+  private:
+    friend class Condition; // Condition::wait() passes systemMutex to the OS
+
+    void* systemMutex;      // opaque pointer to the native mutex object
+  };
+
+  class AutoMutex {
+  public:
+    AutoMutex(Mutex* mutex) : m(mutex) { m->lock(); } // locks on construction
+    ~AutoMutex() { m->unlock(); }                     // unlocks on destruction
+  private:
+    Mutex* m;
+  };
+
+  class Condition {
+  public:
+    Condition(Mutex* mutex);  // mutex is the one wait() hands to the OS call
+    ~Condition();
+
+    void wait();              // waits without a timeout; throws on failure
+
+    void signal();
+    void broadcast();
+
+  private:
+    Mutex* mutex;
+    void* systemCondition;    // opaque pointer to the native condition object
+  };
+
+}
+
+#endif
diff --git a/common/os/Thread.cxx b/common/os/Thread.cxx
new file mode 100644
index 0000000..f38a10b
--- /dev/null
+++ b/common/os/Thread.cxx
@@ -0,0 +1,153 @@
+/* Copyright 2015 Pierre Ossman for Cendio AB
+ * 
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <pthread.h>
+#include <unistd.h>
+#endif
+
+#include <rdr/Exception.h>
+
+#include <os/Mutex.h>
+#include <os/Thread.h>
+
+using namespace os;
+
+Thread::Thread() : running(false), threadId(NULL)
+{
+  mutex = new Mutex;
+#ifdef WIN32
+  threadId = new HANDLE(NULL); // stays NULL until start() succeeds
+#else
+  threadId = new pthread_t;
+#endif
+}
+
+Thread::~Thread()
+{
+#ifdef WIN32
+  if (*(HANDLE*)threadId != NULL)
+    CloseHandle(*(HANDLE*)threadId); // release the kernel thread object
+  delete (HANDLE*)threadId;
+#else
+  if (isRunning())
+    pthread_cancel(*(pthread_t*)threadId);
+  delete (pthread_t*)threadId;
+#endif
+  delete mutex;
+}
+
+void Thread::start()
+{
+  // Held until running is set, so the new thread cannot clear it first
+  AutoMutex lock(mutex);
+#ifdef WIN32
+  HANDLE* handle = static_cast<HANDLE*>(threadId);
+  *handle = CreateThread(NULL, 0, startRoutine, this, 0, NULL);
+  if (*handle == NULL)
+    throw rdr::SystemException("Failed to create thread", GetLastError());
+#else
+  pthread_t* handle = static_cast<pthread_t*>(threadId);
+  const int err = pthread_create(handle, NULL, startRoutine, this);
+  if (err != 0)
+    throw rdr::SystemException("Failed to create thread", err);
+#endif
+
+  running = true;
+}
+
+void Thread::wait()
+{
+  // NOTE(review): a thread that already finished is never joined here
+  if (!isRunning())
+    return;
+
+#ifdef WIN32
+  HANDLE handle = *static_cast<HANDLE*>(threadId);
+  // INFINITE: block for as long as the thread keeps running
+  if (WaitForSingleObject(handle, INFINITE) != WAIT_OBJECT_0)
+    throw rdr::SystemException("Failed to join thread", GetLastError());
+#else
+  pthread_t handle = *static_cast<pthread_t*>(threadId);
+  // The thread's return value is not needed, hence the NULL
+  const int err = pthread_join(handle, NULL);
+  if (err != 0)
+    throw rdr::SystemException("Failed to join thread", err);
+#endif
+}
+
+bool Thread::isRunning()
+{
+  // running is also written by the thread itself, so read it locked
+  AutoMutex lock(mutex);
+  return running;
+}
+
+size_t Thread::getSystemCPUCount()
+{
+#ifdef WIN32
+  SYSTEM_INFO si;
+  size_t count;
+  DWORD_PTR mask; // same type as dwActiveProcessorMask; DWORD truncates on Win64
+
+  GetSystemInfo(&si);
+  // Count the bits set in the active processor mask
+  count = 0;
+  for (mask = si.dwActiveProcessorMask;mask != 0;mask >>= 1) {
+    if (mask & 0x1)
+      count++;
+  }
+  // Never report more than dwNumberOfProcessors
+  if (count > si.dwNumberOfProcessors)
+    count = si.dwNumberOfProcessors;
+
+  return count;
+#else
+  long ret;
+
+  ret = sysconf(_SC_NPROCESSORS_ONLN);
+  if (ret == -1)
+    return 0; // sysconf() failed: report zero CPUs
+
+  return ret;
+#endif
+}
+
+#ifdef WIN32
+long unsigned __stdcall Thread::startRoutine(void* data)
+#else
+void* Thread::startRoutine(void* data)
+#endif
+{
+  // data is the Thread object that start() passed as "this"
+  Thread* const thread = static_cast<Thread*>(data);
+
+  try {
+    thread->worker();
+  } catch(...) {
+    // Exceptions from worker() are discarded here
+  }
+
+  thread->mutex->lock();
+  thread->running = false;
+  thread->mutex->unlock();
+
+  return 0;
+}
diff --git a/common/os/Thread.h b/common/os/Thread.h
new file mode 100644
index 0000000..1a9aa54
--- /dev/null
+++ b/common/os/Thread.h
@@ -0,0 +1,56 @@
+/* Copyright 2015 Pierre Ossman for Cendio AB
+ * 
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifndef __OS_THREAD_H__
+#define __OS_THREAD_H__
+
+#include <stddef.h>
+
+namespace os {
+  class Mutex;
+
+  class Thread {
+  public:
+    Thread();
+    virtual ~Thread();
+
+    void start();     // runs worker() in a newly created thread
+    void wait();      // blocks while the thread is still running
+
+    bool isRunning();
+
+    static size_t getSystemCPUCount(); // 0 if sysconf() fails on POSIX
+
+  protected:
+    virtual void worker() = 0; // thread body, supplied by subclasses
+
+  private:
+#ifdef WIN32
+    static long unsigned __stdcall startRoutine(void* data);
+#else
+    static void* startRoutine(void* data);
+#endif
+
+    Mutex *mutex;     // guards running
+    bool running;
+
+    void *threadId;   // opaque pointer to the native thread handle
+  };
+}
+
+#endif
diff --git a/common/rdr/FdInStream.cxx b/common/rdr/FdInStream.cxx
index 9f35c9f..a8b3085 100644
--- a/common/rdr/FdInStream.cxx
+++ b/common/rdr/FdInStream.cxx
@@ -23,11 +23,9 @@
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
+#include <sys/time.h>
 #ifdef _WIN32
 #include <winsock2.h>
-#ifndef _WIN32_WCE
-#include <sys/timeb.h>
-#endif
 #define read(s,b,l) recv(s,(char*)b,l,0)
 #define close closesocket
 #undef errno
@@ -36,7 +34,6 @@
 #else
 #include <sys/types.h>
 #include <unistd.h>
-#include <sys/time.h>
 #endif
 
 #ifndef vncmin
@@ -161,35 +158,6 @@
   return nItems;
 }
 
-#ifdef _WIN32
-static void gettimeofday(struct timeval* tv, void*)
-{
-  LARGE_INTEGER counts, countsPerSec;
-  static double usecPerCount = 0.0;
-
-  if (QueryPerformanceCounter(&counts)) {
-    if (usecPerCount == 0.0) {
-      QueryPerformanceFrequency(&countsPerSec);
-      usecPerCount = 1000000.0 / countsPerSec.QuadPart;
-    }
-
-    LONGLONG usecs = (LONGLONG)(counts.QuadPart * usecPerCount);
-    tv->tv_usec = (long)(usecs % 1000000);
-    tv->tv_sec = (long)(usecs / 1000000);
-
-  } else {
-#ifndef _WIN32_WCE
-    struct timeb tb;
-    ftime(&tb);
-    tv->tv_sec = tb.time;
-    tv->tv_usec = tb.millitm * 1000;
-#else
-    throw SystemException("QueryPerformanceCounter", GetLastError());
-#endif
-  }
-}
-#endif
-
 //
 // readWithTimeoutOrCallback() reads up to the given length in bytes from the
 // file descriptor into a buffer.  If the wait argument is false, then zero is
diff --git a/common/rdr/FdOutStream.cxx b/common/rdr/FdOutStream.cxx
index 75131a6..f429903 100644
--- a/common/rdr/FdOutStream.cxx
+++ b/common/rdr/FdOutStream.cxx
@@ -198,16 +198,7 @@
 
       FD_ZERO(&fds);
       FD_SET(fd, &fds);
-#ifdef _WIN32_WCE
-      // NB: This fixes a broken Winsock2 select() behaviour.  select()
-      // never returns for non-blocking sockets, unless they're already
-      // ready to be written to...
-      u_long zero = 0; ioctlsocket(fd, FIONBIO, &zero);
-#endif
       n = select(fd+1, 0, &fds, 0, tvp);
-#ifdef _WIN32_WCE
-      u_long one = 0; ioctlsocket(fd, FIONBIO, &one);
-#endif
     } while (n < 0 && errno == EINTR);
 
     if (n < 0) throw SystemException("select",errno);
diff --git a/common/rdr/InStream.h b/common/rdr/InStream.h
index 760fb3d..212a2ec 100644
--- a/common/rdr/InStream.h
+++ b/common/rdr/InStream.h
@@ -92,7 +92,7 @@
 
     // readBytes() reads an exact number of bytes.
 
-    virtual void readBytes(void* data, int length) {
+    void readBytes(void* data, int length) {
       U8* dataPtr = (U8*)data;
       U8* dataEnd = dataPtr + length;
       while (dataPtr < dataEnd) {
diff --git a/common/rdr/OutStream.h b/common/rdr/OutStream.h
index 4afd4bf..a749a20 100644
--- a/common/rdr/OutStream.h
+++ b/common/rdr/OutStream.h
@@ -25,6 +25,7 @@
 #define __RDR_OUTSTREAM_H__
 
 #include <rdr/types.h>
+#include <rdr/InStream.h>
 #include <string.h> // for memcpy
 
 namespace rdr {
@@ -89,7 +90,7 @@
 
     // writeBytes() writes an exact number of bytes.
 
-    virtual void writeBytes(const void* data, int length) {
+    void writeBytes(const void* data, int length) {
       const U8* dataPtr = (const U8*)data;
       const U8* dataEnd = dataPtr + length;
       while (dataPtr < dataEnd) {
@@ -100,6 +101,17 @@
       }
     }
 
+    // copyBytes() efficiently transfers data between streams
+
+    void copyBytes(InStream* is, int length) {
+      for (int chunk; length > 0; length -= chunk) {
+        // check() is relied on to report how many bytes fit at ptr
+        chunk = check(1, length);
+        is->readBytes(ptr, chunk);
+        ptr += chunk;
+      }
+    }
+
     // writeOpaqueN() writes a quantity without byte-swapping.
 
     inline void writeOpaque8( U8  u) { writeU8(u); }
diff --git a/common/rdr/ZlibInStream.cxx b/common/rdr/ZlibInStream.cxx
index 6f3a7d0..4053bd1 100644
--- a/common/rdr/ZlibInStream.cxx
+++ b/common/rdr/ZlibInStream.cxx
@@ -16,6 +16,8 @@
  * USA.
  */
 
+#include <assert.h>
+
 #include <rdr/ZlibInStream.h>
 #include <rdr/Exception.h>
 #include <zlib.h>
@@ -26,26 +28,16 @@
 
 ZlibInStream::ZlibInStream(int bufSize_)
   : underlying(0), bufSize(bufSize_ ? bufSize_ : DEFAULT_BUF_SIZE), offset(0),
-    bytesIn(0)
+    zs(NULL), bytesIn(0)
 {
-  zs = new z_stream;
-  zs->zalloc    = Z_NULL;
-  zs->zfree     = Z_NULL;
-  zs->opaque    = Z_NULL;
-  zs->next_in   = Z_NULL;
-  zs->avail_in  = 0;
-  if (inflateInit(zs) != Z_OK) {
-    delete zs;
-    throw Exception("ZlibInStream: inflateInit failed");
-  }
   ptr = end = start = new U8[bufSize];
+  init();
 }
 
 ZlibInStream::~ZlibInStream()
 {
+  deinit();
   delete [] start;
-  inflateEnd(zs);
-  delete zs;
 }
 
 void ZlibInStream::setUnderlying(InStream* is, int bytesIn_)
@@ -60,7 +52,7 @@
   return offset + ptr - start;
 }
 
-void ZlibInStream::reset()
+void ZlibInStream::removeUnderlying()
 {
   ptr = end = start;
   if (!underlying) return;
@@ -72,6 +64,38 @@
   underlying = 0;
 }
 
+void ZlibInStream::reset()
+{
+  deinit(); // tear down the current inflate state...
+  init();   // ...and set up a fresh one
+}
+
+void ZlibInStream::init()
+{
+  assert(zs == NULL);
+  // Z_NULL hooks select zlib's default allocator; no input is queued yet
+  zs = new z_stream;
+  zs->zalloc   = Z_NULL;
+  zs->zfree    = Z_NULL;
+  zs->opaque   = Z_NULL;
+  zs->next_in  = Z_NULL;
+  zs->avail_in = 0;
+  if (inflateInit(zs) == Z_OK)
+    return;
+  delete zs; // initialisation failed: leave zs NULL again before throwing
+  zs = NULL;
+  throw Exception("ZlibInStream: inflateInit failed");
+}
+
+void ZlibInStream::deinit()
+{
+  assert(zs != NULL);
+  removeUnderlying(); // detach from the underlying stream first
+  inflateEnd(zs);     // then release zlib's inflate state
+  delete zs;
+  zs = NULL;          // lets init() assert that nothing is live
+}
+
 int ZlibInStream::overrun(int itemSize, int nItems, bool wait)
 {
   if (itemSize > bufSize)
diff --git a/common/rdr/ZlibInStream.h b/common/rdr/ZlibInStream.h
index c26b6d6..6bd4da4 100644
--- a/common/rdr/ZlibInStream.h
+++ b/common/rdr/ZlibInStream.h
@@ -38,11 +38,15 @@
     virtual ~ZlibInStream();
 
     void setUnderlying(InStream* is, int bytesIn);
-    void reset();
+    void removeUnderlying();
     int pos();
+    void reset();
 
   private:
 
+    void init();
+    void deinit();
+
     int overrun(int itemSize, int nItems, bool wait);
     bool decompress(bool wait);
 
diff --git a/common/rfb/CConnection.cxx b/common/rfb/CConnection.cxx
index 8ccd948..7e9fd31 100644
--- a/common/rfb/CConnection.cxx
+++ b/common/rfb/CConnection.cxx
@@ -17,17 +17,22 @@
  */
 #include <stdio.h>
 #include <string.h>
+
 #include <rfb/Exception.h>
 #include <rfb/fenceTypes.h>
 #include <rfb/CMsgReader.h>
 #include <rfb/CMsgWriter.h>
 #include <rfb/CSecurity.h>
 #include <rfb/Security.h>
+#include <rfb/SecurityClient.h>
 #include <rfb/CConnection.h>
 #include <rfb/util.h>
 
 #include <rfb/LogWriter.h>
 
+#include <rdr/InStream.h>
+#include <rdr/OutStream.h>
+
 using namespace rfb;
 
 static LogWriter vlog("CConnection");
@@ -35,19 +40,16 @@
 CConnection::CConnection()
   : csecurity(0), is(0), os(0), reader_(0), writer_(0),
     shared(false),
-    state_(RFBSTATE_UNINITIALISED), useProtocol3_3(false)
+    state_(RFBSTATE_UNINITIALISED), useProtocol3_3(false),
+    framebuffer(NULL), decoder(this)
 {
   security = new SecurityClient();
 }
 
 CConnection::~CConnection()
 {
+  setFramebuffer(NULL);
   if (csecurity) csecurity->destroy();
-  deleteReaderAndWriter();
-}
-
-void CConnection::deleteReaderAndWriter()
-{
   delete reader_;
   reader_ = 0;
   delete writer_;
@@ -60,6 +62,47 @@
   os = os_;
 }
 
+void CConnection::setFramebuffer(ModifiablePixelBuffer* fb)
+{
+  decoder.flush(); // presumably completes decodes aimed at the old buffer
+
+  if ((framebuffer != NULL) && (fb != NULL)) {
+    Rect rect;
+
+    const rdr::U8* data;
+    int stride;
+
+    const rdr::U8 black[4] = { 0, 0, 0, 0 };
+
+    // Copy the area that exists in both the old and the new buffer
+
+    rect.setXYWH(0, 0,
+                 __rfbmin(fb->width(), framebuffer->width()),
+                 __rfbmin(fb->height(), framebuffer->height()));
+    data = framebuffer->getBuffer(framebuffer->getRect(), &stride);
+    fb->imageRect(rect, data, stride);
+
+    // Black out any new areas
+    // (first the strip right of the old image, then the strip below it)
+    if (fb->width() > framebuffer->width()) {
+      rect.setXYWH(framebuffer->width(), 0,
+                   fb->width() - framebuffer->width(),
+                   fb->height());
+      fb->fillRect(rect, black);
+    }
+
+    if (fb->height() > framebuffer->height()) {
+      rect.setXYWH(0, framebuffer->height(),
+                   fb->width(),
+                   fb->height() - framebuffer->height());
+      fb->fillRect(rect, black);
+    }
+  }
+
+  delete framebuffer; // the connection owns its framebuffer
+  framebuffer = fb;
+}
+
 void CConnection::initialiseProtocol()
 {
   state_ = RFBSTATE_PROTOCOL_VERSION;
@@ -260,6 +303,40 @@
   writer_->writeClientInit(shared);
 }
 
+void CConnection::setDesktopSize(int w, int h)
+{
+  decoder.flush(); // flush the decoder before the new size is passed on
+
+  CMsgHandler::setDesktopSize(w,h);
+}
+
+void CConnection::setExtendedDesktopSize(unsigned reason,
+                                         unsigned result,
+                                         int w, int h,
+                                         const ScreenSet& layout)
+{
+  decoder.flush(); // flush the decoder before the new layout is passed on
+
+  CMsgHandler::setExtendedDesktopSize(reason, result, w, h, layout);
+}
+
+void CConnection::framebufferUpdateStart()
+{
+  CMsgHandler::framebufferUpdateStart(); // only forwards to the base class
+}
+
+void CConnection::framebufferUpdateEnd()
+{
+  decoder.flush(); // flush the decoder before the base class is notified
+
+  CMsgHandler::framebufferUpdateEnd();
+}
+
+void CConnection::dataRect(const Rect& r, int encoding)
+{
+  decoder.decodeRect(r, encoding, framebuffer); // target: current framebuffer
+}
+
 void CConnection::authSuccess()
 {
 }
diff --git a/common/rfb/CConnection.h b/common/rfb/CConnection.h
index 0109fe8..6bc7a38 100644
--- a/common/rfb/CConnection.h
+++ b/common/rfb/CConnection.h
@@ -23,12 +23,9 @@
 #ifndef __RFB_CCONNECTION_H__
 #define __RFB_CCONNECTION_H__
 
-#include <rdr/InStream.h>
-#include <rdr/OutStream.h>
 #include <rfb/CMsgHandler.h>
-#include <rfb/CSecurity.h>
+#include <rfb/DecodeManager.h>
 #include <rfb/util.h>
-#include <rfb/SecurityClient.h>
 
 namespace rfb {
 
@@ -36,6 +33,7 @@
   class CMsgWriter;
   class CSecurity;
   class IdentityVerifier;
+  class SecurityClient;
 
   class CConnection : public CMsgHandler {
   public:
@@ -67,6 +65,13 @@
     // only ever support protocol version 3.3
     void setProtocol3_3(bool s) {useProtocol3_3 = s;}
 
+    // setFramebuffer configures the PixelBuffer that the CConnection
+    // should render all pixel data into. Note that the CConnection
+    // takes ownership of the PixelBuffer and it must not be deleted by
+    // anyone else. Call setFramebuffer again with NULL or a different
+    // PixelBuffer to delete the previous one.
+    void setFramebuffer(ModifiablePixelBuffer* fb);
+
     // initialiseProtocol() should be called once the streams and security
     // types are set.  Subsequently, processMsg() should be called whenever
     // there is data to read on the InStream.
@@ -85,6 +90,20 @@
     void processMsg();
 
 
+    // Methods overridden from CMsgHandler
+
+    // Note: These must be called by any deriving classes
+
+    virtual void setDesktopSize(int w, int h);
+    virtual void setExtendedDesktopSize(unsigned reason, unsigned result,
+                                        int w, int h,
+                                        const ScreenSet& layout);
+
+    virtual void framebufferUpdateStart();
+    virtual void framebufferUpdateEnd();
+    virtual void dataRect(const Rect& r, int encoding);
+
+
     // Methods to be overridden in a derived class
 
     // getIdVerifier() returns the identity verifier associated with the connection.
@@ -101,13 +120,6 @@
 
     // Other methods
 
-    // deleteReaderAndWriter() deletes the reader and writer associated with
-    // this connection.  This may be useful if you want to delete the streams
-    // before deleting the SConnection to make sure that no attempt by the
-    // SConnection is made to read or write.
-    // XXX Do we really need this at all???
-    void deleteReaderAndWriter();
-
     CMsgReader* reader() { return reader_; }
     CMsgWriter* writer() { return writer_; }
 
@@ -139,6 +151,8 @@
     void setReader(CMsgReader *r) { reader_ = r; }
     void setWriter(CMsgWriter *w) { writer_ = w; }
 
+    ModifiablePixelBuffer* getFramebuffer() { return framebuffer; }
+
   private:
     // This is a default implementation of fences that automatically
     // responds to requests, stating no support for synchronisation.
@@ -167,6 +181,9 @@
     CharArray serverName;
 
     bool useProtocol3_3;
+
+    ModifiablePixelBuffer* framebuffer;
+    DecodeManager decoder;
   };
 }
 #endif
diff --git a/common/rfb/CMakeLists.txt b/common/rfb/CMakeLists.txt
index 14b8b72..5047e5e 100644
--- a/common/rfb/CMakeLists.txt
+++ b/common/rfb/CMakeLists.txt
@@ -15,6 +15,7 @@
   ConnParams.cxx
   CopyRectDecoder.cxx
   Cursor.cxx
+  DecodeManager.cxx
   Decoder.cxx
   d3des.c
   EncodeManager.cxx
diff --git a/common/rfb/CMsgReader.cxx b/common/rfb/CMsgReader.cxx
index 04846ef..96ddf44 100644
--- a/common/rfb/CMsgReader.cxx
+++ b/common/rfb/CMsgReader.cxx
@@ -23,19 +23,17 @@
 #include <rfb/util.h>
 #include <rfb/CMsgHandler.h>
 #include <rfb/CMsgReader.h>
-#include <rfb/Decoder.h>
 
 using namespace rfb;
 
 CMsgReader::CMsgReader(CMsgHandler* handler_, rdr::InStream* is_)
   : imageBufIdealSize(0), handler(handler_), is(is_),
-    imageBuf(0), imageBufSize(0), nUpdateRectsLeft(0)
+    nUpdateRectsLeft(0)
 {
 }
 
 CMsgReader::~CMsgReader()
 {
-  delete [] imageBuf;
 }
 
 void CMsgReader::readServerInit()
@@ -242,23 +240,3 @@
 
   handler->setExtendedDesktopSize(x, y, w, h, layout);
 }
-
-rdr::U8* CMsgReader::getImageBuf(int required, int requested, int* nPixels)
-{
-  int requiredBytes = required * (handler->cp.pf().bpp / 8);
-  int requestedBytes = requested * (handler->cp.pf().bpp / 8);
-  int size = requestedBytes;
-  if (size > imageBufIdealSize) size = imageBufIdealSize;
-
-  if (size < requiredBytes)
-    size = requiredBytes;
-
-  if (imageBufSize < size) {
-    imageBufSize = size;
-    delete [] imageBuf;
-    imageBuf = new rdr::U8[imageBufSize];
-  }
-  if (nPixels)
-    *nPixels = imageBufSize / (handler->cp.pf().bpp / 8);
-  return imageBuf;
-}
diff --git a/common/rfb/CMsgReader.h b/common/rfb/CMsgReader.h
index 3592ef8..42c6496 100644
--- a/common/rfb/CMsgReader.h
+++ b/common/rfb/CMsgReader.h
@@ -46,7 +46,6 @@
     void readMsg();
 
     rdr::InStream* getInStream() { return is; }
-    rdr::U8* getImageBuf(int required, int requested=0, int* nPixels=0);
 
     int imageBufIdealSize;
 
@@ -67,8 +66,6 @@
 
     CMsgHandler* handler;
     rdr::InStream* is;
-    rdr::U8* imageBuf;
-    int imageBufSize;
     int nUpdateRectsLeft;
   };
 }
diff --git a/common/rfb/CSecurityPlain.cxx b/common/rfb/CSecurityPlain.cxx
index 8aec0de..0320ce2 100644
--- a/common/rfb/CSecurityPlain.cxx
+++ b/common/rfb/CSecurityPlain.cxx
@@ -22,6 +22,8 @@
 #include <rfb/UserPasswdGetter.h>
 #include <rfb/util.h>
 
+#include <rdr/OutStream.h>
+
 using namespace rfb;
 
 bool CSecurityPlain::processMsg(CConnection* cc)
diff --git a/common/rfb/CSecurityVncAuth.cxx b/common/rfb/CSecurityVncAuth.cxx
index 5b53c60..f44e56e 100644
--- a/common/rfb/CSecurityVncAuth.cxx
+++ b/common/rfb/CSecurityVncAuth.cxx
@@ -23,6 +23,7 @@
 
 #include <string.h>
 #include <stdio.h>
+
 #include <rfb/CConnection.h>
 #include <rfb/Password.h>
 #include <rfb/CSecurityVncAuth.h>
@@ -32,6 +33,8 @@
 #include <rfb/d3des.h>
 }
 
+#include <rdr/InStream.h>
+#include <rdr/OutStream.h>
 
 using namespace rfb;
 
diff --git a/common/rfb/ConnParams.cxx b/common/rfb/ConnParams.cxx
index 615f49a..ab3b884 100644
--- a/common/rfb/ConnParams.cxx
+++ b/common/rfb/ConnParams.cxx
@@ -99,7 +99,7 @@
   memcpy(cursor_.mask.buf, other.mask.buf, cursor_.maskLen());
 }
 
-bool ConnParams::supportsEncoding(rdr::S32 encoding)
+bool ConnParams::supportsEncoding(rdr::S32 encoding) const
 {
   return encodings_.count(encoding) != 0;
 }
diff --git a/common/rfb/ConnParams.h b/common/rfb/ConnParams.h
index 0b8a89c..9e647ba 100644
--- a/common/rfb/ConnParams.h
+++ b/common/rfb/ConnParams.h
@@ -56,14 +56,14 @@
     void setVersion(int major, int minor) {
       majorVersion = major; minorVersion = minor;
     }
-    bool isVersion(int major, int minor) {
+    bool isVersion(int major, int minor) const {
       return majorVersion == major && minorVersion == minor;
     }
-    bool beforeVersion(int major, int minor) {
+    bool beforeVersion(int major, int minor) const {
       return (majorVersion < major ||
               (majorVersion == major && minorVersion < minor));
     }
-    bool afterVersion(int major, int minor) {
+    bool afterVersion(int major, int minor) const {
       return !beforeVersion(major,minor+1);
     }
 
@@ -71,16 +71,16 @@
     int height;
     ScreenSet screenLayout;
 
-    const PixelFormat& pf() { return pf_; }
+    const PixelFormat& pf() const { return pf_; }
     void setPF(const PixelFormat& pf);
 
-    const char* name() { return name_; }
+    const char* name() const { return name_; }
     void setName(const char* name);
 
-    const Cursor& cursor() { return cursor_; }
+    const Cursor& cursor() const { return cursor_; }
     void setCursor(const Cursor& cursor);
 
-    bool supportsEncoding(rdr::S32 encoding);
+    bool supportsEncoding(rdr::S32 encoding) const;
 
     void setEncodings(int nEncodings, const rdr::S32* encodings);
 
diff --git a/common/rfb/CopyRectDecoder.cxx b/common/rfb/CopyRectDecoder.cxx
index 4b10418..23949a8 100644
--- a/common/rfb/CopyRectDecoder.cxx
+++ b/common/rfb/CopyRectDecoder.cxx
@@ -15,14 +15,15 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
  * USA.
  */
-#include <rdr/InStream.h>
-#include <rfb/CConnection.h>
+#include <rdr/MemInStream.h>
+#include <rdr/OutStream.h>
 #include <rfb/PixelBuffer.h>
+#include <rfb/Region.h>
 #include <rfb/CopyRectDecoder.h>
 
 using namespace rfb;
 
-CopyRectDecoder::CopyRectDecoder(CConnection* conn) : Decoder(conn)
+CopyRectDecoder::CopyRectDecoder() : Decoder(DecoderPlain)
 {
 }
 
@@ -30,9 +31,35 @@
 {
 }
 
-void CopyRectDecoder::readRect(const Rect& r, ModifiablePixelBuffer* pb)
+void CopyRectDecoder::readRect(const Rect& r, rdr::InStream* is,
+                               const ConnParams& cp, rdr::OutStream* os)
 {
-  int srcX = conn->getInStream()->readU16();
-  int srcY = conn->getInStream()->readU16();
+  os->copyBytes(is, 4); // source position: two U16 values (srcX, srcY)
+}
+
+
+void CopyRectDecoder::getAffectedRegion(const Rect& rect,
+                                        const void* buffer,
+                                        size_t buflen,
+                                        const ConnParams& cp,
+                                        Region* region)
+{
+  rdr::MemInStream payload(buffer, buflen);
+  const int srcX = payload.readU16();
+  const int srcY = payload.readU16();
+
+  Decoder::getAffectedRegion(rect, buffer, buflen, cp, region);
+  // Also include the source area, i.e. rect moved so it starts at srcX,srcY
+  const Point toSource(srcX - rect.tl.x, srcY - rect.tl.y);
+  region->assign_union(Region(rect.translate(toSource)));
+}
+
+void CopyRectDecoder::decodeRect(const Rect& r, const void* buffer,
+                                 size_t buflen, const ConnParams& cp,
+                                 ModifiablePixelBuffer* pb)
+{
+  rdr::MemInStream is(buffer, buflen); // the bytes readRect() copied out
+  int srcX = is.readU16();             // source position of the copy
+  int srcY = is.readU16();
   pb->copyRect(r, Point(r.tl.x-srcX, r.tl.y-srcY));
 }
diff --git a/common/rfb/CopyRectDecoder.h b/common/rfb/CopyRectDecoder.h
index d14bf92..1d2ce53 100644
--- a/common/rfb/CopyRectDecoder.h
+++ b/common/rfb/CopyRectDecoder.h
@@ -24,9 +24,16 @@
 
   class CopyRectDecoder : public Decoder {
   public:
-    CopyRectDecoder(CConnection* conn);
+    CopyRectDecoder();
     virtual ~CopyRectDecoder();
-    virtual void readRect(const Rect& r, ModifiablePixelBuffer* pb);
+    virtual void readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os);
+    virtual void getAffectedRegion(const Rect& rect, const void* buffer,
+                                   size_t buflen, const ConnParams& cp,
+                                   Region* region);
+    virtual void decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb);
   };
 }
 #endif
diff --git a/common/rfb/DecodeManager.cxx b/common/rfb/DecodeManager.cxx
new file mode 100644
index 0000000..a655c53
--- /dev/null
+++ b/common/rfb/DecodeManager.cxx
@@ -0,0 +1,359 @@
+/* Copyright 2015 Pierre Ossman for Cendio AB
+ * 
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <rfb/CConnection.h>
+#include <rfb/DecodeManager.h>
+#include <rfb/Decoder.h>
+#include <rfb/Region.h>
+
+#include <rfb/LogWriter.h>
+
+#include <rdr/Exception.h>
+#include <rdr/MemOutStream.h>
+
+#include <os/Mutex.h>
+
+using namespace rfb;
+
+static LogWriter vlog("DecodeManager");
+
+DecodeManager::DecodeManager(CConnection *conn) :
+  conn(conn), threadException(NULL)
+{
+  size_t cpuCount;
+
+  memset(decoders, 0, sizeof(decoders));
+
+  queueMutex = new os::Mutex();
+  producerCond = new os::Condition(queueMutex);
+  consumerCond = new os::Condition(queueMutex);
+
+  cpuCount = os::Thread::getSystemCPUCount();
+  if (cpuCount == 0) {
+    vlog.error("Unable to determine the number of CPU cores on this system");
+    cpuCount = 1;
+  } else {
+    vlog.info("Detected %d CPU core(s)", (int)cpuCount);
+    // No point creating more threads than this, they'll just end up
+    // wasting CPU fighting for locks
+    if (cpuCount > 4)
+      cpuCount = 4;
+    // The overhead of threading is small, but not small enough to
+    // ignore on single CPU systems
+    if (cpuCount == 1)
+      vlog.info("Decoding data on main thread");
+    else
+      vlog.info("Creating %d decoder thread(s)", (int)cpuCount);
+  }
+
+  while (cpuCount--) {
+    // Twice as many possible entries in the queue as there
+    // are worker threads to make sure they don't stall
+    freeBuffers.push_back(new rdr::MemOutStream());
+    freeBuffers.push_back(new rdr::MemOutStream());
+
+    threads.push_back(new DecodeThread(this));
+  }
+}
+
+DecodeManager::~DecodeManager()
+{
+  while (!threads.empty()) {
+    delete threads.back();
+    threads.pop_back();
+  }
+
+  delete threadException;
+
+  while (!freeBuffers.empty()) {
+    delete freeBuffers.back();
+    freeBuffers.pop_back();
+  }
+
+  delete consumerCond;
+  delete producerCond;
+  delete queueMutex;
+
+  for (size_t i = 0; i < sizeof(decoders)/sizeof(decoders[0]); i++)
+    delete decoders[i];
+}
+
+void DecodeManager::decodeRect(const Rect& r, int encoding,
+                               ModifiablePixelBuffer* pb)
+{
+  Decoder *decoder;
+  rdr::MemOutStream *bufferStream;
+
+  QueueEntry *entry;
+
+  assert(pb != NULL);
+
+  if (!Decoder::supported(encoding)) {
+    vlog.error("Unknown encoding %d", encoding);
+    throw rdr::Exception("Unknown encoding");
+  }
+
+  if (!decoders[encoding]) {
+    decoders[encoding] = Decoder::createDecoder(encoding);
+    if (!decoders[encoding]) {
+      vlog.error("Unknown encoding %d", encoding);
+      throw rdr::Exception("Unknown encoding");
+    }
+  }
+
+  decoder = decoders[encoding];
+
+  // Fast path for single CPU machines to avoid the context
+  // switching overhead
+  if (threads.size() == 1) {
+    bufferStream = freeBuffers.front();
+    bufferStream->clear();
+    decoder->readRect(r, conn->getInStream(), conn->cp, bufferStream);
+    decoder->decodeRect(r, bufferStream->data(), bufferStream->length(),
+                        conn->cp, pb);
+    return;
+  }
+
+  // Wait for an available memory buffer
+  queueMutex->lock();
+
+  while (freeBuffers.empty())
+    producerCond->wait();
+
+  // Don't pop the buffer in case we throw an exception
+  // whilst reading
+  bufferStream = freeBuffers.front();
+
+  queueMutex->unlock();
+
+  // First check if any thread has encountered a problem
+  throwThreadException();
+
+  // Read the rect
+  bufferStream->clear();
+  decoder->readRect(r, conn->getInStream(), conn->cp, bufferStream);
+
+  // Then try to put it on the queue
+  entry = new QueueEntry;
+
+  entry->active = false;
+  entry->rect = r;
+  entry->encoding = encoding;
+  entry->decoder = decoder;
+  entry->cp = &conn->cp;
+  entry->pb = pb;
+  entry->bufferStream = bufferStream;
+
+  decoder->getAffectedRegion(r, bufferStream->data(),
+                             bufferStream->length(), conn->cp,
+                             &entry->affectedRegion);
+
+  queueMutex->lock();
+
+  // The workers add buffers to the end so it's safe to assume
+  // the front is still the same buffer
+  freeBuffers.pop_front();
+
+  workQueue.push_back(entry);
+
+  // We only put a single entry on the queue so waking a single
+  // thread is sufficient
+  consumerCond->signal();
+
+  queueMutex->unlock();
+}
+
+void DecodeManager::flush()
+{
+  queueMutex->lock();
+  // Block until the workers have removed every entry from the queue
+  while (!workQueue.empty())
+    producerCond->wait();
+
+  queueMutex->unlock();
+  // Report any failure a worker recorded while decoding
+  throwThreadException();
+}
+
+void DecodeManager::setThreadException(const rdr::Exception& e)
+{
+  os::AutoMutex a(queueMutex);
+
+  // Keep only the first failure; throwThreadException() reports it later
+  if (threadException != NULL)
+    return;
+  threadException = new rdr::Exception("Exception on worker thread: %s", e.str());
+}
+
+void DecodeManager::throwThreadException()
+{
+  os::AutoMutex a(queueMutex);
+  // Nothing to do unless a worker has stored an exception
+  if (threadException == NULL)
+    return;
+  // Copy it so the stored object can be freed before throwing
+  rdr::Exception e(*threadException);
+
+  delete threadException;
+  threadException = NULL;
+
+  throw e;
+}
+
+DecodeManager::DecodeThread::DecodeThread(DecodeManager* manager)
+{
+  this->manager = manager;
+
+  stopRequested = false;
+
+  start();
+}
+
+DecodeManager::DecodeThread::~DecodeThread()
+{
+  stop();
+  wait();
+}
+
+void DecodeManager::DecodeThread::stop()
+{
+  os::AutoMutex a(manager->queueMutex);
+
+  if (!isRunning())
+    return;
+
+  stopRequested = true;
+
+  // We can't wake just this thread, so wake everyone
+  manager->consumerCond->broadcast();
+}
+
+void DecodeManager::DecodeThread::worker()
+{
+  manager->queueMutex->lock();
+  // All queue access below happens with queueMutex held
+  while (!stopRequested) {
+    DecodeManager::QueueEntry *entry;
+
+    // Look for an available entry in the work queue
+    entry = findEntry();
+    if (entry == NULL) {
+      // Wait and try again
+      manager->consumerCond->wait();
+      continue;
+    }
+
+    // This is ours now
+    entry->active = true;
+
+    manager->queueMutex->unlock();
+
+    // Do the actual decoding (the queue lock is not held here)
+    try {
+      entry->decoder->decodeRect(entry->rect, entry->bufferStream->data(),
+                                 entry->bufferStream->length(),
+                                 *entry->cp, entry->pb);
+    } catch (const rdr::Exception& e) {
+      manager->setThreadException(e);
+    } catch(...) {
+      assert(false);
+    }
+
+    manager->queueMutex->lock();
+
+    // Remove the entry from the queue and give back the memory buffer
+    manager->freeBuffers.push_back(entry->bufferStream);
+    manager->workQueue.remove(entry);
+    delete entry;
+
+    // Wake the main thread in case it is waiting for a memory buffer
+    manager->producerCond->signal();
+    // This rect might have been blocking multiple other rects, so
+    // wake up every worker thread
+    if (manager->workQueue.size() > 1)
+      manager->consumerCond->broadcast();
+  }
+
+  manager->queueMutex->unlock();
+}
+
+DecodeManager::QueueEntry* DecodeManager::DecodeThread::findEntry()
+{
+  std::list<DecodeManager::QueueEntry*>::iterator iter;
+  Region lockedRegion;
+
+  if (manager->workQueue.empty())
+    return NULL;
+
+  if (!manager->workQueue.front()->active)
+    return manager->workQueue.front();
+
+  for (iter = manager->workQueue.begin();
+       iter != manager->workQueue.end();
+       ++iter) {
+    DecodeManager::QueueEntry* entry;
+
+    std::list<DecodeManager::QueueEntry*>::iterator iter2;
+
+    entry = *iter;
+
+    // Another thread working on this?
+    if (entry->active)
+      goto next;
+
+    // If this is an ordered decoder then make sure this is the first
+    // rectangle in the queue for that decoder
+    if (entry->decoder->flags & DecoderOrdered) {
+      for (iter2 = manager->workQueue.begin(); iter2 != iter; ++iter2) {
+        if (entry->encoding == (*iter2)->encoding)
+          goto next;
+      }
+    }
+
+    // For a partially ordered decoder we must ask the decoder for each
+    // pair of rectangles.
+    if (entry->decoder->flags & DecoderPartiallyOrdered) {
+      for (iter2 = manager->workQueue.begin(); iter2 != iter; ++iter2) {
+        if (entry->encoding != (*iter2)->encoding)
+          continue;
+        if (entry->decoder->doRectsConflict(entry->rect,
+                                            entry->bufferStream->data(),
+                                            entry->bufferStream->length(),
+                                            (*iter2)->rect,
+                                            (*iter2)->bufferStream->data(),
+                                            (*iter2)->bufferStream->length(),
+                                            *entry->cp))
+          goto next;
+      }
+    }
+
+    // Check overlap with earlier rectangles
+    if (!lockedRegion.intersect(entry->affectedRegion).is_empty())
+      goto next;
+
+    return entry;
+
+next:
+    lockedRegion.assign_union(entry->affectedRegion);
+  }
+
+  return NULL;
+}
diff --git a/common/rfb/DecodeManager.h b/common/rfb/DecodeManager.h
new file mode 100644
index 0000000..fbb7f77
--- /dev/null
+++ b/common/rfb/DecodeManager.h
@@ -0,0 +1,104 @@
+/* Copyright 2015 Pierre Ossman for Cendio AB
+ * 
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifndef __RFB_DECODEMANAGER_H__
+#define __RFB_DECODEMANAGER_H__
+
+#include <list>
+
+#include <os/Thread.h>
+
+#include <rfb/Region.h>
+#include <rfb/encodings.h>
+
+namespace os {
+  class Condition;
+  class Mutex;
+}
+
+namespace rdr {
+  class Exception;
+  class MemOutStream;
+}
+
+namespace rfb {
+  class CConnection;
+  class Decoder;
+  class ModifiablePixelBuffer;
+  class Rect;
+
+  class DecodeManager {
+  public:
+    DecodeManager(CConnection *conn);
+    ~DecodeManager();
+
+    void decodeRect(const Rect& r, int encoding,
+                    ModifiablePixelBuffer* pb);
+
+    void flush();
+
+  private:
+    void setThreadException(const rdr::Exception& e);
+    void throwThreadException();
+
+  private:
+    CConnection *conn;
+    Decoder *decoders[encodingMax+1];
+
+    struct QueueEntry {
+      bool active;
+      Rect rect;
+      int encoding;
+      Decoder* decoder;
+      const ConnParams* cp;
+      ModifiablePixelBuffer* pb;
+      rdr::MemOutStream* bufferStream;
+      Region affectedRegion;
+    };
+
+    std::list<rdr::MemOutStream*> freeBuffers;
+    std::list<QueueEntry*> workQueue;
+
+    os::Mutex* queueMutex;
+    os::Condition* producerCond;
+    os::Condition* consumerCond;
+
+  private:
+    class DecodeThread : public os::Thread {
+    public:
+      DecodeThread(DecodeManager* manager);
+      ~DecodeThread();
+
+      void stop();
+
+    protected:
+      void worker();
+      DecodeManager::QueueEntry* findEntry();
+
+    private:
+      DecodeManager* manager;
+
+      bool stopRequested;
+    };
+
+    std::list<DecodeThread*> threads;
+    rdr::Exception *threadException;
+  };
+}
+
+#endif
diff --git a/common/rfb/Decoder.cxx b/common/rfb/Decoder.cxx
index 3faa975..370e1f9 100644
--- a/common/rfb/Decoder.cxx
+++ b/common/rfb/Decoder.cxx
@@ -18,6 +18,7 @@
  */
 #include <stdio.h>
 #include <rfb/encodings.h>
+#include <rfb/Region.h>
 #include <rfb/Decoder.h>
 #include <rfb/RawDecoder.h>
 #include <rfb/CopyRectDecoder.h>
@@ -28,7 +29,7 @@
 
 using namespace rfb;
 
-Decoder::Decoder(CConnection* conn_) : conn(conn_)
+Decoder::Decoder(enum DecoderFlags flags) : flags(flags)
 {
 }
 
@@ -36,6 +37,21 @@
 {
 }
 
+void Decoder::getAffectedRegion(const Rect& rect, const void* buffer,
+                                size_t buflen, const ConnParams& cp,
+                                Region* region)
+{
+  region->reset(rect);
+}
+
+bool Decoder::doRectsConflict(const Rect& rectA, const void* bufferA,
+                              size_t buflenA, const Rect& rectB,
+                              const void* bufferB, size_t buflenB,
+                              const ConnParams& cp)
+{
+  return false;
+}
+
 bool Decoder::supported(int encoding)
 {
   switch (encoding) {
@@ -51,21 +67,21 @@
   }
 }
 
-Decoder* Decoder::createDecoder(int encoding, CConnection* conn)
+Decoder* Decoder::createDecoder(int encoding)
 {
   switch (encoding) {
   case encodingRaw:
-    return new RawDecoder(conn);
+    return new RawDecoder();
   case encodingCopyRect:
-    return new CopyRectDecoder(conn);
+    return new CopyRectDecoder();
   case encodingRRE:
-    return new RREDecoder(conn);
+    return new RREDecoder();
   case encodingHextile:
-    return new HextileDecoder(conn);
+    return new HextileDecoder();
   case encodingZRLE:
-    return new ZRLEDecoder(conn);
+    return new ZRLEDecoder();
   case encodingTight:
-    return new TightDecoder(conn);
+    return new TightDecoder();
   default:
     return NULL;
   }
diff --git a/common/rfb/Decoder.h b/common/rfb/Decoder.h
index ff67f57..3840b3f 100644
--- a/common/rfb/Decoder.h
+++ b/common/rfb/Decoder.h
@@ -19,28 +19,77 @@
 #ifndef __RFB_DECODER_H__
 #define __RFB_DECODER_H__
 
-#include <rfb/Rect.h>
+namespace rdr {
+  class InStream;
+  class OutStream;
+}
 
 namespace rfb {
-  class CConnection;
+  class ConnParams;
   class ModifiablePixelBuffer;
+  class Rect;
+  class Region;
+
+  enum DecoderFlags {
+    // A constant for decoders that don't need anything special
+    DecoderPlain = 0,
+    // All rects for this decoder must be handled in order
+    DecoderOrdered = 1 << 0,
+    // Only some of the rects must be handled in order,
+    // see doRectsConflict()
+    DecoderPartiallyOrdered = 1 << 1,
+  };
 
   class Decoder {
   public:
-    Decoder(CConnection* conn);
+    Decoder(enum DecoderFlags flags);
     virtual ~Decoder();
 
-    // readRect() is the main interface that decodes the given rectangle
-    // with data from the CConnection, given at decoder creation, onto
-    // the ModifiablePixelBuffer. The PixelFormat of the PixelBuffer might
-    // not match the ConnParams and it is up to the decoder to do
-    // any necessary conversion.
-    virtual void readRect(const Rect& r, ModifiablePixelBuffer* pb)=0;
+    // These functions are the main interface to an individual decoder
 
+    // readRect() transfers data for the given rectangle from the
+    // InStream to the OutStream, possibly changing it along the way to
+    // make it easier to decode. This function will always be called in
+    // a serial manner on the main thread.
+    virtual void readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os)=0;
+
+    // These functions will be called from any of the worker threads.
+    // A lock will be held whilst these are called so it is safe to
+    // read and update internal state as necessary.
+
+    // getAffectedRegion() returns the parts of the frame buffer that
+    // will be either read from or written to when decoding this rect.
+    // The default implementation simply returns the given rectangle.
+    virtual void getAffectedRegion(const Rect& rect, const void* buffer,
+                                   size_t buflen, const ConnParams& cp,
+                                   Region* region);
+
+    // doRectsConflict() determines if two rectangles must be decoded
+    // in the order they were received. This will only be called if the
+    // DecoderPartiallyOrdered flag has been set.
+    virtual bool doRectsConflict(const Rect& rectA,
+                                 const void* bufferA,
+                                 size_t buflenA,
+                                 const Rect& rectB,
+                                 const void* bufferB,
+                                 size_t buflenB,
+                                 const ConnParams& cp);
+
+    // decodeRect() decodes the given rectangle with data from the
+    // given buffer, onto the ModifiablePixelBuffer. The PixelFormat of
+    // the PixelBuffer might not match the ConnParams and it is up to
+    // the decoder to do any necessary conversion.
+    virtual void decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb)=0;
+
+  public:
     static bool supported(int encoding);
-    static Decoder* createDecoder(int encoding, CConnection* conn);
-  protected:
-    CConnection* conn;
+    static Decoder* createDecoder(int encoding);
+
+  public:
+    const enum DecoderFlags flags;
   };
 }
 
diff --git a/common/rfb/HextileDecoder.cxx b/common/rfb/HextileDecoder.cxx
index 8b18b7b..eae0040 100644
--- a/common/rfb/HextileDecoder.cxx
+++ b/common/rfb/HextileDecoder.cxx
@@ -15,8 +15,12 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
  * USA.
  */
-#include <rfb/CMsgReader.h>
-#include <rfb/CConnection.h>
+
+#include <rdr/InStream.h>
+#include <rdr/MemInStream.h>
+#include <rdr/OutStream.h>
+
+#include <rfb/ConnParams.h>
 #include <rfb/PixelBuffer.h>
 #include <rfb/HextileDecoder.h>
 
@@ -32,7 +36,7 @@
 #include <rfb/hextileDecode.h>
 #undef BPP
 
-HextileDecoder::HextileDecoder(CConnection* conn) : Decoder(conn)
+HextileDecoder::HextileDecoder() : Decoder(DecoderPlain)
 {
 }
 
@@ -40,14 +44,61 @@
 {
 }
 
-void HextileDecoder::readRect(const Rect& r, ModifiablePixelBuffer* pb)
+void HextileDecoder::readRect(const Rect& r, rdr::InStream* is,
+                              const ConnParams& cp, rdr::OutStream* os)
 {
-  rdr::InStream* is = conn->getInStream();
-  rdr::U8* buf = conn->reader()->getImageBuf(16 * 16 * 4);
-  const PixelFormat& pf = conn->cp.pf();
+  Rect t;
+  size_t bytesPerPixel;
+
+  bytesPerPixel = cp.pf().bpp/8;
+
+  for (t.tl.y = r.tl.y; t.tl.y < r.br.y; t.tl.y += 16) {
+
+    t.br.y = __rfbmin(r.br.y, t.tl.y + 16);
+
+    for (t.tl.x = r.tl.x; t.tl.x < r.br.x; t.tl.x += 16) {
+      rdr::U8 tileType;
+
+      t.br.x = __rfbmin(r.br.x, t.tl.x + 16);
+
+      tileType = is->readU8();
+      os->writeU8(tileType);
+
+      if (tileType & hextileRaw) {
+        os->copyBytes(is, t.area() * bytesPerPixel);
+        continue;
+      }
+
+      if (tileType & hextileBgSpecified)
+        os->copyBytes(is, bytesPerPixel);
+
+      if (tileType & hextileFgSpecified)
+        os->copyBytes(is, bytesPerPixel);
+
+      if (tileType & hextileAnySubrects) {
+        rdr::U8 nSubrects;
+
+        nSubrects = is->readU8();
+        os->writeU8(nSubrects);
+
+        if (tileType & hextileSubrectsColoured)
+          os->copyBytes(is, nSubrects * (bytesPerPixel + 2));
+        else
+          os->copyBytes(is, nSubrects * 2);
+      }
+    }
+  }
+}
+
+void HextileDecoder::decodeRect(const Rect& r, const void* buffer,
+                                size_t buflen, const ConnParams& cp,
+                                ModifiablePixelBuffer* pb)
+{
+  rdr::MemInStream is(buffer, buflen);
+  const PixelFormat& pf = cp.pf();
   switch (pf.bpp) {
-  case 8:  hextileDecode8 (r, is, (rdr::U8*) buf, pf, pb); break;
-  case 16: hextileDecode16(r, is, (rdr::U16*)buf, pf, pb); break;
-  case 32: hextileDecode32(r, is, (rdr::U32*)buf, pf, pb); break;
+  case 8:  hextileDecode8 (r, &is, pf, pb); break;
+  case 16: hextileDecode16(r, &is, pf, pb); break;
+  case 32: hextileDecode32(r, &is, pf, pb); break;
   }
 }
diff --git a/common/rfb/HextileDecoder.h b/common/rfb/HextileDecoder.h
index ffc495e..bdc76bc 100644
--- a/common/rfb/HextileDecoder.h
+++ b/common/rfb/HextileDecoder.h
@@ -24,9 +24,13 @@
 
   class HextileDecoder : public Decoder {
   public:
-    HextileDecoder(CConnection* conn);
+    HextileDecoder();
     virtual ~HextileDecoder();
-    virtual void readRect(const Rect& r, ModifiablePixelBuffer* pb);
+    virtual void readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os);
+    virtual void decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb);
   };
 }
 #endif
diff --git a/common/rfb/Logger_file.cxx b/common/rfb/Logger_file.cxx
index 8a109e4..ebe15d5 100644
--- a/common/rfb/Logger_file.cxx
+++ b/common/rfb/Logger_file.cxx
@@ -61,13 +61,11 @@
     if (!m_file) return;
   }
 
-#ifndef _WIN32_WCE
   time_t current = time(0);
   if (current != m_lastLogTime) {
     m_lastLogTime = current;
     fprintf(m_file, "\n%s", ctime(&m_lastLogTime));
   }
-#endif
 
   fprintf(m_file," %s:", logname);
   int column = strlen(logname) + 2;
diff --git a/common/rfb/RREDecoder.cxx b/common/rfb/RREDecoder.cxx
index 8dc391a..218c9b0 100644
--- a/common/rfb/RREDecoder.cxx
+++ b/common/rfb/RREDecoder.cxx
@@ -15,8 +15,12 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
  * USA.
  */
-#include <rfb/CMsgReader.h>
-#include <rfb/CConnection.h>
+
+#include <rdr/InStream.h>
+#include <rdr/MemInStream.h>
+#include <rdr/OutStream.h>
+
+#include <rfb/ConnParams.h>
 #include <rfb/PixelBuffer.h>
 #include <rfb/RREDecoder.h>
 
@@ -32,7 +36,7 @@
 #include <rfb/rreDecode.h>
 #undef BPP
 
-RREDecoder::RREDecoder(CConnection* conn) : Decoder(conn)
+RREDecoder::RREDecoder() : Decoder(DecoderPlain)
 {
 }
 
@@ -40,13 +44,26 @@
 {
 }
 
-void RREDecoder::readRect(const Rect& r, ModifiablePixelBuffer* pb)
+void RREDecoder::readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os)
 {
-  rdr::InStream* is = conn->getInStream();
-  const PixelFormat& pf = conn->cp.pf();
+  rdr::U32 numRects;
+
+  numRects = is->readU32();
+  os->writeU32(numRects);
+
+  os->copyBytes(is, cp.pf().bpp/8 + numRects * (cp.pf().bpp/8 + 8));
+}
+
+void RREDecoder::decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb)
+{
+  rdr::MemInStream is(buffer, buflen);
+  const PixelFormat& pf = cp.pf();
   switch (pf.bpp) {
-  case 8:  rreDecode8 (r, is, pf, pb); break;
-  case 16: rreDecode16(r, is, pf, pb); break;
-  case 32: rreDecode32(r, is, pf, pb); break;
+  case 8:  rreDecode8 (r, &is, pf, pb); break;
+  case 16: rreDecode16(r, &is, pf, pb); break;
+  case 32: rreDecode32(r, &is, pf, pb); break;
   }
 }
diff --git a/common/rfb/RREDecoder.h b/common/rfb/RREDecoder.h
index b33bc55..f89fef4 100644
--- a/common/rfb/RREDecoder.h
+++ b/common/rfb/RREDecoder.h
@@ -24,9 +24,13 @@
 
   class RREDecoder : public Decoder {
   public:
-    RREDecoder(CConnection* conn);
+    RREDecoder();
     virtual ~RREDecoder();
-    virtual void readRect(const Rect& r, ModifiablePixelBuffer* pb);
+    virtual void readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os);
+    virtual void decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb);
   };
 }
 #endif
diff --git a/common/rfb/RawDecoder.cxx b/common/rfb/RawDecoder.cxx
index d2b3d06..786f154 100644
--- a/common/rfb/RawDecoder.cxx
+++ b/common/rfb/RawDecoder.cxx
@@ -15,15 +15,17 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
  * USA.
  */
-#include <rdr/InStream.h>
-#include <rfb/CMsgReader.h>
-#include <rfb/CConnection.h>
+
+#include <assert.h>
+
+#include <rdr/OutStream.h>
+#include <rfb/ConnParams.h>
 #include <rfb/PixelBuffer.h>
 #include <rfb/RawDecoder.h>
 
 using namespace rfb;
 
-RawDecoder::RawDecoder(CConnection* conn) : Decoder(conn)
+RawDecoder::RawDecoder() : Decoder(DecoderPlain)
 {
 }
 
@@ -31,22 +33,16 @@
 {
 }
 
-void RawDecoder::readRect(const Rect& r, ModifiablePixelBuffer* pb)
+void RawDecoder::readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os)
 {
-  int x = r.tl.x;
-  int y = r.tl.y;
-  int w = r.width();
-  int h = r.height();
-  int nPixels;
-  rdr::U8* imageBuf = conn->reader()->getImageBuf(w, w*h, &nPixels);
-  const PixelFormat& pf = conn->cp.pf();
-  int bytesPerRow = w * (pf.bpp / 8);
-  while (h > 0) {
-    int nRows = nPixels / w;
-    if (nRows > h) nRows = h;
-    conn->getInStream()->readBytes(imageBuf, nRows * bytesPerRow);
-    pb->imageRect(pf, Rect(x, y, x+w, y+nRows), imageBuf);
-    h -= nRows;
-    y += nRows;
-  }
+  os->copyBytes(is, r.area() * cp.pf().bpp/8);
+}
+
+void RawDecoder::decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb)
+{
+  assert(buflen >= (size_t)r.area() * cp.pf().bpp/8);
+  pb->imageRect(cp.pf(), r, buffer);
 }
diff --git a/common/rfb/RawDecoder.h b/common/rfb/RawDecoder.h
index 7a784c6..21ea738 100644
--- a/common/rfb/RawDecoder.h
+++ b/common/rfb/RawDecoder.h
@@ -21,12 +21,15 @@
 #include <rfb/Decoder.h>
 
 namespace rfb {
-
   class RawDecoder : public Decoder {
   public:
-    RawDecoder(CConnection* conn);
+    RawDecoder();
     virtual ~RawDecoder();
-    virtual void readRect(const Rect& r, ModifiablePixelBuffer* pb);
+    virtual void readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os);
+    virtual void decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb);
   };
 }
 #endif
diff --git a/common/rfb/SConnection.cxx b/common/rfb/SConnection.cxx
index bfff70c..cc15d7c 100644
--- a/common/rfb/SConnection.cxx
+++ b/common/rfb/SConnection.cxx
@@ -67,11 +67,6 @@
 SConnection::~SConnection()
 {
   if (ssecurity) ssecurity->destroy();
-  deleteReaderAndWriter();
-}
-
-void SConnection::deleteReaderAndWriter()
-{
   delete reader_;
   reader_ = 0;
   delete writer_;
diff --git a/common/rfb/SConnection.h b/common/rfb/SConnection.h
index ef1de2b..b43cf08 100644
--- a/common/rfb/SConnection.h
+++ b/common/rfb/SConnection.h
@@ -140,13 +140,6 @@
     bool authenticated() { return (state_ == RFBSTATE_INITIALISATION ||
                                    state_ == RFBSTATE_NORMAL); }
 
-    // deleteReaderAndWriter() deletes the reader and writer associated with
-    // this connection.  This may be useful if you want to delete the streams
-    // before deleting the SConnection to make sure that no attempt by the
-    // SConnection is made to read or write.
-    // XXX Do we really need this at all???
-    void deleteReaderAndWriter();
-
     // throwConnFailedException() prints a message to the log, sends a conn
     // failed message to the client (if possible) and throws a
     // ConnFailedException.
diff --git a/common/rfb/SecurityClient.cxx b/common/rfb/SecurityClient.cxx
index fe34712..9bd780f 100644
--- a/common/rfb/SecurityClient.cxx
+++ b/common/rfb/SecurityClient.cxx
@@ -84,7 +84,7 @@
     return new CSecurityStack(secTypeX509None, "X509 with no password",
 			      new CSecurityTLS(false));
   case secTypeX509Vnc:
-    return new CSecurityStack(secTypeX509None, "X509 with VNCAuth",
+    return new CSecurityStack(secTypeX509Vnc, "X509 with VNCAuth",
 			      new CSecurityTLS(false), new CSecurityVncAuth());
   case secTypeX509Plain:
     return new CSecurityStack(secTypeX509Plain, "X509 with Username/Password",
diff --git a/common/rfb/TightDecoder.cxx b/common/rfb/TightDecoder.cxx
index 5f4142b..3a1254a 100644
--- a/common/rfb/TightDecoder.cxx
+++ b/common/rfb/TightDecoder.cxx
@@ -1,5 +1,6 @@
 /* Copyright (C) 2000-2003 Constantin Kaplinsky.  All Rights Reserved.
  * Copyright 2004-2005 Cendio AB.
+ * Copyright 2009-2015 Pierre Ossman for Cendio AB
  * Copyright (C) 2011 D. R. Commander.  All Rights Reserved.
  *    
  * This is free software; you can redistribute it and/or modify
@@ -17,14 +18,23 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
  * USA.
  */
-#include <rfb/CMsgReader.h>
-#include <rfb/CConnection.h>
+
+#include <assert.h>
+
+#include <rdr/InStream.h>
+#include <rdr/MemInStream.h>
+#include <rdr/OutStream.h>
+
+#include <rfb/ConnParams.h>
+#include <rfb/Exception.h>
 #include <rfb/PixelBuffer.h>
+#include <rfb/TightConstants.h>
 #include <rfb/TightDecoder.h>
 
 using namespace rfb;
 
-#define TIGHT_MAX_WIDTH 2048
+static const int TIGHT_MAX_WIDTH = 2048;
+static const int TIGHT_MIN_TO_COMPRESS = 12;
 
 #define BPP 8
 #include <rfb/tightDecode.h>
@@ -36,7 +46,7 @@
 #include <rfb/tightDecode.h>
 #undef BPP
 
-TightDecoder::TightDecoder(CConnection* conn) : Decoder(conn)
+TightDecoder::TightDecoder() : Decoder(DecoderPartiallyOrdered)
 {
 }
 
@@ -44,29 +54,386 @@
 {
 }
 
-void TightDecoder::readRect(const Rect& r, ModifiablePixelBuffer* pb)
+void TightDecoder::readRect(const Rect& r, rdr::InStream* is,
+                            const ConnParams& cp, rdr::OutStream* os)
 {
-  is = conn->getInStream();
-  this->pb = pb;
-  clientpf = pb->getPF();
-  serverpf = conn->cp.pf();
+  rdr::U8 comp_ctl;
 
-  if (clientpf.equal(serverpf)) {
-    /* Decode directly into the framebuffer (fast path) */
+  comp_ctl = is->readU8();
+  os->writeU8(comp_ctl);
+
+  comp_ctl >>= 4;
+
+  // "Fill" compression type.
+  if (comp_ctl == tightFill) {
+    if (cp.pf().is888())
+      os->copyBytes(is, 3);
+    else
+      os->copyBytes(is, cp.pf().bpp/8);
+    return;
+  }
+
+  // "JPEG" compression type.
+  if (comp_ctl == tightJpeg) {
+    rdr::U32 len;
+
+    len = readCompact(is);
+    os->writeOpaque32(len);
+    os->copyBytes(is, len);
+    return;
+  }
+
+  // Quit on unsupported compression type.
+  if (comp_ctl > tightMaxSubencoding)
+    throw Exception("TightDecoder: bad subencoding value received");
+
+  // "Basic" compression type.
+
+  int palSize = 0;
+
+  if (r.width() > TIGHT_MAX_WIDTH)
+    throw Exception("TightDecoder: too large rectangle (%d pixels)", r.width());
+
+  // Possible palette
+  if ((comp_ctl & tightExplicitFilter) != 0) {
+    rdr::U8 filterId;
+
+    filterId = is->readU8();
+    os->writeU8(filterId);
+
+    switch (filterId) {
+    case tightFilterPalette:
+      palSize = is->readU8() + 1;
+      os->writeU8(palSize - 1);
+
+      if (cp.pf().is888())
+        os->copyBytes(is, palSize * 3);
+      else
+        os->copyBytes(is, palSize * cp.pf().bpp/8);
+      break;
+    case tightFilterGradient:
+      if (cp.pf().bpp == 8)
+        throw Exception("TightDecoder: invalid BPP for gradient filter");
+      break;
+    case tightFilterCopy:
+      break;
+    default:
+      throw Exception("TightDecoder: unknown filter code received");
+    }
+  }
+
+  size_t rowSize, dataSize;
+
+  if (palSize != 0) {
+    if (palSize <= 2)
+      rowSize = (r.width() + 7) / 8;
+    else
+      rowSize = r.width();
+  } else if (cp.pf().is888()) {
+    rowSize = r.width() * 3;
+  } else {
+    rowSize = r.width() * cp.pf().bpp/8;
+  }
+
+  dataSize = r.height() * rowSize;
+
+  if (dataSize < TIGHT_MIN_TO_COMPRESS)
+    os->copyBytes(is, dataSize);
+  else {
+    rdr::U32 len;
+
+    len = readCompact(is);
+    os->writeOpaque32(len);
+    os->copyBytes(is, len);
+  }
+}
+
+bool TightDecoder::doRectsConflict(const Rect& rectA,
+                                   const void* bufferA,
+                                   size_t buflenA,
+                                   const Rect& rectB,
+                                   const void* bufferB,
+                                   size_t buflenB,
+                                   const ConnParams& cp)
+{
+  // Two rects conflict when they touch a common zlib stream, be it
+  // through a stream reset (low nibble of the control byte) or through
+  // possible zlib use (stream id in bits 4-5, when bit 7 is clear).
+  assert(buflenA >= 1);
+  assert(buflenB >= 1);
+
+  rdr::U8 ctlA = *static_cast<const rdr::U8*>(bufferA);
+  rdr::U8 ctlB = *static_cast<const rdr::U8*>(bufferB);
+
+  // Fold the stream in use into the reset mask so one test covers both
+  if (!(ctlA & 0x80))
+    ctlA |= 1 << ((ctlA >> 4) & 0x03);
+  if (!(ctlB & 0x80))
+    ctlB |= 1 << ((ctlB >> 4) & 0x03);
+
+  // The high nibbles are irrelevant here; only shared streams matter
+  const int sharedStreams = ctlA & ctlB & 0x0f;
+  return sharedStreams != 0;
+}
+
+void TightDecoder::decodeRect(const Rect& r, const void* buffer,
+                              size_t buflen, const ConnParams& cp,
+                              ModifiablePixelBuffer* pb)
+{
+  const rdr::U8* bufptr;
+  const PixelFormat& pf = cp.pf();
+
+  rdr::U8 comp_ctl;
+
+  bufptr = (const rdr::U8*)buffer;
+
+  assert(buflen >= 1);
+
+  comp_ctl = *bufptr;
+  bufptr += 1;
+  buflen -= 1;
+
+  // Reset zlib streams if we are told by the server to do so.
+  for (int i = 0; i < 4; i++) {
+    if (comp_ctl & 1) {
+      zis[i].reset();
+    }
+    comp_ctl >>= 1;
+  }
+
+  // "Fill" compression type.
+  if (comp_ctl == tightFill) {
+    if (pf.is888()) {
+      rdr::U8 pix[4];
+
+      assert(buflen >= 3);
+
+      pf.bufferFromRGB(pix, bufptr, 1);
+      pb->fillRect(pf, r, pix);
+    } else {
+      assert(buflen >= (size_t)pf.bpp/8);
+      pb->fillRect(pf, r, bufptr);
+    }
+    return;
+  }
+
+  // "JPEG" compression type.
+  if (comp_ctl == tightJpeg) {
+    rdr::U32 len;
+    int stride;
+    JpegDecompressor jd;
+
+    assert(buflen >= 4);
+
+    memcpy(&len, bufptr, 4);
+    bufptr += 4;
+    buflen -= 4;
+
+    // The complete JPEG image must be present in the buffer
+    assert(buflen >= len);
+
+    // We always use direct decoding with JPEG images
+    rdr::U8* buf = pb->getBufferRW(r, &stride);
+    jd.decompress(bufptr, len, buf, stride, r, pb->getPF());
+    pb->commitBufferRW(r);
+    return;
+  }
+
+  // Quit on unsupported compression type.
+  assert(comp_ctl <= tightMaxSubencoding);
+
+  // "Basic" compression type.
+
+  int palSize = 0;
+  rdr::U8 palette[256 * 4];
+  bool useGradient = false;
+
+  if ((comp_ctl & tightExplicitFilter) != 0) {
+    rdr::U8 filterId;
+
+    assert(buflen >= 1);
+
+    filterId = *bufptr;
+    bufptr += 1;
+    buflen -= 1;
+
+    switch (filterId) {
+    case tightFilterPalette:
+      assert(buflen >= 1);
+
+      // Stored as size - 1, so the palette always has 1..256 entries
+      palSize = *bufptr + 1;
+      bufptr += 1;
+      buflen -= 1;
+
+      if (pf.is888()) {
+        // Packed 3 byte RGB entries. Convert straight from the input
+        // buffer rather than via a (non-standard) variable length array.
+        const size_t len = palSize * 3;
+
+        assert(buflen >= len);
+
+        pf.bufferFromRGB(palette, bufptr, palSize);
+        bufptr += len;
+        buflen -= len;
+      } else {
+        // Entries are whole pixels in the connection's pixel format
+        const size_t len = palSize * pf.bpp/8;
+
+        assert(buflen >= len);
+
+        memcpy(palette, bufptr, len);
+        bufptr += len;
+        buflen -= len;
+      }
+      break;
+    case tightFilterGradient:
+      useGradient = true;
+      break;
+    case tightFilterCopy:
+      break;
+    default:
+      assert(false);
+    }
+  }
+
+  // Determine if the data should be decompressed or just copied.
+  size_t rowSize, dataSize;
+  rdr::U8* netbuf;
+
+  netbuf = NULL;
+
+  if (palSize != 0) {
+    if (palSize <= 2)
+      rowSize = (r.width() + 7) / 8;
+    else
+      rowSize = r.width();
+  } else if (pf.is888()) {
+    rowSize = r.width() * 3;
+  } else {
+    rowSize = r.width() * pf.bpp/8;
+  }
+
+  dataSize = r.height() * rowSize;
+
+  if (dataSize < TIGHT_MIN_TO_COMPRESS)
+    assert(buflen >= dataSize);
+  else {
+    rdr::U32 len;
+    int streamId;
+    rdr::MemInStream* ms;
+
+    assert(buflen >= 4);
+
+    memcpy(&len, bufptr, 4);
+    bufptr += 4;
+    buflen -= 4;
+
+    assert(buflen >= len);
+
+    streamId = comp_ctl & 0x03;
+    ms = new rdr::MemInStream(bufptr, len);
+    zis[streamId].setUnderlying(ms, len);
+
+    // Allocate buffer and decompress the data
+    netbuf = new rdr::U8[dataSize];
+
+    zis[streamId].readBytes(netbuf, dataSize);
+
+    zis[streamId].removeUnderlying();
+    delete ms;
+
+    bufptr = netbuf;
+    buflen = dataSize;
+  }
+
+  // Time to decode the actual data
+  bool directDecode;
+
+  rdr::U8* outbuf;
+  int stride;
+
+  if (pb->getPF().equal(pf)) {
+    // Decode directly into the framebuffer (fast path)
     directDecode = true;
   } else {
-    /* Decode into an intermediate buffer and use pixel translation */
+    // Decode into an intermediate buffer and use pixel translation
     directDecode = false;
   }
 
-  switch (serverpf.bpp) {
-  case 8:
-    tightDecode8 (r); break;
-  case 16:
-    tightDecode16(r); break;
-  case 32:
-    tightDecode32(r); break;
+  if (directDecode)
+    outbuf = pb->getBufferRW(r, &stride);
+  else {
+    outbuf = new rdr::U8[r.area() * pf.bpp/8];
+    stride = r.width();
   }
+
+  if (palSize == 0) {
+    // Truecolor data
+    if (useGradient) {
+      if (pf.is888())
+        FilterGradient24(bufptr, pf, (rdr::U32*)outbuf, stride, r);
+      else {
+        switch (pf.bpp) {
+        case 8:
+          assert(false);
+          break;
+        case 16:
+          FilterGradient(bufptr, pf, (rdr::U16*)outbuf, stride, r);
+          break;
+        case 32:
+          FilterGradient(bufptr, pf, (rdr::U32*)outbuf, stride, r);
+          break;
+        }
+      }
+    } else {
+      // Copy
+      rdr::U8* ptr = outbuf;
+      const rdr::U8* srcPtr = bufptr;
+      int w = r.width();
+      int h = r.height();
+      if (pf.is888()) {
+        while (h > 0) {
+          pf.bufferFromRGB(ptr, srcPtr, w);
+          ptr += stride * pf.bpp/8;
+          srcPtr += w * 3;
+          h--;
+        }
+      } else {
+        while (h > 0) {
+          memcpy(ptr, srcPtr, w * pf.bpp/8);
+          ptr += stride * pf.bpp/8;
+          srcPtr += w * pf.bpp/8;
+          h--;
+        }
+      }
+    }
+  } else {
+    // Indexed color
+    switch (pf.bpp) {
+    case 8:
+      FilterPalette((const rdr::U8*)palette, palSize,
+                    bufptr, (rdr::U8*)outbuf, stride, r);
+      break;
+    case 16:
+      FilterPalette((const rdr::U16*)palette, palSize,
+                    bufptr, (rdr::U16*)outbuf, stride, r);
+      break;
+    case 32:
+      FilterPalette((const rdr::U32*)palette, palSize,
+                    bufptr, (rdr::U32*)outbuf, stride, r);
+      break;
+    }
+  }
+
+  if (directDecode)
+    pb->commitBufferRW(r);
+  else {
+    pb->imageRect(pf, r, outbuf);
+    delete [] outbuf;
+  }
+
+  delete [] netbuf;
 }
 
 rdr::U32 TightDecoder::readCompact(rdr::InStream* is)
diff --git a/common/rfb/TightDecoder.h b/common/rfb/TightDecoder.h
index a44f7d8..6eb93d2 100644
--- a/common/rfb/TightDecoder.h
+++ b/common/rfb/TightDecoder.h
@@ -1,5 +1,6 @@
 /* Copyright (C) 2000-2003 Constantin Kaplinsky.  All Rights Reserved.
  * Copyright (C) 2011 D. R. Commander.  All Rights Reserved.
+ * Copyright 2009-2015 Pierre Ossman for Cendio AB
  *    
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -28,41 +29,44 @@
   class TightDecoder : public Decoder {
 
   public:
-    TightDecoder(CConnection* conn);
+    TightDecoder();
     virtual ~TightDecoder();
-    virtual void readRect(const Rect& r, ModifiablePixelBuffer* pb);
+    virtual void readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os);
+    virtual bool doRectsConflict(const Rect& rectA,
+                                 const void* bufferA,
+                                 size_t buflenA,
+                                 const Rect& rectB,
+                                 const void* bufferB,
+                                 size_t buflenB,
+                                 const ConnParams& cp);
+    virtual void decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb);
 
   private:
     rdr::U32 readCompact(rdr::InStream* is);
 
-    void tightDecode8(const Rect& r);
-    void tightDecode16(const Rect& r);
-    void tightDecode32(const Rect& r);
+    void FilterGradient24(const rdr::U8* inbuf, const PixelFormat& pf,
+                          rdr::U32* outbuf, int stride, const Rect& r);
 
-    void DecompressJpegRect8(const Rect& r);
-    void DecompressJpegRect16(const Rect& r);
-    void DecompressJpegRect32(const Rect& r);
+    void FilterGradient(const rdr::U8* inbuf, const PixelFormat& pf,
+                        rdr::U16* outbuf, int stride, const Rect& r);
+    void FilterGradient(const rdr::U8* inbuf, const PixelFormat& pf,
+                        rdr::U32* outbuf, int stride, const Rect& r);
 
-    void FilterGradient8(rdr::U8 *netbuf, rdr::U8* buf, int stride, 
-                         const Rect& r);
-    void FilterGradient16(rdr::U8 *netbuf, rdr::U16* buf, int stride, 
-                          const Rect& r);
-    void FilterGradient24(rdr::U8 *netbuf, rdr::U32* buf, int stride, 
-                          const Rect& r);
-    void FilterGradient32(rdr::U8 *netbuf, rdr::U32* buf, int stride, 
-                          const Rect& r);
+    void FilterPalette(const rdr::U8* palette, int palSize,
+                       const rdr::U8* inbuf, rdr::U8* outbuf,
+                       int stride, const Rect& r);
+    void FilterPalette(const rdr::U16* palette, int palSize,
+                       const rdr::U8* inbuf, rdr::U16* outbuf,
+                       int stride, const Rect& r);
+    void FilterPalette(const rdr::U32* palette, int palSize,
+                       const rdr::U8* inbuf, rdr::U32* outbuf,
+                       int stride, const Rect& r);
 
-    void directFillRect8(const Rect& r, Pixel pix);
-    void directFillRect16(const Rect& r, Pixel pix);
-    void directFillRect32(const Rect& r, Pixel pix);
-
-    ModifiablePixelBuffer* pb;
-    rdr::InStream* is;
+  private:
     rdr::ZlibInStream zis[4];
-    JpegDecompressor jd;
-    PixelFormat clientpf;
-    PixelFormat serverpf;
-    bool directDecode;
   };
 }
 
diff --git a/common/rfb/Timer.cxx b/common/rfb/Timer.cxx
index 5f9c6f2..676f24e 100644
--- a/common/rfb/Timer.cxx
+++ b/common/rfb/Timer.cxx
@@ -19,11 +19,8 @@
 // -=- Timer.cxx
 
 #include <stdio.h>
-#ifdef WIN32
-#ifndef _WIN32_WCE
-#include <sys/timeb.h>
-#endif
-#endif
+#include <sys/time.h>
+
 #include <rfb/Timer.h>
 #include <rfb/util.h>
 #include <rfb/LogWriter.h>
@@ -40,39 +37,6 @@
 #endif
 
 
-// Win32 does not provide gettimeofday, so we emulate it to simplify the
-// Timer code.
-
-#ifdef _WIN32
-static void gettimeofday(struct timeval* tv, void*)
-{
-  LARGE_INTEGER counts, countsPerSec;
-  static double usecPerCount = 0.0;
-
-  if (QueryPerformanceCounter(&counts)) {
-    if (usecPerCount == 0.0) {
-      QueryPerformanceFrequency(&countsPerSec);
-      usecPerCount = 1000000.0 / countsPerSec.QuadPart;
-    }
-
-    LONGLONG usecs = (LONGLONG)(counts.QuadPart * usecPerCount);
-    tv->tv_usec = (long)(usecs % 1000000);
-    tv->tv_sec = (long)(usecs / 1000000);
-
-  } else {
-#ifndef _WIN32_WCE
-    struct timeb tb;
-    ftime(&tb);
-    tv->tv_sec = tb.time;
-    tv->tv_usec = tb.millitm * 1000;
-#else
-    throw SystemException("QueryPerformanceCounter", GetLastError());
-#endif
-  }
-}
-#endif
-
-
 // Millisecond timeout processing helper functions
 
 inline static timeval addMillis(timeval inTime, int millis) {
diff --git a/common/rfb/ZRLEDecoder.cxx b/common/rfb/ZRLEDecoder.cxx
index 60e5dd1..c13f286 100644
--- a/common/rfb/ZRLEDecoder.cxx
+++ b/common/rfb/ZRLEDecoder.cxx
@@ -15,8 +15,12 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
  * USA.
  */
-#include <rfb/CMsgReader.h>
-#include <rfb/CConnection.h>
+
+#include <rdr/InStream.h>
+#include <rdr/MemInStream.h>
+#include <rdr/OutStream.h>
+
+#include <rfb/ConnParams.h>
 #include <rfb/PixelBuffer.h>
 #include <rfb/ZRLEDecoder.h>
 
@@ -58,7 +62,7 @@
 #undef CPIXEL
 #undef BPP
 
-ZRLEDecoder::ZRLEDecoder(CConnection* conn) : Decoder(conn)
+ZRLEDecoder::ZRLEDecoder() : Decoder(DecoderOrdered)
 {
 }
 
@@ -66,14 +70,26 @@
 {
 }
 
-void ZRLEDecoder::readRect(const Rect& r, ModifiablePixelBuffer* pb)
+void ZRLEDecoder::readRect(const Rect& r, rdr::InStream* is,
+                           const ConnParams& cp, rdr::OutStream* os)
 {
-  rdr::InStream* is = conn->getInStream();
-  rdr::U8* buf = conn->reader()->getImageBuf(64 * 64 * 4);
-  const rfb::PixelFormat& pf = conn->cp.pf();
+  rdr::U32 len;
+
+  len = is->readU32();
+  os->writeU32(len);
+  os->copyBytes(is, len);
+}
+
+void ZRLEDecoder::decodeRect(const Rect& r, const void* buffer,
+                             size_t buflen, const ConnParams& cp,
+                             ModifiablePixelBuffer* pb)
+{
+  rdr::MemInStream is(buffer, buflen);
+  const rfb::PixelFormat& pf = cp.pf();
+  rdr::U32 buf[64 * 64 * 4]; // fixed, U32-aligned scratch for any bpp
   switch (pf.bpp) {
-  case 8:  zrleDecode8 (r, is, &zis, (rdr::U8*) buf, pf, pb); break;
-  case 16: zrleDecode16(r, is, &zis, (rdr::U16*)buf, pf, pb); break;
+  case 8:  zrleDecode8 (r, &is, &zis, (rdr::U8*) buf, pf, pb); break;
+  case 16: zrleDecode16(r, &is, &zis, (rdr::U16*)buf, pf, pb); break;
   case 32:
     {
       Pixel maxPixel = pf.pixelFromRGB((rdr::U16)-1, (rdr::U16)-1, (rdr::U16)-1);
@@ -83,16 +99,16 @@
       if ((fitsInLS3Bytes && pf.isLittleEndian()) ||
           (fitsInMS3Bytes && pf.isBigEndian()))
       {
-        zrleDecode24A(r, is, &zis, (rdr::U32*)buf, pf, pb);
+        zrleDecode24A(r, &is, &zis, (rdr::U32*)buf, pf, pb);
       }
       else if ((fitsInLS3Bytes && pf.isBigEndian()) ||
                (fitsInMS3Bytes && pf.isLittleEndian()))
       {
-        zrleDecode24B(r, is, &zis, (rdr::U32*)buf, pf, pb);
+        zrleDecode24B(r, &is, &zis, (rdr::U32*)buf, pf, pb);
       }
       else
       {
-        zrleDecode32(r, is, &zis, (rdr::U32*)buf, pf, pb);
+        zrleDecode32(r, &is, &zis, (rdr::U32*)buf, pf, pb);
       }
       break;
     }
diff --git a/common/rfb/ZRLEDecoder.h b/common/rfb/ZRLEDecoder.h
index 492597e..1e33851 100644
--- a/common/rfb/ZRLEDecoder.h
+++ b/common/rfb/ZRLEDecoder.h
@@ -25,9 +25,13 @@
 
   class ZRLEDecoder : public Decoder {
   public:
-    ZRLEDecoder(CConnection* conn);
+    ZRLEDecoder();
     virtual ~ZRLEDecoder();
-    virtual void readRect(const Rect& r, ModifiablePixelBuffer* pb);
+    virtual void readRect(const Rect& r, rdr::InStream* is,
+                          const ConnParams& cp, rdr::OutStream* os);
+    virtual void decodeRect(const Rect& r, const void* buffer,
+                            size_t buflen, const ConnParams& cp,
+                            ModifiablePixelBuffer* pb);
   private:
     rdr::ZlibInStream zis;
   };
diff --git a/common/rfb/hextileDecode.h b/common/rfb/hextileDecode.h
index 56ba118..7affa15 100644
--- a/common/rfb/hextileDecode.h
+++ b/common/rfb/hextileDecode.h
@@ -37,12 +37,14 @@
 #define READ_PIXEL CONCAT2E(readOpaque,BPP)
 #define HEXTILE_DECODE CONCAT2E(hextileDecode,BPP)
 
-void HEXTILE_DECODE (const Rect& r, rdr::InStream* is, PIXEL_T* buf,
-                     const PixelFormat& pf, ModifiablePixelBuffer* pb)
+static void HEXTILE_DECODE (const Rect& r, rdr::InStream* is,
+                            const PixelFormat& pf,
+                            ModifiablePixelBuffer* pb)
 {
   Rect t;
   PIXEL_T bg = 0;
   PIXEL_T fg = 0;
+  PIXEL_T buf[16 * 16 * 4];
 
   for (t.tl.y = r.tl.y; t.tl.y < r.br.y; t.tl.y += 16) {
 
@@ -55,20 +57,20 @@
       int tileType = is->readU8();
 
       if (tileType & hextileRaw) {
-	is->readBytes(buf, t.area() * (BPP/8));
-	pb->imageRect(pf, t, buf);
-	continue;
+        is->readBytes(buf, t.area() * (BPP/8));
+        pb->imageRect(pf, t, buf);
+        continue;
       }
 
       if (tileType & hextileBgSpecified)
-	bg = is->READ_PIXEL();
+        bg = is->READ_PIXEL();
 
       int len = t.area();
-      PIXEL_T* ptr = (PIXEL_T*)buf;
+      PIXEL_T* ptr = buf;
       while (len-- > 0) *ptr++ = bg;
 
       if (tileType & hextileFgSpecified)
-	fg = is->READ_PIXEL();
+        fg = is->READ_PIXEL();
 
       if (tileType & hextileAnySubrects) {
         int nSubrects = is->readU8();
@@ -85,7 +87,7 @@
           int y = (xy & 15);
           int w = ((wh >> 4) & 15) + 1;
           int h = (wh & 15) + 1;
-          PIXEL_T* ptr = (PIXEL_T*)buf + y * t.width() + x;
+          PIXEL_T* ptr = buf + y * t.width() + x;
           int rowAdd = t.width() - w;
           while (h-- > 0) {
             int len = w;
diff --git a/common/rfb/tightDecode.h b/common/rfb/tightDecode.h
index 7a1a9a2..b6e86ed 100644
--- a/common/rfb/tightDecode.h
+++ b/common/rfb/tightDecode.h
@@ -1,5 +1,6 @@
 /* Copyright (C) 2000-2003 Constantin Kaplinsky.  All Rights Reserved.
  * Copyright 2004-2005 Cendio AB.
+ * Copyright 2009-2015 Pierre Ossman for Cendio AB
  * Copyright (C) 2011 D. R. Commander.  All Rights Reserved.
  *    
  * This is free software; you can redistribute it and/or modify
@@ -24,11 +25,6 @@
 // This file is #included after having set the following macro:
 // BPP                - 8, 16 or 32
 
-#include <rdr/InStream.h>
-#include <rdr/ZlibInStream.h>
-#include <rfb/Exception.h>
-#include <rfb/TightConstants.h>
-
 namespace rfb {
 
 // CONCAT2E concatenates its arguments, expanding them if they are macros
@@ -39,232 +35,60 @@
 #endif
 
 #define PIXEL_T rdr::CONCAT2E(U,BPP)
-#define READ_PIXEL CONCAT2E(readOpaque,BPP)
-#define TIGHT_DECODE TightDecoder::CONCAT2E(tightDecode,BPP)
-#define DECOMPRESS_JPEG_RECT TightDecoder::CONCAT2E(DecompressJpegRect,BPP)
-#define FILTER_GRADIENT TightDecoder::CONCAT2E(FilterGradient,BPP)
-
-#define TIGHT_MIN_TO_COMPRESS 12
-
-// Main function implementing Tight decoder
-
-void TIGHT_DECODE (const Rect& r)
-{
-  bool cutZeros = false;
-#if BPP == 32
-  if (serverpf.is888()) {
-    cutZeros = true;
-  } 
-#endif
-
-  rdr::U8 comp_ctl = is->readU8();
-
-  // Flush zlib streams if we are told by the server to do so.
-  for (int i = 0; i < 4; i++) {
-    if (comp_ctl & 1) {
-      zis[i].reset();
-    }
-    comp_ctl >>= 1;
-  }
-
-  // "Fill" compression type.
-  if (comp_ctl == tightFill) {
-    PIXEL_T pix;
-    if (cutZeros) {
-      rdr::U8 bytebuf[3];
-      is->readBytes(bytebuf, 3);
-      serverpf.bufferFromRGB((rdr::U8*)&pix, bytebuf, 1);
-    } else {
-      pix = is->READ_PIXEL();
-    }
-    pb->fillRect(serverpf, r, &pix);
-    return;
-  }
-
-  // "JPEG" compression type.
-  if (comp_ctl == tightJpeg) {
-    DECOMPRESS_JPEG_RECT(r);
-    return;
-  }
-
-  // Quit on unsupported compression type.
-  if (comp_ctl > tightMaxSubencoding) {
-    throw Exception("TightDecoder: bad subencoding value received");
-    return;
-  }
-
-  // "Basic" compression type.
-  int palSize = 0;
-  static PIXEL_T palette[256];
-  bool useGradient = false;
-
-  if ((comp_ctl & tightExplicitFilter) != 0) {
-    rdr::U8 filterId = is->readU8();
-
-    switch (filterId) {
-    case tightFilterPalette:
-      palSize = is->readU8() + 1;
-      if (cutZeros) {
-        rdr::U8 tightPalette[256 * 3];
-        is->readBytes(tightPalette, palSize * 3);
-        serverpf.bufferFromRGB((rdr::U8*)palette, tightPalette, palSize);
-      } else {
-        is->readBytes(palette, palSize * sizeof(PIXEL_T));
-      }
-      break;
-    case tightFilterGradient:
-      useGradient = true;
-      break;
-    case tightFilterCopy:
-      break;
-    default:
-      throw Exception("TightDecoder: unknown filter code received");
-      return;
-    }
-  }
-
-  int bppp = BPP;
-  if (palSize != 0) {
-    bppp = (palSize <= 2) ? 1 : 8;
-  } else if (cutZeros) {
-    bppp = 24;
-  }
-
-  // Determine if the data should be decompressed or just copied.
-  int rowSize = (r.width() * bppp + 7) / 8;
-  int dataSize = r.height() * rowSize;
-  int streamId = -1;
-  rdr::InStream *input;
-  if (dataSize < TIGHT_MIN_TO_COMPRESS) {
-    input = is;
-  } else {
-    int length = readCompact(is);
-    streamId = comp_ctl & 0x03;
-    zis[streamId].setUnderlying(is, length);
-    input = &zis[streamId];
-  }
-
-  // Allocate netbuf and read in data
-  rdr::U8 *netbuf = new rdr::U8[dataSize];
-  if (!netbuf) {
-    throw Exception("rfb::TightDecoder::tightDecode unable to allocate buffer");
-  }
-  input->readBytes(netbuf, dataSize);
-
-  PIXEL_T *buf;
-  int stride = r.width();
-  if (directDecode) buf = (PIXEL_T *)pb->getBufferRW(r, &stride);
-  else buf = (PIXEL_T *)conn->reader()->getImageBuf(r.area());
-
-  if (palSize == 0) {
-    // Truecolor data
-    if (useGradient) {
-#if BPP == 32
-      if (cutZeros) {
-        FilterGradient24(netbuf, buf, stride, r);
-      } else 
-#endif
-      {
-        FILTER_GRADIENT(netbuf, buf, stride, r);
-      }
-    } else {
-      // Copy
-      int h = r.height();
-      PIXEL_T *ptr = buf;
-      rdr::U8 *srcPtr = netbuf;
-      int w = r.width();
-      if (cutZeros) {
-        while (h > 0) {
-          serverpf.bufferFromRGB((rdr::U8*)ptr, srcPtr, w);
-          ptr += stride;
-          srcPtr += w * 3;
-          h--;
-        }
-      } else {
-        while (h > 0) {
-          memcpy(ptr, srcPtr, w * sizeof(PIXEL_T));
-          ptr += stride;
-          srcPtr += w * sizeof(PIXEL_T);
-          h--;
-        }
-      }
-    }
-  } else {
-    // Indexed color
-    int x, h = r.height(), w = r.width(), b, pad = stride - w;
-    PIXEL_T *ptr = buf;
-    rdr::U8 bits, *srcPtr = netbuf;
-    if (palSize <= 2) {
-      // 2-color palette
-      while (h > 0) {
-        for (x = 0; x < w / 8; x++) {
-          bits = *srcPtr++;
-          for (b = 7; b >= 0; b--) {
-            *ptr++ = palette[bits >> b & 1];
-          }
-        }
-        if (w % 8 != 0) {
-          bits = *srcPtr++;
-          for (b = 7; b >= 8 - w % 8; b--) {
-            *ptr++ = palette[bits >> b & 1];
-          }
-        }
-        ptr += pad;
-        h--;
-      }
-    } else {
-      // 256-color palette
-      while (h > 0) {
-        PIXEL_T *endOfRow = ptr + w;
-        while (ptr < endOfRow) {
-          *ptr++ = palette[*srcPtr++];
-        }
-        ptr += pad;
-        h--;
-      }
-    }
-  }
-
-  if (directDecode) pb->commitBufferRW(r);
-  else pb->imageRect(serverpf, r, buf);
-
-  delete [] netbuf;
-
-  if (streamId != -1) {
-    zis[streamId].reset();
-  }
-}
-
-void
-DECOMPRESS_JPEG_RECT(const Rect& r)
-{
-  // Read length
-  int compressedLen = readCompact(is);
-  if (compressedLen <= 0) {
-      throw Exception("Incorrect data received from the server.\n");
-  }
-
-  // Allocate netbuf and read in data
-  rdr::U8* netbuf = new rdr::U8[compressedLen];
-  if (!netbuf) {
-    throw Exception("rfb::TightDecoder::DecompressJpegRect unable to allocate buffer");
-  }
-  is->readBytes(netbuf, compressedLen);
-
-  // We always use direct decoding with JPEG images
-  int stride;
-  rdr::U8 *buf = pb->getBufferRW(r, &stride);
-  jd.decompress(netbuf, compressedLen, buf, stride, r, clientpf);
-  pb->commitBufferRW(r);
-
-  delete [] netbuf;
-}
 
 #if BPP == 32
 
 void
-TightDecoder::FilterGradient24(rdr::U8 *netbuf, PIXEL_T* buf, int stride,
-                               const Rect& r)
+TightDecoder::FilterGradient24(const rdr::U8 *inbuf,
+                               const PixelFormat& pf, PIXEL_T* outbuf,
+                               int stride, const Rect& r)
+{
+  // Decodes 3 byte RGB deltas against a left + up - upleft prediction
+  int x, y, c;
+  rdr::U8 prevRow[TIGHT_MAX_WIDTH*3];
+  rdr::U8 thisRow[TIGHT_MAX_WIDTH*3];
+  rdr::U8 pix[3];
+  int est[3];
+  int rectHeight = r.height();
+  int rectWidth = r.width();
+
+  // readRect() rejects wider rects; the row buffers depend on that
+  assert(rectWidth <= TIGHT_MAX_WIDTH);
+  memset(prevRow, 0, sizeof(prevRow));
+
+  for (y = 0; y < rectHeight; y++) {
+    /* First pixel in a row */
+    for (c = 0; c < 3; c++) {
+      pix[c] = inbuf[y*rectWidth*3+c] + prevRow[c];
+      thisRow[c] = pix[c];
+    }
+    pf.bufferFromRGB((rdr::U8*)&outbuf[y*stride], pix, 1);
+
+    /* Remaining pixels of a row */
+    for (x = 1; x < rectWidth; x++) {
+      for (c = 0; c < 3; c++) {
+        est[c] = prevRow[x*3+c] + pix[c] - prevRow[(x-1)*3+c];
+        if (est[c] > 0xff) {
+          est[c] = 0xff;
+        } else if (est[c] < 0) {
+          est[c] = 0;
+        }
+        pix[c] = inbuf[(y*rectWidth+x)*3+c] + est[c];
+        thisRow[x*3+c] = pix[c];
+      }
+      pf.bufferFromRGB((rdr::U8*)&outbuf[y*stride+x], pix, 1);
+    }
+
+    memcpy(prevRow, thisRow, sizeof(prevRow));
+  }
+}
+
+#endif
+
+#if BPP != 8
+
+void TightDecoder::FilterGradient(const rdr::U8* inbuf,
+                                  const PixelFormat& pf, PIXEL_T* outbuf,
+                                  int stride, const Rect& r)
 {
   int x, y, c;
   static rdr::U8 prevRow[TIGHT_MAX_WIDTH*3];
@@ -280,57 +104,13 @@
 
   for (y = 0; y < rectHeight; y++) {
     /* First pixel in a row */
-    for (c = 0; c < 3; c++) {
-      pix[c] = netbuf[y*rectWidth*3+c] + prevRow[c];
-      thisRow[c] = pix[c];
-    }
-    serverpf.bufferFromRGB((rdr::U8*)&buf[y*stride], pix, 1);
-
-    /* Remaining pixels of a row */
-    for (x = 1; x < rectWidth; x++) {
-      for (c = 0; c < 3; c++) {
-        est[c] = prevRow[x*3+c] + pix[c] - prevRow[(x-1)*3+c];
-        if (est[c] > 0xff) {
-          est[c] = 0xff;
-        } else if (est[c] < 0) {
-          est[c] = 0;
-        }
-        pix[c] = netbuf[(y*rectWidth+x)*3+c] + est[c];
-        thisRow[x*3+c] = pix[c];
-      }
-      serverpf.bufferFromRGB((rdr::U8*)&buf[y*stride+x], pix, 1);
-    }
-
-    memcpy(prevRow, thisRow, sizeof(prevRow));
-  }
-}
-
-#endif
-
-void
-FILTER_GRADIENT(rdr::U8 *netbuf, PIXEL_T* buf, int stride, const Rect& r)
-{
-  int x, y, c;
-  static rdr::U8 prevRow[TIGHT_MAX_WIDTH*sizeof(PIXEL_T)];
-  static rdr::U8 thisRow[TIGHT_MAX_WIDTH*sizeof(PIXEL_T)];
-  rdr::U8 pix[3]; 
-  int est[3]; 
-
-  memset(prevRow, 0, sizeof(prevRow));
-
-  // Set up shortcut variables
-  int rectHeight = r.height();
-  int rectWidth = r.width();
-
-  for (y = 0; y < rectHeight; y++) {
-    /* First pixel in a row */
-    serverpf.rgbFromBuffer(pix, (rdr::U8*)&netbuf[y*rectWidth], 1);
+    pf.rgbFromBuffer(pix, &inbuf[y*rectWidth], 1);
     for (c = 0; c < 3; c++)
       pix[c] += prevRow[c];
 
     memcpy(thisRow, pix, sizeof(pix));
 
-    serverpf.bufferFromRGB((rdr::U8*)&buf[y*stride], pix, 1);
+    pf.bufferFromRGB((rdr::U8*)&outbuf[y*stride], pix, 1);
 
     /* Remaining pixels of a row */
     for (x = 1; x < rectWidth; x++) {
@@ -343,23 +123,60 @@
         }
       }
 
-      serverpf.rgbFromBuffer(pix, (rdr::U8*)&netbuf[y*rectWidth+x], 1);
+      pf.rgbFromBuffer(pix, &inbuf[y*rectWidth+x], 1);
       for (c = 0; c < 3; c++)
         pix[c] += est[c];
 
       memcpy(&thisRow[x*3], pix, sizeof(pix));
 
-      serverpf.bufferFromRGB((rdr::U8*)&buf[y*stride+x], pix, 1);
+      pf.bufferFromRGB((rdr::U8*)&outbuf[y*stride+x], pix, 1);
     }
 
     memcpy(prevRow, thisRow, sizeof(prevRow));
   }
 }
 
-#undef TIGHT_MIN_TO_COMPRESS
-#undef FILTER_GRADIENT
-#undef DECOMPRESS_JPEG_RECT
-#undef TIGHT_DECODE
-#undef READ_PIXEL
+#endif
+
+void TightDecoder::FilterPalette(const PIXEL_T* palette, int palSize,
+                                 const rdr::U8* inbuf, PIXEL_T* outbuf,
+                                 int stride, const Rect& r)
+{
+  // Expands the palette indices in inbuf into pixels in outbuf, where
+  // consecutive output rows are stride pixels apart.
+
+  const int width = r.width();
+  const int height = r.height();
+  const rdr::U8* src = inbuf;
+
+  if (palSize <= 2) {
+    // 1 bit per pixel, most significant bit first. Every row starts
+    // on a fresh byte, so the last byte of a row may be partly used.
+    for (int y = 0; y < height; y++) {
+      PIXEL_T* dst = outbuf + y * stride;
+      rdr::U8 bits = 0;
+
+      for (int x = 0; x < width; x++) {
+        const int shift = 7 - (x % 8);
+
+        // Fetch a new byte for each group of eight pixels
+        if (shift == 7)
+          bits = *src++;
+
+        dst[x] = palette[(bits >> shift) & 1];
+      }
+    }
+  } else {
+    // 1 byte per pixel, used directly as the index into the palette.
+    // (Up to 256 entries, as an index is a single byte.)
+    for (int y = 0; y < height; y++) {
+      PIXEL_T* dst = outbuf + y * stride;
+
+      for (int x = 0; x < width; x++)
+        dst[x] = palette[*src++];
+    }
+  }
+}
+
 #undef PIXEL_T
 }
diff --git a/common/rfb/zrleDecode.h b/common/rfb/zrleDecode.h
index 2566171..07d6795 100644
--- a/common/rfb/zrleDecode.h
+++ b/common/rfb/zrleDecode.h
@@ -177,7 +177,7 @@
     }
   }
 
-  zis->reset();
+  zis->removeUnderlying();
 }
 
 #undef ZRLE_DECODE
diff --git a/tests/decperf.cxx b/tests/decperf.cxx
index fe7b032..1fd763e 100644
--- a/tests/decperf.cxx
+++ b/tests/decperf.cxx
@@ -27,13 +27,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
+#include <sys/time.h>
 
 #include <rdr/Exception.h>
 #include <rdr/FileInStream.h>
 
 #include <rfb/CConnection.h>
 #include <rfb/CMsgReader.h>
-#include <rfb/Decoder.h>
 #include <rfb/PixelBuffer.h>
 #include <rfb/PixelFormat.h>
 
@@ -50,7 +50,8 @@
   virtual void setDesktopSize(int w, int h);
   virtual void setPixelFormat(const rfb::PixelFormat& pf);
   virtual void setCursor(int, int, const rfb::Point&, void*, void*);
-  virtual void dataRect(const rfb::Rect&, int);
+  virtual void framebufferUpdateStart();
+  virtual void framebufferUpdateEnd();
   virtual void setColourMapEntries(int, int, rdr::U16*);
   virtual void bell();
   virtual void serverCutText(const char*, rdr::U32);
@@ -60,27 +61,15 @@
 
 protected:
   rdr::FileInStream *in;
-  rfb::Decoder *decoders[rfb::encodingMax+1];
-  rfb::ManagedPixelBuffer pb;
 };
 
 CConn::CConn(const char *filename)
 {
-  int i;
-
   cpuTime = 0.0;
 
   in = new rdr::FileInStream(filename);
   setStreams(in, NULL);
 
-  memset(decoders, 0, sizeof(decoders));
-  for (i = 0;i < rfb::encodingMax;i++) {
-    if (!rfb::Decoder::supported(i))
-      continue;
-
-    decoders[i] = rfb::Decoder::createDecoder(i, this);
-  }
-
   // Need to skip the initial handshake
   setState(RFBSTATE_INITIALISATION);
   // That also means that the reader and writer weren't setup
@@ -89,40 +78,37 @@
 
 CConn::~CConn()
 {
-  int i;
-
   delete in;
-
-  for (i = 0;i < rfb::encodingMax;i++)
-    delete decoders[i];
 }
 
 void CConn::setDesktopSize(int w, int h)
 {
   CConnection::setDesktopSize(w, h);
 
-  pb.setSize(cp.width, cp.height);
+  setFramebuffer(new rfb::ManagedPixelBuffer(filePF, cp.width, cp.height));
 }
 
 void CConn::setPixelFormat(const rfb::PixelFormat& pf)
 {
   // Override format
   CConnection::setPixelFormat(filePF);
-
-  pb.setPF(cp.pf());
 }
 
 void CConn::setCursor(int, int, const rfb::Point&, void*, void*)
 {
 }
 
-void CConn::dataRect(const rfb::Rect &r, int encoding)
+void CConn::framebufferUpdateStart()
 {
-  if (!decoders[encoding])
-    throw rdr::Exception("Unknown encoding");
+  CConnection::framebufferUpdateStart();
 
   startCpuCounter();
-  decoders[encoding]->readRect(r, &pb);
+}
+
+void CConn::framebufferUpdateEnd()
+{
+  CConnection::framebufferUpdateEnd();
+
   endCpuCounter();
 
   cpuTime += getCpuCounter();
@@ -140,10 +126,19 @@
 {
 }
 
-static double runTest(const char *fn)
+struct stats  // Timing results of a single runTest() pass
+{
+  double decodeTime;  // CPU seconds, copied from CConn::cpuTime after the run
+  double realTime;    // wall-clock seconds between the two gettimeofday() samples
+};
+
+static struct stats runTest(const char *fn)
 {
   CConn *cc;
-  double time;
+  struct timeval start, stop;
+  struct stats s;
+
+  gettimeofday(&start, NULL);
 
   try {
     cc = new CConn(fn);
@@ -161,11 +156,15 @@
     exit(1);
   }
 
-  time = cc->cpuTime;
+  gettimeofday(&stop, NULL);
+
+  s.decodeTime = cc->cpuTime;
+  s.realTime = (double)stop.tv_sec - start.tv_sec;
+  s.realTime += ((double)stop.tv_usec - start.tv_usec)/1000000.0;
 
   delete cc;
 
-  return time;
+  return s;
 }
 
 static void sort(double *array, int count)
@@ -191,7 +190,8 @@
 int main(int argc, char **argv)
 {
   int i;
-  double times[runCount], dev[runCount];
+  struct stats runs[runCount];
+  double values[runCount], dev[runCount];
   double median, meddev;
 
   if (argc != 2) {
@@ -204,19 +204,37 @@
 
   // Multiple runs to get a good average
   for (i = 0;i < runCount;i++)
-    times[i] = runTest(argv[1]);
+    runs[i] = runTest(argv[1]);
 
-  // Calculate median and median deviation
-  sort(times, runCount);
-  median = times[runCount/2];
+  // Calculate median and median deviation for CPU usage
+  for (i = 0;i < runCount;i++)
+    values[i] = runs[i].decodeTime;
+
+  sort(values, runCount);
+  median = values[runCount/2];
 
   for (i = 0;i < runCount;i++)
-    dev[i] = fabs((times[i] - median) / median) * 100;
+    dev[i] = fabs((values[i] - median) / median) * 100;
 
   sort(dev, runCount);
   meddev = dev[runCount/2];
 
   printf("CPU time: %g s (+/- %g %%)\n", median, meddev);
 
+  // And for CPU core usage
+  for (i = 0;i < runCount;i++)
+    values[i] = runs[i].decodeTime / runs[i].realTime;
+
+  sort(values, runCount);
+  median = values[runCount/2];
+
+  for (i = 0;i < runCount;i++)
+    dev[i] = fabs((values[i] - median) / median) * 100;
+
+  sort(dev, runCount);
+  meddev = dev[runCount/2];
+
+  printf("Core usage: %g (+/- %g %%)\n", median, meddev);
+
   return 0;
 }
diff --git a/tests/encperf.cxx b/tests/encperf.cxx
index d93c771..d58d82e 100644
--- a/tests/encperf.cxx
+++ b/tests/encperf.cxx
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
+#include <sys/time.h>
 
 #include <rdr/Exception.h>
 #include <rdr/OutStream.h>
@@ -38,7 +39,6 @@
 
 #include <rfb/CConnection.h>
 #include <rfb/CMsgReader.h>
-#include <rfb/Decoder.h>
 #include <rfb/UpdateTracker.h>
 
 #include <rfb/EncodeManager.h>
@@ -104,8 +104,6 @@
 
 protected:
   rdr::FileInStream *in;
-  rfb::Decoder *decoders[rfb::encodingMax + 1];
-  rfb::ManagedPixelBuffer pb;
   rfb::SimpleUpdateTracker updates;
   class SConn *sc;
 };
@@ -165,49 +163,31 @@
 
 CConn::CConn(const char *filename)
 {
-  int i;
-
   decodeTime = 0.0;
   encodeTime = 0.0;
 
   in = new rdr::FileInStream(filename);
   setStreams(in, NULL);
 
-  memset(decoders, 0, sizeof(decoders));
-  for (i = 0; i < rfb::encodingMax; i++) {
-    if (!rfb::Decoder::supported(i))
-      continue;
-
-    decoders[i] = rfb::Decoder::createDecoder(i, this);
-  }
-
   // Need to skip the initial handshake and ServerInit
   setState(RFBSTATE_NORMAL);
   // That also means that the reader and writer weren't setup
   setReader(new rfb::CMsgReader(this, in));
   // Nor the frame buffer size and format
-  setDesktopSize(width, height);
   rfb::PixelFormat pf;
   pf.parse(format);
   setPixelFormat(pf);
-
-  pb.setPF((bool)translate ? fbPF : pf);
+  setDesktopSize(width, height);
 
   sc = new SConn();
-  sc->cp.setPF(pb.getPF());
+  sc->cp.setPF((bool)translate ? fbPF : pf);
   sc->setEncodings(sizeof(encodings) / sizeof(*encodings), encodings);
 }
 
 CConn::~CConn()
 {
-  int i;
-
   delete sc;
-
   delete in;
-
-  for (i = 0; i < rfb::encodingMax; i++)
-    delete decoders[i];
 }
 
 void CConn::getStats(double& ratio, unsigned long long& bytes,
@@ -218,9 +198,13 @@
 
 void CConn::setDesktopSize(int w, int h)
 {
+  rfb::ModifiablePixelBuffer *pb;
+
   CConnection::setDesktopSize(w, h);
 
-  pb.setSize(cp.width, cp.height);
+  pb = new rfb::ManagedPixelBuffer((bool)translate ? fbPF : cp.pf(),
+                                   cp.width, cp.height);
+  setFramebuffer(pb);
 }
 
 void CConn::setCursor(int, int, const rfb::Point&, void*, void*)
@@ -229,18 +213,28 @@
 
 void CConn::framebufferUpdateStart()
 {
+  CConnection::framebufferUpdateStart();
+
   updates.clear();
+  startCpuCounter();
 }
 
 void CConn::framebufferUpdateEnd()
 {
   rfb::UpdateInfo ui;
-  rfb::Region clip(pb.getRect());
+  rfb::PixelBuffer* pb = getFramebuffer();
+  rfb::Region clip(pb->getRect());
+
+  CConnection::framebufferUpdateEnd();
+
+  endCpuCounter();
+
+  decodeTime += getCpuCounter();
 
   updates.getUpdateInfo(&ui, clip);
 
   startCpuCounter();
-  sc->writeUpdate(ui, &pb);
+  sc->writeUpdate(ui, pb);
   endCpuCounter();
 
   encodeTime += getCpuCounter();
@@ -248,14 +242,7 @@
 
 void CConn::dataRect(const rfb::Rect &r, int encoding)
 {
-  if (!decoders[encoding])
-    throw rdr::Exception("Unknown encoding");
-
-  startCpuCounter();
-  decoders[encoding]->readRect(r, &pb);
-  endCpuCounter();
-
-  decodeTime += getCpuCounter();
+  CConnection::dataRect(r, encoding);
 
   if (encoding != rfb::encodingCopyRect) // FIXME
     updates.add_changed(rfb::Region(r));
@@ -334,11 +321,24 @@
 {
 }
 
-static double runTest(const char *fn, double& ratio, unsigned long long& bytes,
-                      unsigned long long& rawEquivalent)
+struct stats  // Results of a single runTest() pass
+{
+  double decodeTime;  // CPU seconds, copied from CConn::decodeTime after the run
+  double encodeTime;  // CPU seconds, copied from CConn::encodeTime after the run
+  double realTime;    // wall-clock seconds between the two gettimeofday() samples
+
+  double ratio;                      // filled in by CConn::getStats(); printed as "Ratio"
+  unsigned long long bytes;          // filled in by CConn::getStats(); printed as "Encoded bytes"
+  unsigned long long rawEquivalent;  // filled in by CConn::getStats(); printed as "Raw equivalent bytes"
+};
+
+static struct stats runTest(const char *fn)
 {
   CConn *cc;
-  double time;
+  struct stats s;
+  struct timeval start, stop;
+
+  gettimeofday(&start, NULL);
 
   try {
     cc = new CConn(fn);
@@ -356,12 +356,17 @@
     exit(1);
   }
 
-  time = cc->encodeTime;
-  cc->getStats(ratio, bytes, rawEquivalent);
+  gettimeofday(&stop, NULL);
+
+  s.decodeTime = cc->decodeTime;
+  s.encodeTime = cc->encodeTime;
+  s.realTime = (double)stop.tv_sec - start.tv_sec;
+  s.realTime += ((double)stop.tv_usec - start.tv_usec)/1000000.0;
+  cc->getStats(s.ratio, s.bytes, s.rawEquivalent);
 
   delete cc;
 
-  return time;
+  return s;
 }
 
 static void sort(double *array, int count)
@@ -418,9 +423,9 @@
   }
 
   int runCount = count;
-  double times[runCount], dev[runCount];
-  double median, meddev, ratio;
-  unsigned long long bytes, equivalent;
+  struct stats runs[runCount];
+  double values[runCount], dev[runCount];
+  double median, meddev;
 
   if (fn == NULL) {
     fprintf(stderr, "No file specified!\n\n");
@@ -438,31 +443,65 @@
   }
 
   // Warmup
-  runTest(fn, ratio, bytes, equivalent);
+  runTest(fn);
 
   // Multiple runs to get a good average
   for (i = 0; i < runCount; i++)
-    times[i] = runTest(fn, ratio, bytes, equivalent);
+    runs[i] = runTest(fn);
 
-  // Calculate median and median deviation
-  sort(times, runCount);
-  median = times[runCount / 2];
+  // Calculate median and median deviation for CPU usage decoding
+  for (i = 0;i < runCount;i++)
+    values[i] = runs[i].decodeTime;
 
-  for (i = 0; i < runCount; i++)
-    dev[i] = fabs((times[i] - median) / median) * 100;
+  sort(values, runCount);
+  median = values[runCount/2];
+
+  for (i = 0;i < runCount;i++)
+    dev[i] = fabs((values[i] - median) / median) * 100;
 
   sort(dev, runCount);
-  meddev = dev[runCount / 2];
+  meddev = dev[runCount/2];
 
-  printf("CPU time: %g s (+/- %g %%)\n", median, meddev);
+  printf("CPU time (decoding): %g s (+/- %g %%)\n", median, meddev);
+
+  // And for CPU usage encoding
+  for (i = 0;i < runCount;i++)
+    values[i] = runs[i].encodeTime;
+
+  sort(values, runCount);
+  median = values[runCount/2];
+
+  for (i = 0;i < runCount;i++)
+    dev[i] = fabs((values[i] - median) / median) * 100;
+
+  sort(dev, runCount);
+  meddev = dev[runCount/2];
+
+  printf("CPU time (encoding): %g s (+/- %g %%)\n", median, meddev);
+
+  // And for CPU core usage encoding
+  for (i = 0;i < runCount;i++)
+    values[i] = (runs[i].decodeTime + runs[i].encodeTime) / runs[i].realTime;
+
+  sort(values, runCount);
+  median = values[runCount/2];
+
+  for (i = 0;i < runCount;i++)
+    dev[i] = fabs((values[i] - median) / median) * 100;
+
+  sort(dev, runCount);
+  meddev = dev[runCount/2];
+
+  printf("Core usage (total): %g (+/- %g %%)\n", median, meddev);
+
 #ifdef WIN32
-  printf("Encoded bytes: %I64d\n", bytes);
-  printf("Raw equivalent bytes: %I64d\n", equivalent);
+  printf("Encoded bytes: %I64d\n", runs[0].bytes);
+  printf("Raw equivalent bytes: %I64d\n", runs[0].rawEquivalent);
 #else
-  printf("Encoded bytes: %lld\n", bytes);
-  printf("Raw equivalent bytes: %lld\n", equivalent);
+  printf("Encoded bytes: %lld\n", runs[0].bytes);
+  printf("Raw equivalent bytes: %lld\n", runs[0].rawEquivalent);
 #endif
-  printf("Ratio: %g\n", ratio);
+  printf("Ratio: %g\n", runs[0].ratio);
 
   return 0;
 }
diff --git a/tests/results/multicore/README b/tests/results/multicore/README
new file mode 100644
index 0000000..ffeca4e
--- /dev/null
+++ b/tests/results/multicore/README
@@ -0,0 +1,40 @@
+This directory contains the evaluation of the multi-core implementation
+in the decoder. The baseline is the performance before the addition of
+the DecodeManager class.
+
+Tests were performed on the following systems:
+
+ - eLux RP Atom N270 1.6 GHz
+ - Lubuntu 13.10 i.MX6 Quad 1.2 GHz
+ - Fedora 22 i7-3770 3.4 GHz
+ - Windows Vista Core 2 Duo E7400 2.8 GHz
+ - Windows 10 i3-4170 3.7 GHz
+ - OS X 10.6 Core 2 Duo 2.53 GHz
+ - OS X 10.11 i5 2.3 GHz
+
+The systems were tested with:
+
+ a) The old, baseline code
+ b) The new code with all CPUs enabled
+ c) The new code with only one CPU enabled
+
+The test itself consists of running decperf on the test files from the
+TurboVNC project. Rate of decoding is then compared to the baseline.
+Note that the CPU time is divided by core usage in the multi CPU cases
+in order to derive total decoding time. This method is sensitive to
+other load on the system.
+
+On average, there is no regression in performance for single CPU
+systems. This however relies on the addition of the single CPU shortcut
+in DecodeManager. Without that the performance sees a 10% lower rate.
+
+Dual CPU systems see between 20% and 50% increase, and the quad core
+systems between 75% and 125% on average. OS X is an outlier though in
+that it gets a mere 32% increase on average. It is unknown why at this
+point and tracing doesn't reveal anything obvious. It may be because it
+is not a true quad core system, but rather uses HyperThreading.
+
+So in summary, the new code can make a noticeable improvement in decoding
+time. However it does so at a cost of efficiency. Four times the CPUs
+only gives you about twice the performance. More improvements may be
+possible.
diff --git a/tests/results/multicore/multicore.ods b/tests/results/multicore/multicore.ods
new file mode 100644
index 0000000..42e024d
--- /dev/null
+++ b/tests/results/multicore/multicore.ods
Binary files differ
diff --git a/tests/util.cxx b/tests/util.cxx
index 419f09c..4683d35 100644
--- a/tests/util.cxx
+++ b/tests/util.cxx
@@ -29,7 +29,10 @@
 #include "util.h"
 
 #ifdef WIN32
-typedef FILETIME syscounter_t;
+typedef struct {
+  FILETIME kernelTime;
+  FILETIME userTime;
+} syscounter_t;
 #else
 typedef struct rusage syscounter_t;
 #endif
@@ -73,10 +76,10 @@
 static void measureCpu(syscounter_t *counter)
 {
 #ifdef WIN32
-  FILETIME dummy1, dummy2, dummy3;
+  FILETIME dummy1, dummy2;
 
   GetProcessTimes(GetCurrentProcess(), &dummy1, &dummy2,
-                  &dummy3, counter);
+                  &counter->kernelTime, &counter->userTime);
 #else
   getrusage(RUSAGE_SELF, counter);
 #endif
@@ -97,23 +100,35 @@
 double getCpuCounter(cpucounter_t c)
 {
   syscounter_t *s = (syscounter_t*)c;
-  double seconds;
+  double sysSeconds, userSeconds;
 
 #ifdef WIN32
   uint64_t counters[2];
 
-  counters[0] = (uint64_t)s[0].dwHighDateTime << 32 |
-                s[0].dwLowDateTime;
-  counters[1] = (uint64_t)s[1].dwHighDateTime << 32 |
-                s[1].dwLowDateTime;
+  counters[0] = (uint64_t)s[0].kernelTime.dwHighDateTime << 32 |
+                s[0].kernelTime.dwLowDateTime;
+  counters[1] = (uint64_t)s[1].kernelTime.dwHighDateTime << 32 |
+                s[1].kernelTime.dwLowDateTime;
 
-  seconds = (double)(counters[1] - counters[0]) / 10000000.0;
+  sysSeconds = (double)(counters[1] - counters[0]) / 10000000.0;
+
+  counters[0] = (uint64_t)s[0].userTime.dwHighDateTime << 32 |
+                s[0].userTime.dwLowDateTime;
+  counters[1] = (uint64_t)s[1].userTime.dwHighDateTime << 32 |
+                s[1].userTime.dwLowDateTime;
+
+  userSeconds = (double)(counters[1] - counters[0]) / 10000000.0;
 #else
-  seconds = (double)(s[1].ru_utime.tv_sec -
-                     s[0].ru_utime.tv_sec);
-  seconds += (double)(s[1].ru_utime.tv_usec -
-                      s[0].ru_utime.tv_usec) / 1000000.0;
+  sysSeconds = (double)(s[1].ru_stime.tv_sec -
+                        s[0].ru_stime.tv_sec);
+  sysSeconds += (double)(s[1].ru_stime.tv_usec -
+                         s[0].ru_stime.tv_usec) / 1000000.0;
+
+  userSeconds = (double)(s[1].ru_utime.tv_sec -
+                         s[0].ru_utime.tv_sec);
+  userSeconds += (double)(s[1].ru_utime.tv_usec -
+                          s[0].ru_utime.tv_usec) / 1000000.0;
 #endif
 
-  return seconds;
+  return sysSeconds + userSeconds;
 }
diff --git a/vncviewer/CConn.cxx b/vncviewer/CConn.cxx
index 686ce30..262dd2c 100644
--- a/vncviewer/CConn.cxx
+++ b/vncviewer/CConn.cxx
@@ -28,10 +28,10 @@
 #endif
 
 #include <rfb/CMsgWriter.h>
-#include <rfb/encodings.h>
-#include <rfb/Decoder.h>
+#include <rfb/CSecurity.h>
 #include <rfb/Hostname.h>
 #include <rfb/LogWriter.h>
+#include <rfb/Security.h>
 #include <rfb/util.h>
 #include <rfb/screenTypes.h>
 #include <rfb/fenceTypes.h>
@@ -46,6 +46,7 @@
 #include "CConn.h"
 #include "OptionsDialog.h"
 #include "DesktopWindow.h"
+#include "PlatformPixelBuffer.h"
 #include "i18n.h"
 #include "parameters.h"
 #include "vncviewer.h"
@@ -81,8 +82,6 @@
   setShared(::shared);
   sock = socket;
 
-  memset(decoders, 0, sizeof(decoders));
-
   int encNum = encodingNum(preferredEncoding);
   if (encNum != -1)
     currentEncoding = encNum;
@@ -135,9 +134,6 @@
   OptionsDialog::removeCallback(handleOptions);
   Fl::remove_timeout(handleUpdateTimeout, this);
 
-  for (size_t i = 0; i < sizeof(decoders)/sizeof(decoders[0]); i++)
-    delete decoders[i];
-
   if (desktop)
     delete desktop;
 
@@ -233,13 +229,7 @@
 
 void CConn::blockCallback()
 {
-  int next_timer;
-
-  next_timer = Timer::checkTimeouts();
-  if (next_timer == 0)
-    next_timer = INT_MAX;
-
-  Fl::wait((double)next_timer / 1000.0);
+  run_mainloop();
 
   if (should_exit())
     throw rdr::Exception("Termination requested");
@@ -347,11 +337,32 @@
 // one.
 void CConn::framebufferUpdateStart()
 {
+  ModifiablePixelBuffer* pb;
+  PlatformPixelBuffer* ppb;
+
+  CConnection::framebufferUpdateStart();
+
   // Note: This might not be true if sync fences are supported
   pendingUpdate = false;
 
   requestNewUpdate();
 
+  // We might still be rendering the previous update
+  pb = getFramebuffer();
+  assert(pb != NULL);
+  ppb = dynamic_cast<PlatformPixelBuffer*>(pb);
+  assert(ppb != NULL);
+  if (ppb->isRendering()) {
+    // Need to stop monitoring the socket or we'll just busy loop
+    assert(sock != NULL);
+    Fl::remove_fd(sock->getFd());
+
+    while (ppb->isRendering())
+      run_mainloop();
+
+    Fl::add_fd(sock->getFd(), FL_READ | FL_EXCEPT, socketEvent, this);
+  }
+
   // Update the screen prematurely for very slow updates
   Fl::add_timeout(1.0, handleUpdateTimeout, this);
 }
@@ -362,6 +373,8 @@
 // appropriately, and then request another incremental update.
 void CConn::framebufferUpdateEnd()
 {
+  CConnection::framebufferUpdateEnd();
+
   Fl::remove_timeout(handleUpdateTimeout, this);
   desktop->updateWindow();
 
@@ -439,20 +452,7 @@
   if (encoding != encodingCopyRect)
     lastServerEncoding = encoding;
 
-  if (!Decoder::supported(encoding)) {
-    // TRANSLATORS: Refers to a VNC protocol encoding type
-    vlog.error(_("Unknown encoding %d"), encoding);
-    throw Exception(_("Unknown encoding"));
-  }
-
-  if (!decoders[encoding]) {
-    decoders[encoding] = Decoder::createDecoder(encoding, this);
-    if (!decoders[encoding]) {
-      vlog.error(_("Unknown encoding %d"), encoding);
-      throw Exception(_("Unknown encoding"));
-    }
-  }
-  decoders[encoding]->readRect(r, desktop->getFramebuffer());
+  CConnection::dataRect(r, encoding);
 
   sock->inStream().stopTiming();
 }
diff --git a/vncviewer/CConn.h b/vncviewer/CConn.h
index 06e3040..c934f3d 100644
--- a/vncviewer/CConn.h
+++ b/vncviewer/CConn.h
@@ -23,10 +23,9 @@
 #include <FL/Fl.H>
 
 #include <rfb/CConnection.h>
-#include <rfb/encodings.h>
-#include <network/Socket.h>
+#include <rdr/FdInStream.h>
 
-namespace rfb { class Decoder; }
+namespace network { class Socket; }
 
 class DesktopWindow;
 
@@ -96,8 +95,6 @@
   bool pendingPFChange;
   rfb::PixelFormat pendingPF;
 
-  rfb::Decoder *decoders[rfb::encodingMax+1];
-
   int currentEncoding, lastServerEncoding;
 
   bool formatChange;
diff --git a/vncviewer/DesktopWindow.cxx b/vncviewer/DesktopWindow.cxx
index bba502b..2787bee 100644
--- a/vncviewer/DesktopWindow.cxx
+++ b/vncviewer/DesktopWindow.cxx
@@ -199,12 +199,6 @@
 }
 
 
-rfb::ModifiablePixelBuffer* DesktopWindow::getFramebuffer(void)
-{
-  return viewport->getFramebuffer();
-}
-
-
 // Copy the areas of the framebuffer that have been changed (damaged)
 // to the displayed window.
 
diff --git a/vncviewer/DesktopWindow.h b/vncviewer/DesktopWindow.h
index 8b91450..d755b6d 100644
--- a/vncviewer/DesktopWindow.h
+++ b/vncviewer/DesktopWindow.h
@@ -50,9 +50,6 @@
   // Updated session title
   void setName(const char *name);
 
-  // Return a pointer to the framebuffer for decoders to write into
-  rfb::ModifiablePixelBuffer* getFramebuffer(void);
-
   // Resize the current framebuffer, but retain the contents
   void resizeFramebuffer(int new_w, int new_h);
 
diff --git a/vncviewer/PlatformPixelBuffer.cxx b/vncviewer/PlatformPixelBuffer.cxx
index 5bd50d2..876ab94 100644
--- a/vncviewer/PlatformPixelBuffer.cxx
+++ b/vncviewer/PlatformPixelBuffer.cxx
@@ -28,15 +28,24 @@
 void PlatformPixelBuffer::commitBufferRW(const rfb::Rect& r)
 {
   FullFramePixelBuffer::commitBufferRW(r);
+  mutex.lock();
   damage.assign_union(rfb::Region(r));
+  mutex.unlock();
 }
 
 rfb::Rect PlatformPixelBuffer::getDamage(void)
 {
   rfb::Rect r;
 
+  mutex.lock();
   r = damage.get_bounding_rect();
   damage.clear();
+  mutex.unlock();
 
   return r;
 }
+
+bool PlatformPixelBuffer::isRendering(void)  // virtual; X11PixelBuffer overrides it
+{
+  return false;  // base implementation never reports rendering in progress
+}
diff --git a/vncviewer/PlatformPixelBuffer.h b/vncviewer/PlatformPixelBuffer.h
index 21b93be..9f0e3b1 100644
--- a/vncviewer/PlatformPixelBuffer.h
+++ b/vncviewer/PlatformPixelBuffer.h
@@ -19,6 +19,8 @@
 #ifndef __PLATFORMPIXELBUFFER_H__
 #define __PLATFORMPIXELBUFFER_H__
 
+#include <os/Mutex.h>
+
 #include <rfb/PixelBuffer.h>
 #include <rfb/Region.h>
 
@@ -32,7 +34,10 @@
   virtual void draw(int src_x, int src_y, int x, int y, int w, int h) = 0;
   rfb::Rect getDamage(void);
 
+  virtual bool isRendering(void);
+
 protected:
+  os::Mutex mutex;
   rfb::Region damage;
 };
 
diff --git a/vncviewer/Viewport.cxx b/vncviewer/Viewport.cxx
index a7881bc..1bc0b82 100644
--- a/vncviewer/Viewport.cxx
+++ b/vncviewer/Viewport.cxx
@@ -113,6 +113,7 @@
 
   frameBuffer = createFramebuffer(w, h);
   assert(frameBuffer);
+  cc->setFramebuffer(frameBuffer);
 
   contextMenu = new Fl_Menu_Button(0, 0, 0, 0);
   // Setting box type to FL_NO_BOX prevents it from trying to draw the
@@ -149,8 +150,6 @@
 
   OptionsDialog::removeCallback(handleOptions);
 
-  delete frameBuffer;
-
   if (cursor) {
     if (!cursor->alloc_array)
       delete [] cursor->array;
@@ -179,11 +178,6 @@
   damage(FL_DAMAGE_USER1, r.tl.x + x(), r.tl.y + y(), r.width(), r.height());
 }
 
-rfb::ModifiablePixelBuffer* Viewport::getFramebuffer(void)
-{
-  return frameBuffer;
-}
-
 static const char * dotcursor_xpm[] = {
   "5 5 2 1",
   ".	c #000000",
@@ -274,51 +268,15 @@
 
 void Viewport::resize(int x, int y, int w, int h)
 {
-  PlatformPixelBuffer* newBuffer;
-  rfb::Rect rect;
+  if ((w != frameBuffer->width()) || (h != frameBuffer->height())) {
+    vlog.debug("Resizing framebuffer from %dx%d to %dx%d",
+               frameBuffer->width(), frameBuffer->height(), w, h);
 
-  const rdr::U8* data;
-  int stride;
-
-  const rdr::U8 black[4] = { 0, 0, 0, 0 };
-
-  // FIXME: Resize should probably be a feature of the pixel buffer itself
-
-  if ((w == frameBuffer->width()) && (h == frameBuffer->height()))
-    goto end;
-
-  vlog.debug("Resizing framebuffer from %dx%d to %dx%d",
-             frameBuffer->width(), frameBuffer->height(), w, h);
-
-  newBuffer = createFramebuffer(w, h);
-  assert(newBuffer);
-
-  rect.setXYWH(0, 0,
-               __rfbmin(newBuffer->width(), frameBuffer->width()),
-               __rfbmin(newBuffer->height(), frameBuffer->height()));
-  data = frameBuffer->getBuffer(frameBuffer->getRect(), &stride);
-  newBuffer->imageRect(rect, data, stride);
-
-  // Black out any new areas
-
-  if (newBuffer->width() > frameBuffer->width()) {
-    rect.setXYWH(frameBuffer->width(), 0,
-                 newBuffer->width() - frameBuffer->width(),
-                 newBuffer->height());
-    newBuffer->fillRect(rect, black);
+    frameBuffer = createFramebuffer(w, h);
+    assert(frameBuffer);
+    cc->setFramebuffer(frameBuffer);
   }
 
-  if (newBuffer->height() > frameBuffer->height()) {
-    rect.setXYWH(0, frameBuffer->height(),
-                 newBuffer->width(),
-                 newBuffer->height() - frameBuffer->height());
-    newBuffer->fillRect(rect, black);
-  }
-
-  delete frameBuffer;
-  frameBuffer = newBuffer;
-
-end:
   Fl_Widget::resize(x, y, w, h);
 }
 
diff --git a/vncviewer/Viewport.h b/vncviewer/Viewport.h
index 387ff03..f73a27d 100644
--- a/vncviewer/Viewport.h
+++ b/vncviewer/Viewport.h
@@ -46,9 +46,6 @@
   // Flush updates to screen
   void updateWindow();
 
-  // Return a pointer to the framebuffer for decoders to write into
-  rfb::ModifiablePixelBuffer* getFramebuffer(void);
-
   // New image for the locally rendered cursor
   void setCursor(int width, int height, const rfb::Point& hotspot,
                  void* data, void* mask);
diff --git a/vncviewer/X11PixelBuffer.cxx b/vncviewer/X11PixelBuffer.cxx
index 9196fdc..046676e 100644
--- a/vncviewer/X11PixelBuffer.cxx
+++ b/vncviewer/X11PixelBuffer.cxx
@@ -21,8 +21,10 @@
 #include <config.h>
 #endif
 
+#include <assert.h>
 #include <stdlib.h>
 
+#include <FL/Fl.H>
 #include <FL/x.H>
 
 #include <rfb/LogWriter.h>
@@ -35,6 +37,8 @@
 
 static rfb::LogWriter vlog("X11PixelBuffer");
 
+std::list<X11PixelBuffer*> X11PixelBuffer::shmList;
+
 static PixelFormat display_pf()
 {
   int i;
@@ -97,7 +101,7 @@
 
 X11PixelBuffer::X11PixelBuffer(int width, int height) :
   PlatformPixelBuffer(display_pf(), width, height, NULL, 0),
-  shminfo(NULL), xim(NULL)
+  shminfo(NULL), xim(NULL), pendingPutImage(0)
 {
   // Might not be open at this point
   fl_open_display();
@@ -122,6 +126,8 @@
 {
   if (shminfo) {
     vlog.debug("Freeing shared memory XImage");
+    shmList.remove(this);
+    Fl::remove_system_handler(handleSystemEvent);
     shmdt(shminfo->shmaddr);
     shmctl(shminfo->shmid, IPC_RMID, 0);
     delete shminfo;
@@ -137,12 +143,18 @@
 
 void X11PixelBuffer::draw(int src_x, int src_y, int x, int y, int w, int h)
 {
-  if (shminfo)
-    XShmPutImage(fl_display, fl_window, fl_gc, xim, src_x, src_y, x, y, w, h, False);
-  else
+  if (shminfo) {
+    XShmPutImage(fl_display, fl_window, fl_gc, xim, src_x, src_y, x, y, w, h, True);
+    pendingPutImage++;
+  } else {
     XPutImage(fl_display, fl_window, fl_gc, xim, src_x, src_y, x, y, w, h);
+  }
 }
 
+bool X11PixelBuffer::isRendering(void)  // true while XShmPutImage requests are outstanding
+{
+  return pendingPutImage > 0;  // incremented in draw(), decremented in handleSystemEvent()
+}
 
 static bool caughtError;
 
@@ -202,6 +214,11 @@
   if (caughtError)
     goto free_shmaddr;
 
+  // FLTK is a bit stupid and unreliable if you register the same
+  // callback with different data values.
+  Fl::add_system_handler(handleSystemEvent, NULL);
+  shmList.push_back(this);
+
   vlog.debug("Using shared memory XImage");
 
   return 1;
@@ -222,3 +239,31 @@
 
   return 0;
 }
+
+int X11PixelBuffer::handleSystemEvent(void* event, void* data)
+{ // Static FLTK system handler (added with NULL data, which is not read here)
+  XEvent* xevent;
+  XShmCompletionEvent* shmevent;
+
+  std::list<X11PixelBuffer*>::iterator iter;
+
+  xevent = (XEvent*)event;
+  assert(xevent);
+
+  // Only events whose type equals the MIT-SHM event base are of interest
+  if (xevent->type != XShmGetEventBase(fl_display))
+    return 0;
+
+  shmevent = (XShmCompletionEvent*)event;
+
+  for (iter = shmList.begin();iter != shmList.end();++iter) {
+    if (shmevent->shmseg != (*iter)->shminfo->shmseg)
+      continue;  // completion belongs to another buffer's segment
+
+    (*iter)->pendingPutImage--;  // one XShmPutImage issued by draw() has completed
+    assert((*iter)->pendingPutImage >= 0);
+
+    return 1;  // matching buffer found
+  }
+
+  return 0;  // no registered buffer owns this segment
+}
diff --git a/vncviewer/X11PixelBuffer.h b/vncviewer/X11PixelBuffer.h
index c2ffdc2..115984d 100644
--- a/vncviewer/X11PixelBuffer.h
+++ b/vncviewer/X11PixelBuffer.h
@@ -24,6 +24,8 @@
 #include <sys/shm.h>
 #include <X11/extensions/XShm.h>
 
+#include <list>
+
 #include "PlatformPixelBuffer.h"
 
 class X11PixelBuffer: public PlatformPixelBuffer {
@@ -33,12 +35,19 @@
 
   virtual void draw(int src_x, int src_y, int x, int y, int w, int h);
 
+  virtual bool isRendering(void);
+
 protected:
   int setupShm();
 
+  static int handleSystemEvent(void* event, void* data);
+
 protected:
   XShmSegmentInfo *shminfo;
   XImage *xim;
+  int pendingPutImage;
+
+  static std::list<X11PixelBuffer*> shmList;
 };
 
 
diff --git a/vncviewer/vncviewer.cxx b/vncviewer/vncviewer.cxx
index a2bc029..d71cb7f 100644
--- a/vncviewer/vncviewer.cxx
+++ b/vncviewer/vncviewer.cxx
@@ -128,6 +128,20 @@
   fl_message("%s", about_text());
 }
 
+void run_mainloop()  // One pass: run Timer::checkTimeouts(), then a single Fl::wait()
+{
+  int next_timer;
+
+  next_timer = Timer::checkTimeouts();
+  if (next_timer == 0)
+    next_timer = INT_MAX;  // presumably 0 means "no timer pending" - TODO confirm
+
+  if (Fl::wait((double)next_timer / 1000.0) < 0.0) {  // /1000.0: presumably ms -> s
+    vlog.error(_("Internal FLTK error. Exiting."));
+    exit(-1);  // terminates the process rather than returning to the caller
+  }
+}
+
 #ifdef __APPLE__
 static void about_callback(Fl_Widget *widget, void *data)
 {
@@ -590,18 +604,8 @@
 
   CConn *cc = new CConn(vncServerName, sock);
 
-  while (!exitMainloop) {
-    int next_timer;
-
-    next_timer = Timer::checkTimeouts();
-    if (next_timer == 0)
-      next_timer = INT_MAX;
-
-    if (Fl::wait((double)next_timer / 1000.0) < 0.0) {
-      vlog.error(_("Internal FLTK error. Exiting."));
-      break;
-    }
-  }
+  while (!exitMainloop)
+    run_mainloop();
 
   delete cc;
 
diff --git a/vncviewer/vncviewer.h b/vncviewer/vncviewer.h
index 4d0566b..c98c2e9 100644
--- a/vncviewer/vncviewer.h
+++ b/vncviewer/vncviewer.h
@@ -24,5 +24,6 @@
 void exit_vncviewer(const char *error = NULL);
 bool should_exit();
 void about_vncviewer();
+void run_mainloop();
 
 #endif