SIMD should now work on 64-bit Windows


git-svn-id: svn://svn.code.sf.net/p/tigervnc/code/trunk@4048 3789f03b-4d11-0410-bbf8-ca57d06f2519
diff --git a/common/jpeg/configure.ac b/common/jpeg/configure.ac
index a1ffccb..354a267 100644
--- a/common/jpeg/configure.ac
+++ b/common/jpeg/configure.ac
@@ -2,7 +2,7 @@
 # Process this file with autoconf to produce a configure script.
 
 AC_PREREQ([2.56])
-AC_INIT([libjpeg-turbo], [0.0.92])
+AC_INIT([libjpeg-turbo], [0.0.93])
 
 AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
 
@@ -112,17 +112,8 @@
   case "$host_cpu" in
     x86_64)
       AC_MSG_RESULT([yes (x86_64)])
-
-      case "$host_os" in
-        cygwin* | mingw* | pw32* | interix*)
-          AC_MSG_WARN([Win64 not yet supported. Disabling SIMD routines.])
-          with_simd=no
-          ;;
-        *)
-          AC_PROG_NASM
-          simd_arch=x86_64
-          ;;
-      esac
+      AC_PROG_NASM
+      simd_arch=x86_64
     ;;
     i*86 | x86 | ia32)
       AC_MSG_RESULT([yes (i386)])
diff --git a/common/jpeg/jchuff.c b/common/jpeg/jchuff.c
index e40cfe3..b05c8e7 100644
--- a/common/jpeg/jchuff.c
+++ b/common/jpeg/jchuff.c
@@ -384,7 +384,7 @@
   put_buffer = (put_buffer << size) | code;                     \
  }
 
-#if __WORDSIZE==64
+#if __WORDSIZE==64 || defined(_WIN64)
 
 #define DUMP_BITS(code, size) {                                 \
   CHECKBUF47()                                                  \
@@ -425,7 +425,7 @@
 
 int _max=0;
 
-#if __WORDSIZE==64
+#if __WORDSIZE==64 || defined(_WIN64)
 
 #define DUMP_VALUE(ht, codevalue, t, nbits) { \
   size = ht->ehufsi[codevalue];               \
diff --git a/common/jpeg/jdhuff.c b/common/jpeg/jdhuff.c
index 9710142..cbab592 100644
--- a/common/jpeg/jdhuff.c
+++ b/common/jpeg/jdhuff.c
@@ -628,7 +628,7 @@
 
 /***************************************************************/
 
-#if __WORDSIZE == 64
+#if __WORDSIZE == 64 || defined(_WIN64)
 
 #define ENSURE_SHORT \
   if (bits_left < 16) { \
diff --git a/common/jpeg/jdhuff.h b/common/jpeg/jdhuff.h
index b192e48..0a242c5 100644
--- a/common/jpeg/jdhuff.h
+++ b/common/jpeg/jdhuff.h
@@ -73,7 +73,7 @@
  * necessary.
  */
 
-#if __WORDSIZE == 64
+#if __WORDSIZE == 64 || defined(_WIN64)
 
 typedef size_t bit_buf_type;	/* type of bit-extraction buffer */
 #define BIT_BUF_SIZE  64		/* size of buffer in bits */
diff --git a/common/jpeg/simd/jcclrss2-64.asm b/common/jpeg/simd/jcclrss2-64.asm
index 8b8fa8a..8ca47aa 100644
--- a/common/jpeg/simd/jcclrss2-64.asm
+++ b/common/jpeg/simd/jcclrss2-64.asm
@@ -70,7 +70,7 @@
 	pop	rcx
 
 	mov rsi, r11
-	mov	rax, r14
+	mov	eax, r14d
 	test	rax,rax
 	jle	near .return
 .rowloop:
diff --git a/common/jpeg/simd/jdclrss2-64.asm b/common/jpeg/simd/jdclrss2-64.asm
index eac6eb4..4282bd2 100644
--- a/common/jpeg/simd/jdclrss2-64.asm
+++ b/common/jpeg/simd/jdclrss2-64.asm
@@ -72,7 +72,7 @@
 	pop	rcx
 
 	mov	rdi, r13
-	mov	rax, r14
+	mov	eax, r14d
 	test	rax,rax
 	jle	near .return
 .rowloop:
diff --git a/common/jpeg/simd/jdmrgss2-64.asm b/common/jpeg/simd/jdmrgss2-64.asm
index b768208..121bb82 100644
--- a/common/jpeg/simd/jdmrgss2-64.asm
+++ b/common/jpeg/simd/jdmrgss2-64.asm
@@ -521,10 +521,10 @@
 	push	rax
 
 	%ifdef WIN64
-	; rcx already parameter 1
-	mov rdx, rdi
-	mov r8, rax
-	mov r9, rbx
+	mov r8, rcx
+	mov r9, rdi
+	mov rcx, rax
+	mov rdx, rbx
 	%else
 	mov rdx, rcx
 	mov rcx, rdi
@@ -554,10 +554,10 @@
 	push	rax
 
 	%ifdef WIN64
-	; rcx already parameter 1
-	mov rdx, rdi
-	mov r8, rax
-	mov r9, rbx
+	mov r8, rcx
+	mov r9, rdi
+	mov rcx, rax
+	mov rdx, rbx
 	%else
 	mov rdx, rcx
 	mov rcx, rdi
diff --git a/common/jpeg/simd/jsimdext.inc b/common/jpeg/simd/jsimdext.inc
index d0b47e6..4ea3d17 100644
--- a/common/jpeg/simd/jsimdext.inc
+++ b/common/jpeg/simd/jsimdext.inc
@@ -2,6 +2,7 @@
 ; jsimdext.inc - common declarations
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2010 D. R. Commander
 ;
 ; Based on
 ; x86 SIMD extension for IJG JPEG library - version 1.02
@@ -310,6 +311,27 @@
 	mov r13, r9
 	mov r14, [rax+48]
 	mov r15, [rax+56]
+	push rsi
+	push rdi
+	sub     rsp, SIZEOF_XMMWORD
+	movlpd  XMMWORD [rsp], xmm6
+	sub     rsp, SIZEOF_XMMWORD
+	movlpd  XMMWORD [rsp], xmm7
+%endmacro
+
+%imacro uncollect_args 0
+	movlpd  xmm7, XMMWORD [rsp]	; xmm7 was stored last by collect_args, so it is reloaded first. NOTE(review): movlpd moves only the low 64 bits; Win64 treats xmm6-xmm15 as callee-saved -- confirm a full 128-bit save/restore is not required
+	add     rsp, SIZEOF_XMMWORD	; release the xmm7 save slot
+	movlpd  xmm6, XMMWORD [rsp]	; reload the low 64 bits of xmm6 (same review note as for xmm7)
+	add     rsp, SIZEOF_XMMWORD	; release the xmm6 save slot
+	pop rdi				; undo "push rsi / push rdi" from the end of collect_args, in reverse order
+	pop rsi
+	pop r15				; r15..r10 are popped in descending order; the matching pushes are presumably
+	pop r14				; at the start of collect_args -- verify the push order there
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+%endmacro
 
 %else
@@ -329,8 +351,6 @@
 	mov r15, r9
 %endmacro
 
-%endif
-
 %imacro uncollect_args 0
 	pop r15
 	pop r14
@@ -342,6 +362,8 @@
 
 %endif
 
+%endif
+
 ; --------------------------------------------------------------------------
 ;  Defines picked up from the C headers
 ;