diff --git a/common/jpeg/acinclude.m4 b/common/jpeg/acinclude.m4
index 3627d23..b8254b6 100644
--- a/common/jpeg/acinclude.m4
+++ b/common/jpeg/acinclude.m4
@@ -82,7 +82,7 @@
 case "$objfmt" in
   MSOMF)      NAFLAGS='-fobj -DOBJ32';;
   Win32-COFF) NAFLAGS='-fwin32 -DWIN32';;
-  Win64-COFF) NAFLAGS='-fwin64 -DWIN32 -D__x86_64__';;
+  Win64-COFF) NAFLAGS='-fwin64 -DWIN64 -D__x86_64__';;
   COFF)       NAFLAGS='-fcoff -DCOFF';;
   a.out)      NAFLAGS='-faout -DAOUT';;
   BSD-a.out)  NAFLAGS='-faoutb -DAOUT';;
diff --git a/common/jpeg/simd/jcclrss2-64.asm b/common/jpeg/simd/jcclrss2-64.asm
index 31c5be6..89d2185 100644
--- a/common/jpeg/simd/jcclrss2-64.asm
+++ b/common/jpeg/simd/jcclrss2-64.asm
@@ -49,8 +49,8 @@
 	mov	[rsp],rax
 	mov	rbp,rsp				; rbp = aligned rbp
 	lea	rsp, [wk(0)]
-	push	rbx
 	collect_args
+	push	rbx
 
 	mov	rcx, r10
 	test	rcx,rcx
@@ -475,8 +475,8 @@
 	jg	near .rowloop
 
 .return:
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	mov	rsp,rbp		; rsp <- aligned rbp
 	pop	rsp		; rsp <- original rbp
 	pop	rbp
diff --git a/common/jpeg/simd/jcqnts2f-64.asm b/common/jpeg/simd/jcqnts2f-64.asm
index e09387c..6d8a123 100644
--- a/common/jpeg/simd/jcqnts2f-64.asm
+++ b/common/jpeg/simd/jcqnts2f-64.asm
@@ -40,9 +40,10 @@
 
 EXTN(jsimd_convsamp_float_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
-	push	rbx
 	collect_args
+	push	rbx
 
 	pcmpeqw  xmm7,xmm7
 	psllw    xmm7,7
@@ -89,8 +90,8 @@
 	dec	rcx
 	jnz	short .convloop
 
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	pop	rbp
 	ret
 
@@ -113,6 +114,7 @@
 
 EXTN(jsimd_quantize_float_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
 	collect_args
 
diff --git a/common/jpeg/simd/jcqnts2i-64.asm b/common/jpeg/simd/jcqnts2i-64.asm
index 4568dfc..419c593 100644
--- a/common/jpeg/simd/jcqnts2i-64.asm
+++ b/common/jpeg/simd/jcqnts2i-64.asm
@@ -40,9 +40,10 @@
 
 EXTN(jsimd_convsamp_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
-	push	rbx
 	collect_args
+	push	rbx
 
 	pxor	xmm6,xmm6		; xmm6=(all 0's)
 	pcmpeqw	xmm7,xmm7
@@ -84,8 +85,8 @@
 	dec	rcx
 	jnz	short .convloop
 
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	pop	rbp
 	ret
 
@@ -115,6 +116,7 @@
 
 EXTN(jsimd_quantize_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
 	collect_args
 
diff --git a/common/jpeg/simd/jcsamss2-64.asm b/common/jpeg/simd/jcsamss2-64.asm
index 29c3f4f..502d225 100644
--- a/common/jpeg/simd/jcsamss2-64.asm
+++ b/common/jpeg/simd/jcsamss2-64.asm
@@ -45,6 +45,7 @@
 
 EXTN(jsimd_h2v1_downsample_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
 	collect_args
 
@@ -188,6 +189,7 @@
 
 EXTN(jsimd_h2v2_downsample_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
 	collect_args
 
diff --git a/common/jpeg/simd/jdclrss2-64.asm b/common/jpeg/simd/jdclrss2-64.asm
index ea9d2ac..52d133f 100644
--- a/common/jpeg/simd/jdclrss2-64.asm
+++ b/common/jpeg/simd/jdclrss2-64.asm
@@ -51,8 +51,8 @@
 	mov	[rsp],rax
 	mov	rbp,rsp				; rbp = aligned rbp
 	lea	rsp, [wk(0)]
-	push	rbx
 	collect_args
+	push	rbx
 
 	mov	rcx, r10	; num_cols
 	test	rcx,rcx
@@ -475,8 +475,8 @@
 	sfence		; flush the write buffer
 
 .return:
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	mov	rsp,rbp		; rsp <- aligned rbp
 	pop	rsp		; rsp <- original rbp
 	pop	rbp
diff --git a/common/jpeg/simd/jdmrgss2-64.asm b/common/jpeg/simd/jdmrgss2-64.asm
index 3e54c7a..c51c91b 100644
--- a/common/jpeg/simd/jdmrgss2-64.asm
+++ b/common/jpeg/simd/jdmrgss2-64.asm
@@ -12,7 +12,7 @@
 ; This file should be assembled with NASM (Netwide Assembler),
 ; can *not* be assembled with Microsoft's MASM or any compatible
 ; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
+; NASM is available from http://nasm.sourceforge.net/ or
 ; http://sourceforge.net/project/showfiles.php?group_id=6208
 ;
 ; [TAB8]
@@ -51,8 +51,8 @@
 	mov	[rsp],rax
 	mov	rbp,rsp				; rbp = aligned rbp
 	lea	rsp, [wk(0)]
-	push	rbx
 	collect_args
+	push	rbx
 
 	mov	rcx, r10	; col
 	test	rcx,rcx
@@ -468,8 +468,8 @@
 	sfence		; flush the write buffer
 
 .return:
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	mov	rsp,rbp		; rsp <- aligned rbp
 	pop	rsp		; rsp <- original rbp
 	pop	rbp
@@ -496,9 +496,10 @@
 
 EXTN(jsimd_h2v2_merged_upsample_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
-	push	rbx
 	collect_args
+	push	rbx
 
 	mov	rax, r10
 
@@ -519,10 +520,17 @@
 	push	rcx
 	push	rax
 
+	%ifdef WIN64
+	mov r8, rcx		; arg3 <- rcx (must be read before rcx is overwritten)
+	mov r9, rdi		; arg4 <- rdi
+	mov rcx, rax		; arg1 <- rax (same value the non-Win64 branch puts in rdi)
+	mov rdx, rbx		; arg2 <- rbx (same value the non-Win64 branch puts in rsi)
+	%else
 	mov rdx, rcx
 	mov rcx, rdi
 	mov	rdi, rax
 	mov rsi, rbx
+	%endif
 
 	call	EXTN(jsimd_h2v1_merged_upsample_sse2)
 
@@ -545,10 +553,17 @@
 	push	rcx
 	push	rax
 
+	%ifdef WIN64
+	mov r8, rcx		; arg3 <- rcx (must be read before rcx is overwritten)
+	mov r9, rdi		; arg4 <- rdi
+	mov rcx, rax		; arg1 <- rax (same value the non-Win64 branch puts in rdi)
+	mov rdx, rbx		; arg2 <- rbx (same value the non-Win64 branch puts in rsi)
+	%else
 	mov rdx, rcx
 	mov rcx, rdi
 	mov	rdi, rax
 	mov rsi, rbx
+	%endif
 
 	call	EXTN(jsimd_h2v1_merged_upsample_sse2)
 
@@ -559,8 +574,8 @@
 	pop rbx
 	pop rdx
 
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	pop	rbp
 	ret
 
diff --git a/common/jpeg/simd/jdsamss2-64.asm b/common/jpeg/simd/jdsamss2-64.asm
index 8521491..f9c61fc 100644
--- a/common/jpeg/simd/jdsamss2-64.asm
+++ b/common/jpeg/simd/jdsamss2-64.asm
@@ -63,6 +63,7 @@
 
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
 	collect_args
 
@@ -210,8 +211,8 @@
 	mov	[rsp],rax
 	mov	rbp,rsp				; rbp = aligned rbp
 	lea	rsp, [wk(0)]
-	push	rbx
 	collect_args
+	push	rbx
 
 	mov	rax, r11  ; colctr
 	test	rax,rax
@@ -472,8 +473,8 @@
 	jg	near .rowloop
 
 .return:
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	mov	rsp,rbp		; rsp <- aligned rbp
 	pop	rsp		; rsp <- original rbp
 	pop	rbp
@@ -501,6 +502,7 @@
 
 EXTN(jsimd_h2v1_upsample_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
 	collect_args
 
@@ -589,9 +591,10 @@
 
 EXTN(jsimd_h2v2_upsample_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
-	push	rbx
 	collect_args
+	push	rbx
 
 	mov	rdx, r11
 	add	rdx, byte (2*SIZEOF_XMMWORD)-1
@@ -658,8 +661,8 @@
 	jg	near .rowloop
 
 .return:
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	pop	rbp
 	ret
 
diff --git a/common/jpeg/simd/jiss2flt-64.asm b/common/jpeg/simd/jiss2flt-64.asm
index 96bfa5c..3db52ee 100644
--- a/common/jpeg/simd/jiss2flt-64.asm
+++ b/common/jpeg/simd/jiss2flt-64.asm
@@ -84,8 +84,8 @@
 	mov	[rsp],rax
 	mov	rbp,rsp				; rbp = aligned rbp
 	lea	rsp, [workspace]
-	push	rbx
 	collect_args
+	push	rbx
 
 	; ---- Pass 1: process columns from input, store into work array.
 
@@ -471,8 +471,8 @@
 	dec	rcx				; ctr
 	jnz	near .rowloop
 
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	mov	rsp,rbp		; rsp <- aligned rbp
 	pop	rsp		; rsp <- original rbp
 	pop	rbp
diff --git a/common/jpeg/simd/jiss2red-64.asm b/common/jpeg/simd/jiss2red-64.asm
index eddaded..f390367 100644
--- a/common/jpeg/simd/jiss2red-64.asm
+++ b/common/jpeg/simd/jiss2red-64.asm
@@ -417,9 +417,10 @@
 
 EXTN(jsimd_idct_2x2_sse2):
 	push	rbp
+	mov	rax,rsp
 	mov	rbp,rsp
-	push	rbx
 	collect_args
+	push	rbx
 
 	; ---- Pass 1: process columns from input.
 
@@ -565,8 +566,8 @@
 	mov	WORD [rdx+rax*SIZEOF_JSAMPLE], bx
 	mov	WORD [rsi+rax*SIZEOF_JSAMPLE], cx
 
-	uncollect_args
 	pop	rbx
+	uncollect_args
 	pop	rbp
 	ret
 
diff --git a/common/jpeg/simd/jsimdext.inc b/common/jpeg/simd/jsimdext.inc
index 4695360..d0b47e6 100644
--- a/common/jpeg/simd/jsimdext.inc
+++ b/common/jpeg/simd/jsimdext.inc
@@ -40,6 +40,17 @@
 %define SEG_TEXT    .text  align=16 public use32 class=CODE
 %define SEG_CONST   .rdata align=16 public use32 class=CONST
 
+%elifdef WIN64	; ----(nasm -fwin64 -DWIN64 ...)--------
+; * Microsoft Visual C++
+
+; -- segment definition --
+; (same qualifiers as the 32-bit definitions above, with use64 instead of use32)
+%define SEG_TEXT    .text  align=16 public use64 class=CODE
+%define SEG_CONST   .rdata align=16 public use64 class=CONST
+%ifdef MSVC	; EXTN() is overridden only when MSVC is defined - NOTE(review): confirm other Win64 toolchains get a suitable EXTN()
+%define EXTN(name)  name			; foo() -> foo
+%endif
+
 %elifdef OBJ32	; ----(nasm -fobj -DOBJ32 ...)----------
 ; * Borland C++ (Win32)
 
@@ -283,6 +294,26 @@
 %endmacro
 
 %ifdef __x86_64__
+
+%ifdef WIN64
+
+%imacro collect_args 0		; Win64 variant: copy the six incoming arguments into r10-r15
+	push r10		; NOTE(review): rsi/rdi/xmm6-xmm15 are callee-saved on Win64 but not saved here - confirm
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15		; old r10-r15 are now on the stack, r15 on top
+	mov r10, rcx		; arg1 (register)
+	mov r11, rdx		; arg2 (register)
+	mov r12, r8		; arg3 (register)
+	mov r13, r9		; arg4 (register)
+	mov r14, [rax+48]	; arg5 (stack); assumes rax = rsp as captured right after "push rbp"
+	mov r15, [rax+56]	; arg6 (stack): saved rbp + return address + 32-byte shadow space = 48
+%endmacro
+
+%else
+
 %imacro collect_args 0
 	push r10
 	push r11
@@ -298,6 +329,8 @@
 	mov r15, r9
 %endmacro
 
+%endif
+
 %imacro uncollect_args 0
 	pop r15
 	pop r14
diff --git a/common/jpeg/simd/nasm_lt.sh b/common/jpeg/simd/nasm_lt.sh
index 7ba5253..6cd7329 100755
--- a/common/jpeg/simd/nasm_lt.sh
+++ b/common/jpeg/simd/nasm_lt.sh
@@ -12,7 +12,7 @@
             fi
             ;;
         -f|-fbin|-faout|-faoutb|-fcoff|-felf|-felf64|-fas86| \
-        -fobj|-fwin32|-frdf|-fieee|-fmacho|-fmacho64)
+        -fobj|-fwin32|-fwin64|-frdf|-fieee|-fmacho|-fmacho64)
             # it's a file format specifier for nasm.
             command="$command $1"
             ;;
