Add SSE SIMD implementation of computationally intensive routines.


git-svn-id: svn://svn.code.sf.net/p/tigervnc/code/trunk@3652 3789f03b-4d11-0410-bbf8-ca57d06f2519
diff --git a/common/jpeg/simd/jsimd.h b/common/jpeg/simd/jsimd.h
index 8e78eab..98bcebc 100644
--- a/common/jpeg/simd/jsimd.h
+++ b/common/jpeg/simd/jsimd.h
@@ -13,6 +13,7 @@
 #define JSIMD_NONE    0x00
 #define JSIMD_MMX     0x01
 #define JSIMD_3DNOW   0x02
+#define JSIMD_SSE     0x04  /* SSE; its own bit, distinct from MMX (0x01) and 3DNow! (0x02) */
 
 /* Short forms of external names for systems with brain-damaged linkers. */
 
@@ -30,16 +31,22 @@
 #define jsimd_h2v1_merged_upsample_mmx        jSMUpH2V1M
 #define jsimd_convsamp_mmx                    jSConvM
 #define jsimd_convsamp_float_3dnow            jSConvF3D
+#define jsimd_convsamp_float_sse              jSConvFS
 #define jsimd_fdct_islow_mmx                  jSFDMIS
 #define jsimd_fdct_ifast_mmx                  jSFDMIF
 #define jsimd_fdct_float_3dnow                jSFD3DF
+#define jconst_fdct_float_sse                 jSCFDSF
+#define jsimd_fdct_float_sse                  jSFDSF
 #define jsimd_quantize_mmx                    jSQuantM
 #define jsimd_quantize_float_3dnow            jSQuantF3D
+#define jsimd_quantize_float_sse              jSQuantFS
 #define jsimd_idct_2x2_mmx                    jSIDM22
 #define jsimd_idct_4x4_mmx                    jSIDM44
 #define jsimd_idct_islow_mmx                  jSIDMIS
 #define jsimd_idct_ifast_mmx                  jSIDMIF
 #define jsimd_idct_float_3dnow                jSID3DF
+#define jconst_idct_float_sse                 jSCIDSF
+#define jsimd_idct_float_sse                  jSIDSF
 #endif /* NEED_SHORT_EXTERNAL_NAMES */
 
 /* SIMD Ext: retrieve SIMD/CPU information */
@@ -96,12 +103,19 @@
                                              JDIMENSION start_col,
                                              FAST_FLOAT * workspace));
 
+EXTERN(void) jsimd_convsamp_float_sse JPP((JSAMPARRAY sample_data,
+                                           JDIMENSION start_col,
+                                           FAST_FLOAT * workspace));  /* SSE counterpart of jsimd_convsamp_float_3dnow */
+
 /* SIMD Forward DCT */
 EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM * data));
 EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM * data));
 
 EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data));
 
+extern const int jconst_fdct_float_sse[];  /* defined elsewhere; NOTE(review): presumably constants for jsimd_fdct_float_sse -- confirm */
+EXTERN(void) jsimd_fdct_float_sse JPP((FAST_FLOAT * data));  /* SSE counterpart of jsimd_fdct_float_3dnow */
+
 /* SIMD Quantization */
 EXTERN(void) jsimd_quantize_mmx JPP((JCOEFPTR coef_block,
                                      DCTELEM * divisors,
@@ -111,6 +125,10 @@
                                              FAST_FLOAT * divisors,
                                              FAST_FLOAT * workspace));
 
+EXTERN(void) jsimd_quantize_float_sse JPP((JCOEFPTR coef_block,
+                                           FAST_FLOAT * divisors,
+                                           FAST_FLOAT * workspace));  /* SSE counterpart of jsimd_quantize_float_3dnow */
+
 /* SIMD Reduced Inverse DCT */
 EXTERN(void) jsimd_idct_2x2_mmx JPP((void * dct_table,
                                      JCOEFPTR coef_block,
@@ -136,3 +154,9 @@
                                          JSAMPARRAY output_buf,
                                          JDIMENSION output_col));
 
+extern const int jconst_idct_float_sse[];  /* defined elsewhere; NOTE(review): presumably constants for jsimd_idct_float_sse -- confirm */
+EXTERN(void) jsimd_idct_float_sse JPP((void * dct_table,
+                                       JCOEFPTR coef_block,
+                                       JSAMPARRAY output_buf,
+                                       JDIMENSION output_col));  /* SSE counterpart of jsimd_idct_float_3dnow */
+