Add SSE SIMD implementation of computationally intensive routines.
git-svn-id: svn://svn.code.sf.net/p/tigervnc/code/trunk@3652 3789f03b-4d11-0410-bbf8-ca57d06f2519
diff --git a/common/jpeg/simd/jsimd.h b/common/jpeg/simd/jsimd.h
index 8e78eab..98bcebc 100644
--- a/common/jpeg/simd/jsimd.h
+++ b/common/jpeg/simd/jsimd.h
@@ -13,6 +13,7 @@
#define JSIMD_NONE 0x00
#define JSIMD_MMX 0x01
#define JSIMD_3DNOW 0x02
+#define JSIMD_SSE 0x04
/* Short forms of external names for systems with brain-damaged linkers. */
@@ -30,16 +31,22 @@
#define jsimd_h2v1_merged_upsample_mmx jSMUpH2V1M
#define jsimd_convsamp_mmx jSConvM
#define jsimd_convsamp_float_3dnow jSConvF3D
+#define jsimd_convsamp_float_sse jSConvFS
#define jsimd_fdct_islow_mmx jSFDMIS
#define jsimd_fdct_ifast_mmx jSFDMIF
#define jsimd_fdct_float_3dnow jSFD3DF
+#define jconst_fdct_float_sse jSCFDSF
+#define jsimd_fdct_float_sse jSFDSF
#define jsimd_quantize_mmx jSQuantM
#define jsimd_quantize_float_3dnow jSQuantF3D
+#define jsimd_quantize_float_sse jSQuantFS
#define jsimd_idct_2x2_mmx jSIDM22
#define jsimd_idct_4x4_mmx jSIDM44
#define jsimd_idct_islow_mmx jSIDMIS
#define jsimd_idct_ifast_mmx jSIDMIF
#define jsimd_idct_float_3dnow jSID3DF
+#define jconst_idct_float_sse jSCIDSF
+#define jsimd_idct_float_sse jSIDSF
#endif /* NEED_SHORT_EXTERNAL_NAMES */
/* SIMD Ext: retrieve SIMD/CPU information */
@@ -96,12 +103,19 @@
JDIMENSION start_col,
FAST_FLOAT * workspace));
+EXTERN(void) jsimd_convsamp_float_sse JPP((JSAMPARRAY sample_data,
+ JDIMENSION start_col,
+ FAST_FLOAT * workspace));
+
/* SIMD Forward DCT */
EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM * data));
EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM * data));
EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data));
+extern const int jconst_fdct_float_sse[];
+EXTERN(void) jsimd_fdct_float_sse JPP((FAST_FLOAT * data));
+
/* SIMD Quantization */
EXTERN(void) jsimd_quantize_mmx JPP((JCOEFPTR coef_block,
DCTELEM * divisors,
@@ -111,6 +125,10 @@
FAST_FLOAT * divisors,
FAST_FLOAT * workspace));
+EXTERN(void) jsimd_quantize_float_sse JPP((JCOEFPTR coef_block,
+ FAST_FLOAT * divisors,
+ FAST_FLOAT * workspace));
+
/* SIMD Reduced Inverse DCT */
EXTERN(void) jsimd_idct_2x2_mmx JPP((void * dct_table,
JCOEFPTR coef_block,
@@ -136,3 +154,9 @@
JSAMPARRAY output_buf,
JDIMENSION output_col));
+extern const int jconst_idct_float_sse[];
+EXTERN(void) jsimd_idct_float_sse JPP((void * dct_table,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf,
+ JDIMENSION output_col));
+