Add MMX SIMD implementation of computationally intensive routines.


git-svn-id: svn://svn.code.sf.net/p/tigervnc/code/trunk@3648 3789f03b-4d11-0410-bbf8-ca57d06f2519
diff --git a/common/jpeg/jsimd.c b/common/jpeg/jsimd.c
index c92ae60..eb1f37b 100644
--- a/common/jpeg/jsimd.c
+++ b/common/jpeg/jsimd.c
@@ -43,6 +43,17 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -51,6 +62,17 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -59,6 +81,11 @@
                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
                        JDIMENSION output_row, int num_rows)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_rgb_ycc_convert_mmx(cinfo->image_width, input_buf,
+        output_buf, output_row, num_rows);
+#endif
 }
 
 GLOBAL(void)
@@ -66,6 +93,11 @@
                        JSAMPIMAGE input_buf, JDIMENSION input_row,
                        JSAMPARRAY output_buf, int num_rows)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_ycc_rgb_convert_mmx(cinfo->output_width, input_buf,
+        input_row, output_buf, num_rows);
+#endif
 }
 
 GLOBAL(int)
@@ -73,6 +105,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -81,6 +122,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -88,12 +138,24 @@
 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
                        JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
+        compptr->v_samp_factor, compptr->width_in_blocks,
+        input_data, output_data);
+#endif
 }
 
 GLOBAL(void)
 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
                        JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+#ifdef WITH_SIMD  /* without WITH_SIMD this function body is empty */
+  if (simd_support & JSIMD_MMX)  /* dispatch only when the MMX flag is set */
+    jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
+        compptr->v_samp_factor, compptr->width_in_blocks,
+        input_data, output_data);
+#endif /* WITH_SIMD */
 }
 
 GLOBAL(int)
@@ -101,6 +163,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -109,6 +180,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -118,6 +198,11 @@
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
+        cinfo->output_width, input_data, output_data_ptr);
+#endif
 }
 
 GLOBAL(void)
@@ -126,6 +211,11 @@
                      JSAMPARRAY input_data,
                      JSAMPARRAY * output_data_ptr)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
+        cinfo->output_width, input_data, output_data_ptr);
+#endif
 }
 
 GLOBAL(int)
@@ -133,6 +223,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -141,6 +240,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -150,6 +258,11 @@
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
+        compptr->downsampled_width, input_data, output_data_ptr);
+#endif
 }
 
 GLOBAL(void)
@@ -158,6 +271,11 @@
                            JSAMPARRAY input_data,
                            JSAMPARRAY * output_data_ptr)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
+        compptr->downsampled_width, input_data, output_data_ptr);
+#endif
 }
 
 GLOBAL(int)
@@ -165,6 +283,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -173,6 +300,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -182,6 +318,11 @@
                             JDIMENSION in_row_group_ctr,
                             JSAMPARRAY output_buf)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_h2v2_merged_upsample_mmx(cinfo->output_width, input_buf,
+        in_row_group_ctr, output_buf);
+#endif
 }
 
 GLOBAL(void)
@@ -190,6 +331,11 @@
                             JDIMENSION in_row_group_ctr,
                             JSAMPARRAY output_buf)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_h2v1_merged_upsample_mmx(cinfo->output_width, input_buf,
+        in_row_group_ctr, output_buf);
+#endif
 }
 
 GLOBAL(int)
@@ -197,6 +343,19 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -212,6 +371,10 @@
 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
                 DCTELEM * workspace)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_convsamp_mmx(sample_data, start_col, workspace);
+#endif
 }
 
 GLOBAL(void)
@@ -225,6 +388,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -233,6 +405,15 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -247,11 +428,19 @@
 GLOBAL(void)
 jsimd_fdct_islow (DCTELEM * data)
 {
+#ifdef WITH_SIMD  /* without WITH_SIMD this function body is empty */
+  if (simd_support & JSIMD_MMX)  /* dispatch only when the MMX flag is set */
+    jsimd_fdct_islow_mmx(data);
+#endif /* WITH_SIMD */
 }
 
 GLOBAL(void)
 jsimd_fdct_ifast (DCTELEM * data)
 {
+#ifdef WITH_SIMD  /* without WITH_SIMD this function body is empty */
+  if (simd_support & JSIMD_MMX)  /* dispatch only when the MMX flag is set */
+    jsimd_fdct_ifast_mmx(data);
+#endif /* WITH_SIMD */
 }
 
 GLOBAL(void)
@@ -264,6 +453,17 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -279,6 +479,10 @@
 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
                 DCTELEM * workspace)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)
+    jsimd_quantize_mmx(coef_block, divisors, workspace);
+#endif
 }
 
 GLOBAL(void)
@@ -292,6 +496,21 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -300,6 +519,21 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -308,6 +542,10 @@
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
                 JDIMENSION output_col)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)  /* dispatch only when the MMX flag is set */
+    jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
+#endif /* WITH_SIMD */
 }
 
 GLOBAL(void)
@@ -315,6 +553,10 @@
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
                 JDIMENSION output_col)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)  /* dispatch only when the MMX flag is set */
+    jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
+#endif /* WITH_SIMD */
 }
 
 GLOBAL(int)
@@ -322,6 +564,21 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -330,6 +587,23 @@
 {
   init_simd();
 
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(IFAST_MULT_TYPE) != 2)
+    return 0;
+  if (IFAST_SCALE_BITS != 2)
+    return 0;
+
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
   return 0;
 }
 
@@ -346,6 +620,10 @@
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
                 JDIMENSION output_col)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)  /* dispatch only when the MMX flag is set */
+    jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
+#endif /* WITH_SIMD */
 }
 
 GLOBAL(void)
@@ -353,6 +631,10 @@
                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
                 JDIMENSION output_col)
 {
+#ifdef WITH_SIMD
+  if (simd_support & JSIMD_MMX)  /* dispatch only when the MMX flag is set */
+    jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
+#endif /* WITH_SIMD */
 }
 
 GLOBAL(void)