/*
 * simd/jsimd.h
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 */

/*
 * Bitmask for supported acceleration methods.
 *
 * Every instruction-set extension owns a distinct bit, so several of them
 * can be OR-ed together into a single value; JSIMD_NONE is the empty mask.
 */

#define JSIMD_NONE      0x00
#define JSIMD_MMX       0x01
#define JSIMD_3DNOW     0x02
#define JSIMD_SSE       0x04
#define JSIMD_SSE2      0x08

/*
 * Short forms of external names for systems with brain-damaged linkers.
 *
 * When NEED_SHORT_EXTERNAL_NAMES is defined, every external symbol declared
 * in this header is renamed to an abbreviated alias.  Each long name must
 * appear exactly once in this table: mapping the same name twice with
 * different replacements is an invalid macro redefinition and leaves
 * another symbol without an alias.
 */

#ifdef NEED_SHORT_EXTERNAL_NAMES
/* CPU feature detection */
#define jpeg_simd_cpu_support            jSiCpuSupport
/* Colour space conversion */
#define jsimd_rgb_ycc_convert_mmx        jSRGBYCCM
#define jsimd_ycc_rgb_convert_mmx        jSYCCRGBM
#define jconst_rgb_ycc_convert_sse2      jSCRGBYCCS2
#define jsimd_rgb_ycc_convert_sse2       jSRGBYCCS2
#define jconst_ycc_rgb_convert_sse2      jSCYCCRGBS2
#define jsimd_ycc_rgb_convert_sse2       jSYCCRGBS2
/* Downsampling */
#define jsimd_h2v2_downsample_mmx        jSDnH2V2M
#define jsimd_h2v1_downsample_mmx        jSDnH2V1M
#define jsimd_h2v2_downsample_sse2       jSDnH2V2S2
#define jsimd_h2v1_downsample_sse2       jSDnH2V1S2
/* Upsampling */
#define jsimd_h2v2_upsample_mmx          jSUpH2V2M
#define jsimd_h2v1_upsample_mmx          jSUpH2V1M
#define jsimd_h2v2_fancy_upsample_mmx    jSFUpH2V2M
#define jsimd_h2v1_fancy_upsample_mmx    jSFUpH2V1M
#define jsimd_h2v2_merged_upsample_mmx   jSMUpH2V2M
#define jsimd_h2v1_merged_upsample_mmx   jSMUpH2V1M
#define jsimd_h2v2_upsample_sse2         jSUpH2V2S2
#define jsimd_h2v1_upsample_sse2         jSUpH2V1S2
#define jconst_fancy_upsample_sse2       jSCFUpS2
#define jsimd_h2v2_fancy_upsample_sse2   jSFUpH2V2S2
#define jsimd_h2v1_fancy_upsample_sse2   jSFUpH2V1S2
#define jconst_merged_upsample_sse2      jSCMUpS2
#define jsimd_h2v2_merged_upsample_sse2  jSMUpH2V2S2
#define jsimd_h2v1_merged_upsample_sse2  jSMUpH2V1S2
/* Sample conversion */
#define jsimd_convsamp_mmx               jSConvM
#define jsimd_convsamp_sse2              jSConvS2
#define jsimd_convsamp_float_3dnow       jSConvF3D
#define jsimd_convsamp_float_sse         jSConvFS
#define jsimd_convsamp_float_sse2        jSConvFS2
/* Forward DCT */
#define jsimd_fdct_islow_mmx             jSFDMIS
#define jsimd_fdct_ifast_mmx             jSFDMIF
#define jconst_fdct_islow_sse2           jSCFDS2IS
#define jsimd_fdct_islow_sse2            jSFDS2IS
#define jconst_fdct_ifast_sse2           jSCFDS2IF
#define jsimd_fdct_ifast_sse2            jSFDS2IF
#define jsimd_fdct_float_3dnow           jSFD3DF
#define jconst_fdct_float_sse            jSCFDSF
#define jsimd_fdct_float_sse             jSFDSF
/* Quantization */
#define jsimd_quantize_mmx               jSQuantM
#define jsimd_quantize_sse2              jSQuantS2
#define jsimd_quantize_float_3dnow       jSQuantF3D
#define jsimd_quantize_float_sse         jSQuantFS
#define jsimd_quantize_float_sse2        jSQuantFS2
/* Reduced-size inverse DCT */
#define jsimd_idct_2x2_mmx               jSIDM22
#define jsimd_idct_4x4_mmx               jSIDM44
#define jconst_idct_red_sse2             jSCIDS2R
#define jsimd_idct_2x2_sse2              jSIDS222
#define jsimd_idct_4x4_sse2              jSIDS244
/* Inverse DCT */
#define jsimd_idct_islow_mmx             jSIDMIS
#define jsimd_idct_ifast_mmx             jSIDMIF
#define jconst_idct_islow_sse2           jSCIDS2IS
#define jsimd_idct_islow_sse2            jSIDS2IS
#define jconst_idct_ifast_sse2           jSCIDS2IF
#define jsimd_idct_ifast_sse2            jSIDS2IF
#define jsimd_idct_float_3dnow           jSID3DF
/*
 * The float IDCT constant tables carry their own aliases; they must not
 * reuse the jconst_fdct_float_sse name that is already mapped above.
 */
#define jconst_idct_float_sse            jSCIDSF
#define jsimd_idct_float_sse             jSIDSF
#define jconst_idct_float_sse2           jSCIDS2F
#define jsimd_idct_float_sse2            jSIDS2F
#endif /* NEED_SHORT_EXTERNAL_NAMES */

84/* SIMD Ext: retrieve SIMD/CPU information */
85EXTERN(unsigned int) jpeg_simd_cpu_support JPP((void));
86
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000087/* SIMD Color Space Conversion */
88EXTERN(void) jsimd_rgb_ycc_convert_mmx
89 JPP((JDIMENSION img_width,
90 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
91 JDIMENSION output_row, int num_rows));
92EXTERN(void) jsimd_ycc_rgb_convert_mmx
93 JPP((JDIMENSION out_width,
94 JSAMPIMAGE input_buf, JDIMENSION input_row,
95 JSAMPARRAY output_buf, int num_rows));
96
Pierre Ossman74693862009-03-09 13:34:17 +000097extern const int jconst_rgb_ycc_convert_sse2[];
98EXTERN(void) jsimd_rgb_ycc_convert_sse2
99 JPP((JDIMENSION img_width,
100 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
101 JDIMENSION output_row, int num_rows));
102extern const int jconst_ycc_rgb_convert_sse2[];
103EXTERN(void) jsimd_ycc_rgb_convert_sse2
104 JPP((JDIMENSION out_width,
105 JSAMPIMAGE input_buf, JDIMENSION input_row,
106 JSAMPARRAY output_buf, int num_rows));
107
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000108/* SIMD Downsample */
109EXTERN(void) jsimd_h2v2_downsample_mmx
110 JPP((JDIMENSION image_width, int max_v_samp_factor,
111 JDIMENSION v_samp_factor, JDIMENSION width_blocks,
112 JSAMPARRAY input_data, JSAMPARRAY output_data));
113EXTERN(void) jsimd_h2v1_downsample_mmx
114 JPP((JDIMENSION image_width, int max_v_samp_factor,
115 JDIMENSION v_samp_factor, JDIMENSION width_blocks,
116 JSAMPARRAY input_data, JSAMPARRAY output_data));
117
Pierre Ossman74693862009-03-09 13:34:17 +0000118EXTERN(void) jsimd_h2v2_downsample_sse2
119 JPP((JDIMENSION image_width, int max_v_samp_factor,
120 JDIMENSION v_samp_factor, JDIMENSION width_blocks,
121 JSAMPARRAY input_data, JSAMPARRAY output_data));
122EXTERN(void) jsimd_h2v1_downsample_sse2
123 JPP((JDIMENSION image_width, int max_v_samp_factor,
124 JDIMENSION v_samp_factor, JDIMENSION width_blocks,
125 JSAMPARRAY input_data, JSAMPARRAY output_data));
126
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000127/* SIMD Upsample */
128EXTERN(void) jsimd_h2v2_upsample_mmx
129 JPP((int max_v_samp_factor, JDIMENSION output_width,
130 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
131EXTERN(void) jsimd_h2v1_upsample_mmx
132 JPP((int max_v_samp_factor, JDIMENSION output_width,
133 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
134
135EXTERN(void) jsimd_h2v2_fancy_upsample_mmx
136 JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
137 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
138EXTERN(void) jsimd_h2v1_fancy_upsample_mmx
139 JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
140 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
141
142EXTERN(void) jsimd_h2v2_merged_upsample_mmx
143 JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
144 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));
145EXTERN(void) jsimd_h2v1_merged_upsample_mmx
146 JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
147 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));
148
Pierre Ossman74693862009-03-09 13:34:17 +0000149EXTERN(void) jsimd_h2v2_upsample_sse2
150 JPP((int max_v_samp_factor, JDIMENSION output_width,
151 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
152EXTERN(void) jsimd_h2v1_upsample_sse2
153 JPP((int max_v_samp_factor, JDIMENSION output_width,
154 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
155
156extern const int jconst_fancy_upsample_sse2[];
157EXTERN(void) jsimd_h2v2_fancy_upsample_sse2
158 JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
159 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
160EXTERN(void) jsimd_h2v1_fancy_upsample_sse2
161 JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
162 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
163
164extern const int jconst_merged_upsample_sse2[];
165EXTERN(void) jsimd_h2v2_merged_upsample_sse2
166 JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
167 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));
168EXTERN(void) jsimd_h2v1_merged_upsample_sse2
169 JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
170 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));
171
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000172/* SIMD Sample Conversion */
173EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data,
174 JDIMENSION start_col,
175 DCTELEM * workspace));
176
Pierre Ossman74693862009-03-09 13:34:17 +0000177EXTERN(void) jsimd_convsamp_sse2 JPP((JSAMPARRAY sample_data,
178 JDIMENSION start_col,
179 DCTELEM * workspace));
180
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000181EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data,
182 JDIMENSION start_col,
183 FAST_FLOAT * workspace));
184
Pierre Ossman0d37c572009-03-09 13:31:56 +0000185EXTERN(void) jsimd_convsamp_float_sse JPP((JSAMPARRAY sample_data,
186 JDIMENSION start_col,
187 FAST_FLOAT * workspace));
188
Pierre Ossman74693862009-03-09 13:34:17 +0000189EXTERN(void) jsimd_convsamp_float_sse2 JPP((JSAMPARRAY sample_data,
190 JDIMENSION start_col,
191 FAST_FLOAT * workspace));
192
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000193/* SIMD Forward DCT */
194EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM * data));
195EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM * data));
196
Pierre Ossman74693862009-03-09 13:34:17 +0000197extern const int jconst_fdct_ifast_sse2[];
198EXTERN(void) jsimd_fdct_islow_sse2 JPP((DCTELEM * data));
199extern const int jconst_fdct_islow_sse2[];
200EXTERN(void) jsimd_fdct_ifast_sse2 JPP((DCTELEM * data));
201
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000202EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data));
203
Pierre Ossman0d37c572009-03-09 13:31:56 +0000204extern const int jconst_fdct_float_sse[];
205EXTERN(void) jsimd_fdct_float_sse JPP((FAST_FLOAT * data));
206
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000207/* SIMD Quantization */
208EXTERN(void) jsimd_quantize_mmx JPP((JCOEFPTR coef_block,
209 DCTELEM * divisors,
210 DCTELEM * workspace));
211
Pierre Ossman74693862009-03-09 13:34:17 +0000212EXTERN(void) jsimd_quantize_sse2 JPP((JCOEFPTR coef_block,
213 DCTELEM * divisors,
214 DCTELEM * workspace));
215
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000216EXTERN(void) jsimd_quantize_float_3dnow JPP((JCOEFPTR coef_block,
217 FAST_FLOAT * divisors,
218 FAST_FLOAT * workspace));
219
Pierre Ossman0d37c572009-03-09 13:31:56 +0000220EXTERN(void) jsimd_quantize_float_sse JPP((JCOEFPTR coef_block,
221 FAST_FLOAT * divisors,
222 FAST_FLOAT * workspace));
223
Pierre Ossman74693862009-03-09 13:34:17 +0000224EXTERN(void) jsimd_quantize_float_sse2 JPP((JCOEFPTR coef_block,
225 FAST_FLOAT * divisors,
226 FAST_FLOAT * workspace));
227
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000228/* SIMD Reduced Inverse DCT */
229EXTERN(void) jsimd_idct_2x2_mmx JPP((void * dct_table,
230 JCOEFPTR coef_block,
231 JSAMPARRAY output_buf,
232 JDIMENSION output_col));
233EXTERN(void) jsimd_idct_4x4_mmx JPP((void * dct_table,
234 JCOEFPTR coef_block,
235 JSAMPARRAY output_buf,
236 JDIMENSION output_col));
237
Pierre Ossman74693862009-03-09 13:34:17 +0000238extern const int jconst_idct_red_sse2[];
239EXTERN(void) jsimd_idct_2x2_sse2 JPP((void * dct_table,
240 JCOEFPTR coef_block,
241 JSAMPARRAY output_buf,
242 JDIMENSION output_col));
243EXTERN(void) jsimd_idct_4x4_sse2 JPP((void * dct_table,
244 JCOEFPTR coef_block,
245 JSAMPARRAY output_buf,
246 JDIMENSION output_col));
247
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000248/* SIMD Inverse DCT */
249EXTERN(void) jsimd_idct_islow_mmx JPP((void * dct_table,
250 JCOEFPTR coef_block,
251 JSAMPARRAY output_buf,
252 JDIMENSION output_col));
253EXTERN(void) jsimd_idct_ifast_mmx JPP((void * dct_table,
254 JCOEFPTR coef_block,
255 JSAMPARRAY output_buf,
256 JDIMENSION output_col));
257
Pierre Ossman74693862009-03-09 13:34:17 +0000258extern const int jconst_idct_islow_sse2[];
259EXTERN(void) jsimd_idct_islow_sse2 JPP((void * dct_table,
260 JCOEFPTR coef_block,
261 JSAMPARRAY output_buf,
262 JDIMENSION output_col));
263extern const int jconst_idct_ifast_sse2[];
264EXTERN(void) jsimd_idct_ifast_sse2 JPP((void * dct_table,
265 JCOEFPTR coef_block,
266 JSAMPARRAY output_buf,
267 JDIMENSION output_col));
268
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000269EXTERN(void) jsimd_idct_float_3dnow JPP((void * dct_table,
270 JCOEFPTR coef_block,
271 JSAMPARRAY output_buf,
272 JDIMENSION output_col));
273
Pierre Ossman0d37c572009-03-09 13:31:56 +0000274extern const int jconst_idct_float_sse[];
275EXTERN(void) jsimd_idct_float_sse JPP((void * dct_table,
276 JCOEFPTR coef_block,
277 JSAMPARRAY output_buf,
278 JDIMENSION output_col));
279
Pierre Ossman74693862009-03-09 13:34:17 +0000280extern const int jconst_idct_float_sse2[];
281EXTERN(void) jsimd_idct_float_sse2 JPP((void * dct_table,
282 JCOEFPTR coef_block,
283 JSAMPARRAY output_buf,
284 JDIMENSION output_col));
285