blob: 7f81ddad34870016ee51ed211095da3d7a6dba46 [file] [log] [blame]
Pierre Ossman9ad52342009-03-09 13:15:56 +00001/*
2 * jsimd.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 *
6 * Based on the x86 SIMD extension for IJG JPEG library,
7 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8 *
9 * This file contains the interface between the "normal" portions
10 * of the library and the SIMD implementations.
11 */
12
13#define JPEG_INTERNALS
14#include "jinclude.h"
15#include "jpeglib.h"
Pierre Ossman82c7f312009-03-09 13:21:27 +000016#include "jsimd.h"
Pierre Ossman9ad52342009-03-09 13:15:56 +000017#include "jdct.h"
Pierre Ossman82c7f312009-03-09 13:21:27 +000018#include "jsimddct.h"
19#include "simd/jsimd.h"
Pierre Ossman9ad52342009-03-09 13:15:56 +000020
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is true when the address held by ptr is a
 * multiple of (1 << order).  The address is converted to size_t rather
 * than unsigned so the conversion does not narrow the pointer on
 * platforms where pointers are wider than unsigned int, and both macro
 * arguments are parenthesised so that arbitrary expressions may be passed.
 *
 * Without WITH_SIMD the macro is the constant 0, so the constant tables
 * named in the checks are never referenced.
 */
#ifdef WITH_SIMD
#define IS_ALIGNED(ptr, order) \
  ((((size_t) (ptr)) & ((((size_t) 1) << (order)) - 1)) == 0)
#else
#define IS_ALIGNED(ptr, order) (0)
#endif

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
32
/* Cached SIMD capability mask; all bits set means "not yet determined". */
static unsigned int simd_support = ~0U;
34
35/*
36 * Check what SIMD accelerations are supported.
37 *
38 * FIXME: This code is racy under a multi-threaded environment.
39 */
40LOCAL(void)
41init_simd (void)
42{
DRCbec58d82009-04-03 11:27:17 +000043#ifdef WITH_SIMD
44 char *env = NULL;
45#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +000046 if (simd_support != ~0)
47 return;
48
Pierre Ossman82c7f312009-03-09 13:21:27 +000049#ifdef WITH_SIMD
50 simd_support = jpeg_simd_cpu_support();
DRCbec58d82009-04-03 11:27:17 +000051 if((env=getenv("JSIMD_FORCEMMX"))!=NULL && !strcmp(env, "1"))
52 simd_support = JSIMD_MMX;
53 else if((env=getenv("JSIMD_FORCESSE2"))!=NULL && !strcmp(env, "1"))
54 simd_support = JSIMD_SSE2;
Pierre Ossman82c7f312009-03-09 13:21:27 +000055#else
Pierre Ossman9ad52342009-03-09 13:15:56 +000056 simd_support = JSIMD_NONE;
Pierre Ossman82c7f312009-03-09 13:21:27 +000057#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +000058}
59
60GLOBAL(int)
61jsimd_can_rgb_ycc (void)
62{
63 init_simd();
64
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000065 /* The code is optimised for these values only */
66 if (BITS_IN_JSAMPLE != 8)
67 return 0;
68 if (sizeof(JDIMENSION) != 4)
69 return 0;
70 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
71 return 0;
72
Pierre Ossman74693862009-03-09 13:34:17 +000073 if ((simd_support & JSIMD_SSE2) &&
74 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
75 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000076 if (simd_support & JSIMD_MMX)
77 return 1;
78
Pierre Ossman9ad52342009-03-09 13:15:56 +000079 return 0;
80}
81
82GLOBAL(int)
83jsimd_can_ycc_rgb (void)
84{
85 init_simd();
86
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000087 /* The code is optimised for these values only */
88 if (BITS_IN_JSAMPLE != 8)
89 return 0;
90 if (sizeof(JDIMENSION) != 4)
91 return 0;
92 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
93 return 0;
94
Pierre Ossman74693862009-03-09 13:34:17 +000095 if ((simd_support & JSIMD_SSE2) &&
96 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
97 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000098 if (simd_support & JSIMD_MMX)
99 return 1;
100
Pierre Ossman9ad52342009-03-09 13:15:56 +0000101 return 0;
102}
103
104GLOBAL(void)
105jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
106 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
107 JDIMENSION output_row, int num_rows)
108{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000109#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000110 if ((simd_support & JSIMD_SSE2) &&
111 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
112 jsimd_rgb_ycc_convert_sse2(cinfo->image_width, input_buf,
113 output_buf, output_row, num_rows);
114 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000115 jsimd_rgb_ycc_convert_mmx(cinfo->image_width, input_buf,
116 output_buf, output_row, num_rows);
117#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000118}
119
120GLOBAL(void)
121jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
122 JSAMPIMAGE input_buf, JDIMENSION input_row,
123 JSAMPARRAY output_buf, int num_rows)
124{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000125#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000126 if ((simd_support & JSIMD_SSE2) &&
127 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
128 jsimd_ycc_rgb_convert_sse2(cinfo->output_width, input_buf,
129 input_row, output_buf, num_rows);
130 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000131 jsimd_ycc_rgb_convert_mmx(cinfo->output_width, input_buf,
132 input_row, output_buf, num_rows);
133#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000134}
135
136GLOBAL(int)
137jsimd_can_h2v2_downsample (void)
138{
139 init_simd();
140
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000141 /* The code is optimised for these values only */
142 if (BITS_IN_JSAMPLE != 8)
143 return 0;
144 if (sizeof(JDIMENSION) != 4)
145 return 0;
146
Pierre Ossman74693862009-03-09 13:34:17 +0000147 if (simd_support & JSIMD_SSE2)
148 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000149 if (simd_support & JSIMD_MMX)
150 return 1;
151
Pierre Ossman9ad52342009-03-09 13:15:56 +0000152 return 0;
153}
154
155GLOBAL(int)
156jsimd_can_h2v1_downsample (void)
157{
158 init_simd();
159
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000160 /* The code is optimised for these values only */
161 if (BITS_IN_JSAMPLE != 8)
162 return 0;
163 if (sizeof(JDIMENSION) != 4)
164 return 0;
165
Pierre Ossman74693862009-03-09 13:34:17 +0000166 if (simd_support & JSIMD_SSE2)
167 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000168 if (simd_support & JSIMD_MMX)
169 return 1;
170
Pierre Ossman9ad52342009-03-09 13:15:56 +0000171 return 0;
172}
173
174GLOBAL(void)
175jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
176 JSAMPARRAY input_data, JSAMPARRAY output_data)
177{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000178#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000179 if (simd_support & JSIMD_SSE2)
180 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
181 compptr->v_samp_factor, compptr->width_in_blocks,
182 input_data, output_data);
183 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000184 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
185 compptr->v_samp_factor, compptr->width_in_blocks,
186 input_data, output_data);
187#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000188}
189
190GLOBAL(void)
191jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
192 JSAMPARRAY input_data, JSAMPARRAY output_data)
193{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000194#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000195 if (simd_support & JSIMD_SSE2)
196 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
197 compptr->v_samp_factor, compptr->width_in_blocks,
198 input_data, output_data);
199 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000200 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
201 compptr->v_samp_factor, compptr->width_in_blocks,
202 input_data, output_data);
203#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000204}
205
206GLOBAL(int)
207jsimd_can_h2v2_upsample (void)
208{
209 init_simd();
210
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000211 /* The code is optimised for these values only */
212 if (BITS_IN_JSAMPLE != 8)
213 return 0;
214 if (sizeof(JDIMENSION) != 4)
215 return 0;
216
Pierre Ossman74693862009-03-09 13:34:17 +0000217 if (simd_support & JSIMD_SSE2)
218 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000219 if (simd_support & JSIMD_MMX)
220 return 1;
221
Pierre Ossman9ad52342009-03-09 13:15:56 +0000222 return 0;
223}
224
225GLOBAL(int)
226jsimd_can_h2v1_upsample (void)
227{
228 init_simd();
229
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000230 /* The code is optimised for these values only */
231 if (BITS_IN_JSAMPLE != 8)
232 return 0;
233 if (sizeof(JDIMENSION) != 4)
234 return 0;
235
Pierre Ossman74693862009-03-09 13:34:17 +0000236 if (simd_support & JSIMD_SSE2)
237 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000238 if (simd_support & JSIMD_MMX)
239 return 1;
240
Pierre Ossman9ad52342009-03-09 13:15:56 +0000241 return 0;
242}
243
244GLOBAL(void)
245jsimd_h2v2_upsample (j_decompress_ptr cinfo,
246 jpeg_component_info * compptr,
247 JSAMPARRAY input_data,
248 JSAMPARRAY * output_data_ptr)
249{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000250#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000251 if (simd_support & JSIMD_SSE2)
252 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
253 cinfo->output_width, input_data, output_data_ptr);
254 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000255 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
256 cinfo->output_width, input_data, output_data_ptr);
257#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000258}
259
260GLOBAL(void)
261jsimd_h2v1_upsample (j_decompress_ptr cinfo,
262 jpeg_component_info * compptr,
263 JSAMPARRAY input_data,
264 JSAMPARRAY * output_data_ptr)
265{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000266#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000267 if (simd_support & JSIMD_SSE2)
268 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
269 cinfo->output_width, input_data, output_data_ptr);
270 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000271 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
272 cinfo->output_width, input_data, output_data_ptr);
273#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000274}
275
276GLOBAL(int)
277jsimd_can_h2v2_fancy_upsample (void)
278{
279 init_simd();
280
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000281 /* The code is optimised for these values only */
282 if (BITS_IN_JSAMPLE != 8)
283 return 0;
284 if (sizeof(JDIMENSION) != 4)
285 return 0;
286
Pierre Ossman74693862009-03-09 13:34:17 +0000287 if ((simd_support & JSIMD_SSE2) &&
288 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
289 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000290 if (simd_support & JSIMD_MMX)
291 return 1;
292
Pierre Ossman9ad52342009-03-09 13:15:56 +0000293 return 0;
294}
295
296GLOBAL(int)
297jsimd_can_h2v1_fancy_upsample (void)
298{
299 init_simd();
300
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000301 /* The code is optimised for these values only */
302 if (BITS_IN_JSAMPLE != 8)
303 return 0;
304 if (sizeof(JDIMENSION) != 4)
305 return 0;
306
Pierre Ossman74693862009-03-09 13:34:17 +0000307 if ((simd_support & JSIMD_SSE2) &&
308 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
309 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000310 if (simd_support & JSIMD_MMX)
311 return 1;
312
Pierre Ossman9ad52342009-03-09 13:15:56 +0000313 return 0;
314}
315
316GLOBAL(void)
317jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
318 jpeg_component_info * compptr,
319 JSAMPARRAY input_data,
320 JSAMPARRAY * output_data_ptr)
321{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000322#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000323 if ((simd_support & JSIMD_SSE2) &&
324 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
325 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
326 compptr->downsampled_width, input_data, output_data_ptr);
327 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000328 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
329 compptr->downsampled_width, input_data, output_data_ptr);
330#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000331}
332
333GLOBAL(void)
334jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
335 jpeg_component_info * compptr,
336 JSAMPARRAY input_data,
337 JSAMPARRAY * output_data_ptr)
338{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000339#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000340 if ((simd_support & JSIMD_SSE2) &&
341 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
342 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
343 compptr->downsampled_width, input_data, output_data_ptr);
344 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000345 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
346 compptr->downsampled_width, input_data, output_data_ptr);
347#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000348}
349
350GLOBAL(int)
351jsimd_can_h2v2_merged_upsample (void)
352{
353 init_simd();
354
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000355 /* The code is optimised for these values only */
356 if (BITS_IN_JSAMPLE != 8)
357 return 0;
358 if (sizeof(JDIMENSION) != 4)
359 return 0;
360
Pierre Ossman74693862009-03-09 13:34:17 +0000361 if ((simd_support & JSIMD_SSE2) &&
362 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
363 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000364 if (simd_support & JSIMD_MMX)
365 return 1;
366
Pierre Ossman9ad52342009-03-09 13:15:56 +0000367 return 0;
368}
369
370GLOBAL(int)
371jsimd_can_h2v1_merged_upsample (void)
372{
373 init_simd();
374
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000375 /* The code is optimised for these values only */
376 if (BITS_IN_JSAMPLE != 8)
377 return 0;
378 if (sizeof(JDIMENSION) != 4)
379 return 0;
380
Pierre Ossman74693862009-03-09 13:34:17 +0000381 if ((simd_support & JSIMD_SSE2) &&
382 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
383 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000384 if (simd_support & JSIMD_MMX)
385 return 1;
386
Pierre Ossman9ad52342009-03-09 13:15:56 +0000387 return 0;
388}
389
390GLOBAL(void)
391jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
392 JSAMPIMAGE input_buf,
393 JDIMENSION in_row_group_ctr,
394 JSAMPARRAY output_buf)
395{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000396#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000397 if ((simd_support & JSIMD_SSE2) &&
398 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
399 jsimd_h2v2_merged_upsample_sse2(cinfo->output_width, input_buf,
400 in_row_group_ctr, output_buf);
401 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000402 jsimd_h2v2_merged_upsample_mmx(cinfo->output_width, input_buf,
403 in_row_group_ctr, output_buf);
404#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000405}
406
407GLOBAL(void)
408jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
409 JSAMPIMAGE input_buf,
410 JDIMENSION in_row_group_ctr,
411 JSAMPARRAY output_buf)
412{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000413#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000414 if ((simd_support & JSIMD_SSE2) &&
415 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
416 jsimd_h2v1_merged_upsample_sse2(cinfo->output_width, input_buf,
417 in_row_group_ctr, output_buf);
418 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000419 jsimd_h2v1_merged_upsample_mmx(cinfo->output_width, input_buf,
420 in_row_group_ctr, output_buf);
421#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000422}
423
424GLOBAL(int)
425jsimd_can_convsamp (void)
426{
427 init_simd();
428
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000429 /* The code is optimised for these values only */
430 if (DCTSIZE != 8)
431 return 0;
432 if (BITS_IN_JSAMPLE != 8)
433 return 0;
434 if (sizeof(JDIMENSION) != 4)
435 return 0;
436 if (sizeof(DCTELEM) != 2)
437 return 0;
438
Pierre Ossman74693862009-03-09 13:34:17 +0000439 if (simd_support & JSIMD_SSE2)
440 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000441 if (simd_support & JSIMD_MMX)
442 return 1;
443
Pierre Ossman9ad52342009-03-09 13:15:56 +0000444 return 0;
445}
446
447GLOBAL(int)
448jsimd_can_convsamp_float (void)
449{
450 init_simd();
451
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000452 /* The code is optimised for these values only */
453 if (DCTSIZE != 8)
454 return 0;
455 if (BITS_IN_JSAMPLE != 8)
456 return 0;
457 if (sizeof(JDIMENSION) != 4)
458 return 0;
459 if (sizeof(FAST_FLOAT) != 4)
460 return 0;
461
Pierre Ossman74693862009-03-09 13:34:17 +0000462 if (simd_support & JSIMD_SSE2)
463 return 1;
Pierre Ossman0d37c572009-03-09 13:31:56 +0000464 if (simd_support & JSIMD_SSE)
465 return 1;
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000466 if (simd_support & JSIMD_3DNOW)
467 return 1;
468
Pierre Ossman9ad52342009-03-09 13:15:56 +0000469 return 0;
470}
471
472GLOBAL(void)
473jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
474 DCTELEM * workspace)
475{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000476#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000477 if (simd_support & JSIMD_SSE2)
478 jsimd_convsamp_sse2(sample_data, start_col, workspace);
479 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000480 jsimd_convsamp_mmx(sample_data, start_col, workspace);
481#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000482}
483
484GLOBAL(void)
485jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
486 FAST_FLOAT * workspace)
487{
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000488#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000489 if (simd_support & JSIMD_SSE2)
490 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
491 else if (simd_support & JSIMD_SSE)
Pierre Ossman0d37c572009-03-09 13:31:56 +0000492 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
493 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000494 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
495#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000496}
497
498GLOBAL(int)
499jsimd_can_fdct_islow (void)
500{
501 init_simd();
502
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000503 /* The code is optimised for these values only */
504 if (DCTSIZE != 8)
505 return 0;
506 if (sizeof(DCTELEM) != 2)
507 return 0;
508
Pierre Ossman74693862009-03-09 13:34:17 +0000509 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
510 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000511 if (simd_support & JSIMD_MMX)
512 return 1;
513
Pierre Ossman9ad52342009-03-09 13:15:56 +0000514 return 0;
515}
516
517GLOBAL(int)
518jsimd_can_fdct_ifast (void)
519{
520 init_simd();
521
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000522 /* The code is optimised for these values only */
523 if (DCTSIZE != 8)
524 return 0;
525 if (sizeof(DCTELEM) != 2)
526 return 0;
527
Pierre Ossman74693862009-03-09 13:34:17 +0000528 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
529 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000530 if (simd_support & JSIMD_MMX)
531 return 1;
532
Pierre Ossman9ad52342009-03-09 13:15:56 +0000533 return 0;
534}
535
536GLOBAL(int)
537jsimd_can_fdct_float (void)
538{
539 init_simd();
540
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000541 /* The code is optimised for these values only */
542 if (DCTSIZE != 8)
543 return 0;
544 if (sizeof(FAST_FLOAT) != 4)
545 return 0;
546
Pierre Ossman0d37c572009-03-09 13:31:56 +0000547 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
548 return 1;
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000549 if (simd_support & JSIMD_3DNOW)
550 return 1;
551
Pierre Ossman9ad52342009-03-09 13:15:56 +0000552 return 0;
553}
554
555GLOBAL(void)
556jsimd_fdct_islow (DCTELEM * data)
557{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000558#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000559 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
560 jsimd_fdct_islow_sse2(data);
561 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000562 jsimd_fdct_islow_mmx(data);
563#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000564}
565
566GLOBAL(void)
567jsimd_fdct_ifast (DCTELEM * data)
568{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000569#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000570 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
571 jsimd_fdct_ifast_sse2(data);
572 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000573 jsimd_fdct_ifast_mmx(data);
574#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000575}
576
577GLOBAL(void)
578jsimd_fdct_float (FAST_FLOAT * data)
579{
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000580#ifdef WITH_SIMD
Pierre Ossman0d37c572009-03-09 13:31:56 +0000581 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
582 jsimd_fdct_float_sse(data);
583 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000584 jsimd_fdct_float_3dnow(data);
585#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000586}
587
588GLOBAL(int)
589jsimd_can_quantize (void)
590{
591 init_simd();
592
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000593 /* The code is optimised for these values only */
594 if (DCTSIZE != 8)
595 return 0;
596 if (sizeof(JCOEF) != 2)
597 return 0;
598 if (sizeof(DCTELEM) != 2)
599 return 0;
600
Pierre Ossman74693862009-03-09 13:34:17 +0000601 if (simd_support & JSIMD_SSE2)
602 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000603 if (simd_support & JSIMD_MMX)
604 return 1;
605
Pierre Ossman9ad52342009-03-09 13:15:56 +0000606 return 0;
607}
608
609GLOBAL(int)
610jsimd_can_quantize_float (void)
611{
612 init_simd();
613
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000614 /* The code is optimised for these values only */
615 if (DCTSIZE != 8)
616 return 0;
617 if (sizeof(JCOEF) != 2)
618 return 0;
619 if (sizeof(FAST_FLOAT) != 4)
620 return 0;
621
Pierre Ossman74693862009-03-09 13:34:17 +0000622 if (simd_support & JSIMD_SSE2)
623 return 1;
Pierre Ossman0d37c572009-03-09 13:31:56 +0000624 if (simd_support & JSIMD_SSE)
625 return 1;
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000626 if (simd_support & JSIMD_3DNOW)
627 return 1;
628
Pierre Ossman9ad52342009-03-09 13:15:56 +0000629 return 0;
630}
631
632GLOBAL(void)
633jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
634 DCTELEM * workspace)
635{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000636#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000637 if (simd_support & JSIMD_SSE2)
638 jsimd_quantize_sse2(coef_block, divisors, workspace);
639 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000640 jsimd_quantize_mmx(coef_block, divisors, workspace);
641#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000642}
643
644GLOBAL(void)
645jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
646 FAST_FLOAT * workspace)
647{
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000648#ifdef WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000649 if (simd_support & JSIMD_SSE2)
650 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
651 else if (simd_support & JSIMD_SSE)
Pierre Ossman0d37c572009-03-09 13:31:56 +0000652 jsimd_quantize_float_sse(coef_block, divisors, workspace);
653 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000654 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
655#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000656}
657
658GLOBAL(int)
659jsimd_can_idct_2x2 (void)
660{
661 init_simd();
662
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000663 /* The code is optimised for these values only */
664 if (DCTSIZE != 8)
665 return 0;
666 if (sizeof(JCOEF) != 2)
667 return 0;
668 if (BITS_IN_JSAMPLE != 8)
669 return 0;
670 if (sizeof(JDIMENSION) != 4)
671 return 0;
672 if (sizeof(ISLOW_MULT_TYPE) != 2)
673 return 0;
674
Pierre Ossman74693862009-03-09 13:34:17 +0000675 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
676 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000677 if (simd_support & JSIMD_MMX)
678 return 1;
679
Pierre Ossman9ad52342009-03-09 13:15:56 +0000680 return 0;
681}
682
683GLOBAL(int)
684jsimd_can_idct_4x4 (void)
685{
686 init_simd();
687
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000688 /* The code is optimised for these values only */
689 if (DCTSIZE != 8)
690 return 0;
691 if (sizeof(JCOEF) != 2)
692 return 0;
693 if (BITS_IN_JSAMPLE != 8)
694 return 0;
695 if (sizeof(JDIMENSION) != 4)
696 return 0;
697 if (sizeof(ISLOW_MULT_TYPE) != 2)
698 return 0;
699
Pierre Ossman74693862009-03-09 13:34:17 +0000700 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
701 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000702 if (simd_support & JSIMD_MMX)
703 return 1;
704
Pierre Ossman9ad52342009-03-09 13:15:56 +0000705 return 0;
706}
707
708GLOBAL(void)
709jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
710 JCOEFPTR coef_block, JSAMPARRAY output_buf,
711 JDIMENSION output_col)
712{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000713#if WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000714 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
715 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
716 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000717 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
718#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000719}
720
721GLOBAL(void)
722jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
723 JCOEFPTR coef_block, JSAMPARRAY output_buf,
724 JDIMENSION output_col)
725{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000726#if WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000727 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
728 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
729 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000730 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
731#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000732}
733
734GLOBAL(int)
735jsimd_can_idct_islow (void)
736{
737 init_simd();
738
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000739 /* The code is optimised for these values only */
740 if (DCTSIZE != 8)
741 return 0;
742 if (sizeof(JCOEF) != 2)
743 return 0;
744 if (BITS_IN_JSAMPLE != 8)
745 return 0;
746 if (sizeof(JDIMENSION) != 4)
747 return 0;
748 if (sizeof(ISLOW_MULT_TYPE) != 2)
749 return 0;
750
Pierre Ossman74693862009-03-09 13:34:17 +0000751 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
752 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000753 if (simd_support & JSIMD_MMX)
754 return 1;
755
Pierre Ossman9ad52342009-03-09 13:15:56 +0000756 return 0;
757}
758
759GLOBAL(int)
760jsimd_can_idct_ifast (void)
761{
762 init_simd();
763
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000764 /* The code is optimised for these values only */
765 if (DCTSIZE != 8)
766 return 0;
767 if (sizeof(JCOEF) != 2)
768 return 0;
769 if (BITS_IN_JSAMPLE != 8)
770 return 0;
771 if (sizeof(JDIMENSION) != 4)
772 return 0;
773 if (sizeof(IFAST_MULT_TYPE) != 2)
774 return 0;
775 if (IFAST_SCALE_BITS != 2)
776 return 0;
777
Pierre Ossman74693862009-03-09 13:34:17 +0000778 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
779 return 1;
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000780 if (simd_support & JSIMD_MMX)
781 return 1;
782
Pierre Ossman9ad52342009-03-09 13:15:56 +0000783 return 0;
784}
785
786GLOBAL(int)
787jsimd_can_idct_float (void)
788{
789 init_simd();
790
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000791 if (DCTSIZE != 8)
792 return 0;
793 if (sizeof(JCOEF) != 2)
794 return 0;
795 if (BITS_IN_JSAMPLE != 8)
796 return 0;
797 if (sizeof(JDIMENSION) != 4)
798 return 0;
799 if (sizeof(FAST_FLOAT) != 4)
800 return 0;
801 if (sizeof(FLOAT_MULT_TYPE) != 4)
802 return 0;
803
Pierre Ossman74693862009-03-09 13:34:17 +0000804 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
805 return 1;
Pierre Ossman0d37c572009-03-09 13:31:56 +0000806 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
807 return 1;
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000808 if (simd_support & JSIMD_3DNOW)
809 return 1;
810
Pierre Ossman9ad52342009-03-09 13:15:56 +0000811 return 0;
812}
813
814GLOBAL(void)
815jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
816 JCOEFPTR coef_block, JSAMPARRAY output_buf,
817 JDIMENSION output_col)
818{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000819#if WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000820 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
821 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
822 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000823 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
824#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000825}
826
827GLOBAL(void)
828jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
829 JCOEFPTR coef_block, JSAMPARRAY output_buf,
830 JDIMENSION output_col)
831{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000832#if WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000833 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
834 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
835 else if (simd_support & JSIMD_MMX)
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000836 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
837#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000838}
839
840GLOBAL(void)
841jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
842 JCOEFPTR coef_block, JSAMPARRAY output_buf,
843 JDIMENSION output_col)
844{
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000845#if WITH_SIMD
Pierre Ossman74693862009-03-09 13:34:17 +0000846 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
847 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
848 output_buf, output_col);
849 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
Pierre Ossman0d37c572009-03-09 13:31:56 +0000850 jsimd_idct_float_sse(compptr->dct_table, coef_block,
851 output_buf, output_col);
852 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000853 jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
854 output_buf, output_col);
855#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000856}
857