blob: eb1f37bd0b6c6fa2cec0b7b474819dfc5c65b050 [file] [log] [blame]
/*
 * jsimd.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations.
 */
12
13#define JPEG_INTERNALS
14#include "jinclude.h"
15#include "jpeglib.h"
Pierre Ossman82c7f312009-03-09 13:21:27 +000016#include "jsimd.h"
Pierre Ossman9ad52342009-03-09 13:15:56 +000017#include "jdct.h"
Pierre Ossman82c7f312009-03-09 13:21:27 +000018#include "jsimddct.h"
19#include "simd/jsimd.h"
Pierre Ossman9ad52342009-03-09 13:15:56 +000020
/*
 * Cached SIMD capability bitmask, tested against JSIMD_* flags.
 * All bits set is the "not initialised yet" sentinel that init_simd()
 * checks for before storing a real value.
 */
static unsigned int simd_support = ~0U;
22
23/*
24 * Check what SIMD accelerations are supported.
25 *
26 * FIXME: This code is racy under a multi-threaded environment.
27 */
28LOCAL(void)
29init_simd (void)
30{
31 if (simd_support != ~0)
32 return;
33
Pierre Ossman82c7f312009-03-09 13:21:27 +000034#ifdef WITH_SIMD
35 simd_support = jpeg_simd_cpu_support();
36#else
Pierre Ossman9ad52342009-03-09 13:15:56 +000037 simd_support = JSIMD_NONE;
Pierre Ossman82c7f312009-03-09 13:21:27 +000038#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +000039}
40
41GLOBAL(int)
42jsimd_can_rgb_ycc (void)
43{
44 init_simd();
45
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000046 /* The code is optimised for these values only */
47 if (BITS_IN_JSAMPLE != 8)
48 return 0;
49 if (sizeof(JDIMENSION) != 4)
50 return 0;
51 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
52 return 0;
53
54 if (simd_support & JSIMD_MMX)
55 return 1;
56
Pierre Ossman9ad52342009-03-09 13:15:56 +000057 return 0;
58}
59
60GLOBAL(int)
61jsimd_can_ycc_rgb (void)
62{
63 init_simd();
64
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000065 /* The code is optimised for these values only */
66 if (BITS_IN_JSAMPLE != 8)
67 return 0;
68 if (sizeof(JDIMENSION) != 4)
69 return 0;
70 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
71 return 0;
72
73 if (simd_support & JSIMD_MMX)
74 return 1;
75
Pierre Ossman9ad52342009-03-09 13:15:56 +000076 return 0;
77}
78
79GLOBAL(void)
80jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
81 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
82 JDIMENSION output_row, int num_rows)
83{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000084#ifdef WITH_SIMD
85 if (simd_support & JSIMD_MMX)
86 jsimd_rgb_ycc_convert_mmx(cinfo->image_width, input_buf,
87 output_buf, output_row, num_rows);
88#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +000089}
90
91GLOBAL(void)
92jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
93 JSAMPIMAGE input_buf, JDIMENSION input_row,
94 JSAMPARRAY output_buf, int num_rows)
95{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000096#ifdef WITH_SIMD
97 if (simd_support & JSIMD_MMX)
98 jsimd_ycc_rgb_convert_mmx(cinfo->output_width, input_buf,
99 input_row, output_buf, num_rows);
100#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000101}
102
103GLOBAL(int)
104jsimd_can_h2v2_downsample (void)
105{
106 init_simd();
107
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000108 /* The code is optimised for these values only */
109 if (BITS_IN_JSAMPLE != 8)
110 return 0;
111 if (sizeof(JDIMENSION) != 4)
112 return 0;
113
114 if (simd_support & JSIMD_MMX)
115 return 1;
116
Pierre Ossman9ad52342009-03-09 13:15:56 +0000117 return 0;
118}
119
120GLOBAL(int)
121jsimd_can_h2v1_downsample (void)
122{
123 init_simd();
124
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000125 /* The code is optimised for these values only */
126 if (BITS_IN_JSAMPLE != 8)
127 return 0;
128 if (sizeof(JDIMENSION) != 4)
129 return 0;
130
131 if (simd_support & JSIMD_MMX)
132 return 1;
133
Pierre Ossman9ad52342009-03-09 13:15:56 +0000134 return 0;
135}
136
137GLOBAL(void)
138jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
139 JSAMPARRAY input_data, JSAMPARRAY output_data)
140{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000141#ifdef WITH_SIMD
142 if (simd_support & JSIMD_MMX)
143 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
144 compptr->v_samp_factor, compptr->width_in_blocks,
145 input_data, output_data);
146#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000147}
148
149GLOBAL(void)
150jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
151 JSAMPARRAY input_data, JSAMPARRAY output_data)
152{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000153#ifdef WITH_SIMD
154 if (simd_support & JSIMD_MMX)
155 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
156 compptr->v_samp_factor, compptr->width_in_blocks,
157 input_data, output_data);
158#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000159}
160
161GLOBAL(int)
162jsimd_can_h2v2_upsample (void)
163{
164 init_simd();
165
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000166 /* The code is optimised for these values only */
167 if (BITS_IN_JSAMPLE != 8)
168 return 0;
169 if (sizeof(JDIMENSION) != 4)
170 return 0;
171
172 if (simd_support & JSIMD_MMX)
173 return 1;
174
Pierre Ossman9ad52342009-03-09 13:15:56 +0000175 return 0;
176}
177
178GLOBAL(int)
179jsimd_can_h2v1_upsample (void)
180{
181 init_simd();
182
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000183 /* The code is optimised for these values only */
184 if (BITS_IN_JSAMPLE != 8)
185 return 0;
186 if (sizeof(JDIMENSION) != 4)
187 return 0;
188
189 if (simd_support & JSIMD_MMX)
190 return 1;
191
Pierre Ossman9ad52342009-03-09 13:15:56 +0000192 return 0;
193}
194
195GLOBAL(void)
196jsimd_h2v2_upsample (j_decompress_ptr cinfo,
197 jpeg_component_info * compptr,
198 JSAMPARRAY input_data,
199 JSAMPARRAY * output_data_ptr)
200{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000201#ifdef WITH_SIMD
202 if (simd_support & JSIMD_MMX)
203 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
204 cinfo->output_width, input_data, output_data_ptr);
205#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000206}
207
208GLOBAL(void)
209jsimd_h2v1_upsample (j_decompress_ptr cinfo,
210 jpeg_component_info * compptr,
211 JSAMPARRAY input_data,
212 JSAMPARRAY * output_data_ptr)
213{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000214#ifdef WITH_SIMD
215 if (simd_support & JSIMD_MMX)
216 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
217 cinfo->output_width, input_data, output_data_ptr);
218#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000219}
220
221GLOBAL(int)
222jsimd_can_h2v2_fancy_upsample (void)
223{
224 init_simd();
225
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000226 /* The code is optimised for these values only */
227 if (BITS_IN_JSAMPLE != 8)
228 return 0;
229 if (sizeof(JDIMENSION) != 4)
230 return 0;
231
232 if (simd_support & JSIMD_MMX)
233 return 1;
234
Pierre Ossman9ad52342009-03-09 13:15:56 +0000235 return 0;
236}
237
238GLOBAL(int)
239jsimd_can_h2v1_fancy_upsample (void)
240{
241 init_simd();
242
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000243 /* The code is optimised for these values only */
244 if (BITS_IN_JSAMPLE != 8)
245 return 0;
246 if (sizeof(JDIMENSION) != 4)
247 return 0;
248
249 if (simd_support & JSIMD_MMX)
250 return 1;
251
Pierre Ossman9ad52342009-03-09 13:15:56 +0000252 return 0;
253}
254
255GLOBAL(void)
256jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
257 jpeg_component_info * compptr,
258 JSAMPARRAY input_data,
259 JSAMPARRAY * output_data_ptr)
260{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000261#ifdef WITH_SIMD
262 if (simd_support & JSIMD_MMX)
263 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
264 compptr->downsampled_width, input_data, output_data_ptr);
265#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000266}
267
268GLOBAL(void)
269jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
270 jpeg_component_info * compptr,
271 JSAMPARRAY input_data,
272 JSAMPARRAY * output_data_ptr)
273{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000274#ifdef WITH_SIMD
275 if (simd_support & JSIMD_MMX)
276 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
277 compptr->downsampled_width, input_data, output_data_ptr);
278#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000279}
280
281GLOBAL(int)
282jsimd_can_h2v2_merged_upsample (void)
283{
284 init_simd();
285
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000286 /* The code is optimised for these values only */
287 if (BITS_IN_JSAMPLE != 8)
288 return 0;
289 if (sizeof(JDIMENSION) != 4)
290 return 0;
291
292 if (simd_support & JSIMD_MMX)
293 return 1;
294
Pierre Ossman9ad52342009-03-09 13:15:56 +0000295 return 0;
296}
297
298GLOBAL(int)
299jsimd_can_h2v1_merged_upsample (void)
300{
301 init_simd();
302
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000303 /* The code is optimised for these values only */
304 if (BITS_IN_JSAMPLE != 8)
305 return 0;
306 if (sizeof(JDIMENSION) != 4)
307 return 0;
308
309 if (simd_support & JSIMD_MMX)
310 return 1;
311
Pierre Ossman9ad52342009-03-09 13:15:56 +0000312 return 0;
313}
314
315GLOBAL(void)
316jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
317 JSAMPIMAGE input_buf,
318 JDIMENSION in_row_group_ctr,
319 JSAMPARRAY output_buf)
320{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000321#ifdef WITH_SIMD
322 if (simd_support & JSIMD_MMX)
323 jsimd_h2v2_merged_upsample_mmx(cinfo->output_width, input_buf,
324 in_row_group_ctr, output_buf);
325#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000326}
327
328GLOBAL(void)
329jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
330 JSAMPIMAGE input_buf,
331 JDIMENSION in_row_group_ctr,
332 JSAMPARRAY output_buf)
333{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000334#ifdef WITH_SIMD
335 if (simd_support & JSIMD_MMX)
336 jsimd_h2v1_merged_upsample_mmx(cinfo->output_width, input_buf,
337 in_row_group_ctr, output_buf);
338#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000339}
340
341GLOBAL(int)
342jsimd_can_convsamp (void)
343{
344 init_simd();
345
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000346 /* The code is optimised for these values only */
347 if (DCTSIZE != 8)
348 return 0;
349 if (BITS_IN_JSAMPLE != 8)
350 return 0;
351 if (sizeof(JDIMENSION) != 4)
352 return 0;
353 if (sizeof(DCTELEM) != 2)
354 return 0;
355
356 if (simd_support & JSIMD_MMX)
357 return 1;
358
Pierre Ossman9ad52342009-03-09 13:15:56 +0000359 return 0;
360}
361
362GLOBAL(int)
363jsimd_can_convsamp_float (void)
364{
365 init_simd();
366
367 return 0;
368}
369
370GLOBAL(void)
371jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
372 DCTELEM * workspace)
373{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000374#ifdef WITH_SIMD
375 if (simd_support & JSIMD_MMX)
376 jsimd_convsamp_mmx(sample_data, start_col, workspace);
377#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000378}
379
380GLOBAL(void)
381jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
382 FAST_FLOAT * workspace)
383{
384}
385
386GLOBAL(int)
387jsimd_can_fdct_islow (void)
388{
389 init_simd();
390
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000391 /* The code is optimised for these values only */
392 if (DCTSIZE != 8)
393 return 0;
394 if (sizeof(DCTELEM) != 2)
395 return 0;
396
397 if (simd_support & JSIMD_MMX)
398 return 1;
399
Pierre Ossman9ad52342009-03-09 13:15:56 +0000400 return 0;
401}
402
403GLOBAL(int)
404jsimd_can_fdct_ifast (void)
405{
406 init_simd();
407
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000408 /* The code is optimised for these values only */
409 if (DCTSIZE != 8)
410 return 0;
411 if (sizeof(DCTELEM) != 2)
412 return 0;
413
414 if (simd_support & JSIMD_MMX)
415 return 1;
416
Pierre Ossman9ad52342009-03-09 13:15:56 +0000417 return 0;
418}
419
420GLOBAL(int)
421jsimd_can_fdct_float (void)
422{
423 init_simd();
424
425 return 0;
426}
427
428GLOBAL(void)
429jsimd_fdct_islow (DCTELEM * data)
430{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000431#ifdef WITH_SIMD
432 if (simd_support & JSIMD_MMX)
433 jsimd_fdct_islow_mmx(data);
434#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000435}
436
437GLOBAL(void)
438jsimd_fdct_ifast (DCTELEM * data)
439{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000440#ifdef WITH_SIMD
441 if (simd_support & JSIMD_MMX)
442 jsimd_fdct_ifast_mmx(data);
443#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000444}
445
446GLOBAL(void)
447jsimd_fdct_float (FAST_FLOAT * data)
448{
449}
450
451GLOBAL(int)
452jsimd_can_quantize (void)
453{
454 init_simd();
455
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000456 /* The code is optimised for these values only */
457 if (DCTSIZE != 8)
458 return 0;
459 if (sizeof(JCOEF) != 2)
460 return 0;
461 if (sizeof(DCTELEM) != 2)
462 return 0;
463
464 if (simd_support & JSIMD_MMX)
465 return 1;
466
Pierre Ossman9ad52342009-03-09 13:15:56 +0000467 return 0;
468}
469
470GLOBAL(int)
471jsimd_can_quantize_float (void)
472{
473 init_simd();
474
475 return 0;
476}
477
478GLOBAL(void)
479jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
480 DCTELEM * workspace)
481{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000482#ifdef WITH_SIMD
483 if (simd_support & JSIMD_MMX)
484 jsimd_quantize_mmx(coef_block, divisors, workspace);
485#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000486}
487
488GLOBAL(void)
489jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
490 FAST_FLOAT * workspace)
491{
492}
493
494GLOBAL(int)
495jsimd_can_idct_2x2 (void)
496{
497 init_simd();
498
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000499 /* The code is optimised for these values only */
500 if (DCTSIZE != 8)
501 return 0;
502 if (sizeof(JCOEF) != 2)
503 return 0;
504 if (BITS_IN_JSAMPLE != 8)
505 return 0;
506 if (sizeof(JDIMENSION) != 4)
507 return 0;
508 if (sizeof(ISLOW_MULT_TYPE) != 2)
509 return 0;
510
511 if (simd_support & JSIMD_MMX)
512 return 1;
513
Pierre Ossman9ad52342009-03-09 13:15:56 +0000514 return 0;
515}
516
517GLOBAL(int)
518jsimd_can_idct_4x4 (void)
519{
520 init_simd();
521
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000522 /* The code is optimised for these values only */
523 if (DCTSIZE != 8)
524 return 0;
525 if (sizeof(JCOEF) != 2)
526 return 0;
527 if (BITS_IN_JSAMPLE != 8)
528 return 0;
529 if (sizeof(JDIMENSION) != 4)
530 return 0;
531 if (sizeof(ISLOW_MULT_TYPE) != 2)
532 return 0;
533
534 if (simd_support & JSIMD_MMX)
535 return 1;
536
Pierre Ossman9ad52342009-03-09 13:15:56 +0000537 return 0;
538}
539
540GLOBAL(void)
541jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
542 JCOEFPTR coef_block, JSAMPARRAY output_buf,
543 JDIMENSION output_col)
544{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000545#if WITH_SIMD
546 if (simd_support & JSIMD_MMX)
547 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
548#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000549}
550
551GLOBAL(void)
552jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
553 JCOEFPTR coef_block, JSAMPARRAY output_buf,
554 JDIMENSION output_col)
555{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000556#if WITH_SIMD
557 if (simd_support & JSIMD_MMX)
558 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
559#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000560}
561
562GLOBAL(int)
563jsimd_can_idct_islow (void)
564{
565 init_simd();
566
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000567 /* The code is optimised for these values only */
568 if (DCTSIZE != 8)
569 return 0;
570 if (sizeof(JCOEF) != 2)
571 return 0;
572 if (BITS_IN_JSAMPLE != 8)
573 return 0;
574 if (sizeof(JDIMENSION) != 4)
575 return 0;
576 if (sizeof(ISLOW_MULT_TYPE) != 2)
577 return 0;
578
579 if (simd_support & JSIMD_MMX)
580 return 1;
581
Pierre Ossman9ad52342009-03-09 13:15:56 +0000582 return 0;
583}
584
585GLOBAL(int)
586jsimd_can_idct_ifast (void)
587{
588 init_simd();
589
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000590 /* The code is optimised for these values only */
591 if (DCTSIZE != 8)
592 return 0;
593 if (sizeof(JCOEF) != 2)
594 return 0;
595 if (BITS_IN_JSAMPLE != 8)
596 return 0;
597 if (sizeof(JDIMENSION) != 4)
598 return 0;
599 if (sizeof(IFAST_MULT_TYPE) != 2)
600 return 0;
601 if (IFAST_SCALE_BITS != 2)
602 return 0;
603
604 if (simd_support & JSIMD_MMX)
605 return 1;
606
Pierre Ossman9ad52342009-03-09 13:15:56 +0000607 return 0;
608}
609
610GLOBAL(int)
611jsimd_can_idct_float (void)
612{
613 init_simd();
614
615 return 0;
616}
617
618GLOBAL(void)
619jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
620 JCOEFPTR coef_block, JSAMPARRAY output_buf,
621 JDIMENSION output_col)
622{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000623#if WITH_SIMD
624 if (simd_support & JSIMD_MMX)
625 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
626#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000627}
628
629GLOBAL(void)
630jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
631 JCOEFPTR coef_block, JSAMPARRAY output_buf,
632 JDIMENSION output_col)
633{
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000634#if WITH_SIMD
635 if (simd_support & JSIMD_MMX)
636 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
637#endif
Pierre Ossman9ad52342009-03-09 13:15:56 +0000638}
639
640GLOBAL(void)
641jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
642 JCOEFPTR coef_block, JSAMPARRAY output_buf,
643 JDIMENSION output_col)
644{
645}
646