blob: 7659249e14c00b217b35a220468cacabdc9e5512 [file] [log] [blame]
/*
 * jsimd_x86_64.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on an
 * x86_64 architecture.
 */
15
16#define JPEG_INTERNALS
17#include "../jinclude.h"
18#include "../jpeglib.h"
19#include "../jsimd.h"
20#include "../jdct.h"
21#include "../jsimddct.h"
DRCdb18b742010-09-30 06:42:45 +000022#include "jsimd.h"
Pierre Ossman0b7301e2009-06-29 11:20:42 +000023
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the address held in ptr is a
 * multiple of 2^order bytes.  Both arguments are parenthesized so that an
 * expression argument (e.g. "table + 4") is fully evaluated before the
 * cast and the shift are applied, and the mask is built in size_t so the
 * shift is not limited to the width of int.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
31
32GLOBAL(int)
33jsimd_can_rgb_ycc (void)
34{
35 /* The code is optimised for these values only */
36 if (BITS_IN_JSAMPLE != 8)
37 return 0;
38 if (sizeof(JDIMENSION) != 4)
39 return 0;
40 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
41 return 0;
42
43 if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
44 return 0;
45
46 return 1;
47}
48
49GLOBAL(int)
50jsimd_can_ycc_rgb (void)
51{
52 /* The code is optimised for these values only */
53 if (BITS_IN_JSAMPLE != 8)
54 return 0;
55 if (sizeof(JDIMENSION) != 4)
56 return 0;
57 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
58 return 0;
59
60 if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
61 return 0;
62
63 return 1;
64}
65
66GLOBAL(void)
67jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
68 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
69 JDIMENSION output_row, int num_rows)
70{
71 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
72
73 switch(cinfo->in_color_space)
74 {
75 case JCS_EXT_RGB:
76 sse2fct=jsimd_extrgb_ycc_convert_sse2;
77 break;
78 case JCS_EXT_RGBX:
79 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
80 break;
81 case JCS_EXT_BGR:
82 sse2fct=jsimd_extbgr_ycc_convert_sse2;
83 break;
84 case JCS_EXT_BGRX:
85 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
86 break;
87 case JCS_EXT_XBGR:
88 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
89 break;
90 case JCS_EXT_XRGB:
91 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
92 break;
93 default:
94 sse2fct=jsimd_rgb_ycc_convert_sse2;
95 break;
96 }
97
98 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
99}
100
101GLOBAL(void)
102jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
103 JSAMPIMAGE input_buf, JDIMENSION input_row,
104 JSAMPARRAY output_buf, int num_rows)
105{
106 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
107
108 switch(cinfo->out_color_space)
109 {
110 case JCS_EXT_RGB:
111 sse2fct=jsimd_ycc_extrgb_convert_sse2;
112 break;
113 case JCS_EXT_RGBX:
114 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
115 break;
116 case JCS_EXT_BGR:
117 sse2fct=jsimd_ycc_extbgr_convert_sse2;
118 break;
119 case JCS_EXT_BGRX:
120 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
121 break;
122 case JCS_EXT_XBGR:
123 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
124 break;
125 case JCS_EXT_XRGB:
126 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
127 break;
128 default:
129 sse2fct=jsimd_ycc_rgb_convert_sse2;
130 break;
131 }
132
133 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
134}
135
136GLOBAL(int)
137jsimd_can_h2v2_downsample (void)
138{
139 /* The code is optimised for these values only */
140 if (BITS_IN_JSAMPLE != 8)
141 return 0;
142 if (sizeof(JDIMENSION) != 4)
143 return 0;
144
145 return 1;
146}
147
148GLOBAL(int)
149jsimd_can_h2v1_downsample (void)
150{
151 /* The code is optimised for these values only */
152 if (BITS_IN_JSAMPLE != 8)
153 return 0;
154 if (sizeof(JDIMENSION) != 4)
155 return 0;
156
157 return 1;
158}
159
160GLOBAL(void)
161jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
162 JSAMPARRAY input_data, JSAMPARRAY output_data)
163{
164 jsimd_h2v2_downsample_sse2(cinfo->image_width,
165 cinfo->max_v_samp_factor,
166 compptr->v_samp_factor,
167 compptr->width_in_blocks,
168 input_data, output_data);
169}
170
171GLOBAL(void)
172jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
173 JSAMPARRAY input_data, JSAMPARRAY output_data)
174{
175 jsimd_h2v1_downsample_sse2(cinfo->image_width,
176 cinfo->max_v_samp_factor,
177 compptr->v_samp_factor,
178 compptr->width_in_blocks,
179 input_data, output_data);
180}
181
182GLOBAL(int)
183jsimd_can_h2v2_upsample (void)
184{
185 /* The code is optimised for these values only */
186 if (BITS_IN_JSAMPLE != 8)
187 return 0;
188 if (sizeof(JDIMENSION) != 4)
189 return 0;
190
191 return 1;
192}
193
194GLOBAL(int)
195jsimd_can_h2v1_upsample (void)
196{
197 /* The code is optimised for these values only */
198 if (BITS_IN_JSAMPLE != 8)
199 return 0;
200 if (sizeof(JDIMENSION) != 4)
201 return 0;
202
203 return 1;
204}
205
206GLOBAL(void)
207jsimd_h2v2_upsample (j_decompress_ptr cinfo,
208 jpeg_component_info * compptr,
209 JSAMPARRAY input_data,
210 JSAMPARRAY * output_data_ptr)
211{
212 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
213 cinfo->output_width,
214 input_data, output_data_ptr);
215}
216
217GLOBAL(void)
218jsimd_h2v1_upsample (j_decompress_ptr cinfo,
219 jpeg_component_info * compptr,
220 JSAMPARRAY input_data,
221 JSAMPARRAY * output_data_ptr)
222{
223 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
224 cinfo->output_width,
225 input_data, output_data_ptr);
226}
227
228GLOBAL(int)
229jsimd_can_h2v2_fancy_upsample (void)
230{
231 /* The code is optimised for these values only */
232 if (BITS_IN_JSAMPLE != 8)
233 return 0;
234 if (sizeof(JDIMENSION) != 4)
235 return 0;
236
237 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
238 return 0;
239
240 return 1;
241}
242
243GLOBAL(int)
244jsimd_can_h2v1_fancy_upsample (void)
245{
246 /* The code is optimised for these values only */
247 if (BITS_IN_JSAMPLE != 8)
248 return 0;
249 if (sizeof(JDIMENSION) != 4)
250 return 0;
251
252 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
253 return 0;
254
255 return 1;
256}
257
258GLOBAL(void)
259jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
260 jpeg_component_info * compptr,
261 JSAMPARRAY input_data,
262 JSAMPARRAY * output_data_ptr)
263{
DRCbacbbaa2010-08-07 16:12:08 +0000264 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
Pierre Ossman0b7301e2009-06-29 11:20:42 +0000265 compptr->downsampled_width,
266 input_data, output_data_ptr);
267}
268
269GLOBAL(void)
270jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
271 jpeg_component_info * compptr,
272 JSAMPARRAY input_data,
273 JSAMPARRAY * output_data_ptr)
274{
275 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
276 compptr->downsampled_width,
277 input_data, output_data_ptr);
278}
279
280GLOBAL(int)
281jsimd_can_h2v2_merged_upsample (void)
282{
283 /* The code is optimised for these values only */
284 if (BITS_IN_JSAMPLE != 8)
285 return 0;
286 if (sizeof(JDIMENSION) != 4)
287 return 0;
288
289 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
290 return 0;
291
292 return 1;
293}
294
295GLOBAL(int)
296jsimd_can_h2v1_merged_upsample (void)
297{
298 /* The code is optimised for these values only */
299 if (BITS_IN_JSAMPLE != 8)
300 return 0;
301 if (sizeof(JDIMENSION) != 4)
302 return 0;
303
304 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
305 return 0;
306
307 return 1;
308}
309
310GLOBAL(void)
311jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
312 JSAMPIMAGE input_buf,
313 JDIMENSION in_row_group_ctr,
314 JSAMPARRAY output_buf)
315{
316 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
317
318 switch(cinfo->out_color_space)
319 {
320 case JCS_EXT_RGB:
321 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
322 break;
323 case JCS_EXT_RGBX:
324 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
325 break;
326 case JCS_EXT_BGR:
327 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
328 break;
329 case JCS_EXT_BGRX:
330 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
331 break;
332 case JCS_EXT_XBGR:
333 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
334 break;
335 case JCS_EXT_XRGB:
336 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
337 break;
338 default:
339 sse2fct=jsimd_h2v2_merged_upsample_sse2;
340 break;
341 }
342
343 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
344}
345
346GLOBAL(void)
347jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
348 JSAMPIMAGE input_buf,
349 JDIMENSION in_row_group_ctr,
350 JSAMPARRAY output_buf)
351{
352 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
353
354 switch(cinfo->out_color_space)
355 {
356 case JCS_EXT_RGB:
357 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
358 break;
359 case JCS_EXT_RGBX:
360 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
361 break;
362 case JCS_EXT_BGR:
363 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
364 break;
365 case JCS_EXT_BGRX:
366 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
367 break;
368 case JCS_EXT_XBGR:
369 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
370 break;
371 case JCS_EXT_XRGB:
372 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
373 break;
374 default:
375 sse2fct=jsimd_h2v1_merged_upsample_sse2;
376 break;
377 }
378
379 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
380}
381
382GLOBAL(int)
383jsimd_can_convsamp (void)
384{
385 /* The code is optimised for these values only */
386 if (DCTSIZE != 8)
387 return 0;
388 if (BITS_IN_JSAMPLE != 8)
389 return 0;
390 if (sizeof(JDIMENSION) != 4)
391 return 0;
392 if (sizeof(DCTELEM) != 2)
393 return 0;
394
395 return 1;
396}
397
398GLOBAL(int)
399jsimd_can_convsamp_float (void)
400{
401 /* The code is optimised for these values only */
402 if (DCTSIZE != 8)
403 return 0;
404 if (BITS_IN_JSAMPLE != 8)
405 return 0;
406 if (sizeof(JDIMENSION) != 4)
407 return 0;
408 if (sizeof(FAST_FLOAT) != 4)
409 return 0;
410
411 return 1;
412}
413
414GLOBAL(void)
415jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
416 DCTELEM * workspace)
417{
418 jsimd_convsamp_sse2(sample_data, start_col, workspace);
419}
420
421GLOBAL(void)
422jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
423 FAST_FLOAT * workspace)
424{
425 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
426}
427
428GLOBAL(int)
429jsimd_can_fdct_islow (void)
430{
431 /* The code is optimised for these values only */
432 if (DCTSIZE != 8)
433 return 0;
434 if (sizeof(DCTELEM) != 2)
435 return 0;
436
437 if (!IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
438 return 0;
439
440 return 1;
441}
442
443GLOBAL(int)
444jsimd_can_fdct_ifast (void)
445{
446 /* The code is optimised for these values only */
447 if (DCTSIZE != 8)
448 return 0;
449 if (sizeof(DCTELEM) != 2)
450 return 0;
451
452 if (!IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
453 return 0;
454
455 return 1;
456}
457
458GLOBAL(int)
459jsimd_can_fdct_float (void)
460{
461 /* The code is optimised for these values only */
462 if (DCTSIZE != 8)
463 return 0;
464 if (sizeof(FAST_FLOAT) != 4)
465 return 0;
466
467 if (!IS_ALIGNED_SSE(jconst_fdct_float_sse))
468 return 0;
469
470 return 1;
471}
472
473GLOBAL(void)
474jsimd_fdct_islow (DCTELEM * data)
475{
476 jsimd_fdct_islow_sse2(data);
477}
478
479GLOBAL(void)
480jsimd_fdct_ifast (DCTELEM * data)
481{
482 jsimd_fdct_ifast_sse2(data);
483}
484
485GLOBAL(void)
486jsimd_fdct_float (FAST_FLOAT * data)
487{
488 jsimd_fdct_float_sse(data);
489}
490
491GLOBAL(int)
492jsimd_can_quantize (void)
493{
494 /* The code is optimised for these values only */
495 if (DCTSIZE != 8)
496 return 0;
497 if (sizeof(JCOEF) != 2)
498 return 0;
499 if (sizeof(DCTELEM) != 2)
500 return 0;
501
502 return 1;
503}
504
505GLOBAL(int)
506jsimd_can_quantize_float (void)
507{
508 /* The code is optimised for these values only */
509 if (DCTSIZE != 8)
510 return 0;
511 if (sizeof(JCOEF) != 2)
512 return 0;
513 if (sizeof(FAST_FLOAT) != 4)
514 return 0;
515
516 return 1;
517}
518
519GLOBAL(void)
520jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
521 DCTELEM * workspace)
522{
523 jsimd_quantize_sse2(coef_block, divisors, workspace);
524}
525
526GLOBAL(void)
527jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
528 FAST_FLOAT * workspace)
529{
530 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
531}
532
533GLOBAL(int)
534jsimd_can_idct_2x2 (void)
535{
536 /* The code is optimised for these values only */
537 if (DCTSIZE != 8)
538 return 0;
539 if (sizeof(JCOEF) != 2)
540 return 0;
541 if (BITS_IN_JSAMPLE != 8)
542 return 0;
543 if (sizeof(JDIMENSION) != 4)
544 return 0;
545 if (sizeof(ISLOW_MULT_TYPE) != 2)
546 return 0;
547
548 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
549 return 0;
550
551 return 1;
552}
553
554GLOBAL(int)
555jsimd_can_idct_4x4 (void)
556{
557 /* The code is optimised for these values only */
558 if (DCTSIZE != 8)
559 return 0;
560 if (sizeof(JCOEF) != 2)
561 return 0;
562 if (BITS_IN_JSAMPLE != 8)
563 return 0;
564 if (sizeof(JDIMENSION) != 4)
565 return 0;
566 if (sizeof(ISLOW_MULT_TYPE) != 2)
567 return 0;
568
569 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
570 return 0;
571
572 return 1;
573}
574
575GLOBAL(void)
576jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
577 JCOEFPTR coef_block, JSAMPARRAY output_buf,
578 JDIMENSION output_col)
579{
580 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
581}
582
583GLOBAL(void)
584jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
585 JCOEFPTR coef_block, JSAMPARRAY output_buf,
586 JDIMENSION output_col)
587{
588 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
589}
590
591GLOBAL(int)
592jsimd_can_idct_islow (void)
593{
594 /* The code is optimised for these values only */
595 if (DCTSIZE != 8)
596 return 0;
597 if (sizeof(JCOEF) != 2)
598 return 0;
599 if (BITS_IN_JSAMPLE != 8)
600 return 0;
601 if (sizeof(JDIMENSION) != 4)
602 return 0;
603 if (sizeof(ISLOW_MULT_TYPE) != 2)
604 return 0;
605
606 if (!IS_ALIGNED_SSE(jconst_idct_islow_sse2))
607 return 0;
608
609 return 1;
610}
611
612GLOBAL(int)
613jsimd_can_idct_ifast (void)
614{
615 /* The code is optimised for these values only */
616 if (DCTSIZE != 8)
617 return 0;
618 if (sizeof(JCOEF) != 2)
619 return 0;
620 if (BITS_IN_JSAMPLE != 8)
621 return 0;
622 if (sizeof(JDIMENSION) != 4)
623 return 0;
624 if (sizeof(IFAST_MULT_TYPE) != 2)
625 return 0;
626 if (IFAST_SCALE_BITS != 2)
627 return 0;
628
629 if (!IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
630 return 0;
631
632 return 1;
633}
634
635GLOBAL(int)
636jsimd_can_idct_float (void)
637{
638 if (DCTSIZE != 8)
639 return 0;
640 if (sizeof(JCOEF) != 2)
641 return 0;
642 if (BITS_IN_JSAMPLE != 8)
643 return 0;
644 if (sizeof(JDIMENSION) != 4)
645 return 0;
646 if (sizeof(FAST_FLOAT) != 4)
647 return 0;
648 if (sizeof(FLOAT_MULT_TYPE) != 4)
649 return 0;
650
651 if (!IS_ALIGNED_SSE(jconst_idct_float_sse2))
652 return 0;
653
654 return 1;
655}
656
657GLOBAL(void)
658jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
659 JCOEFPTR coef_block, JSAMPARRAY output_buf,
660 JDIMENSION output_col)
661{
662 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
663}
664
665GLOBAL(void)
666jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
667 JCOEFPTR coef_block, JSAMPARRAY output_buf,
668 JDIMENSION output_col)
669{
670 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
671}
672
673GLOBAL(void)
674jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
675 JCOEFPTR coef_block, JSAMPARRAY output_buf,
676 JDIMENSION output_col)
677{
678 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
679 output_buf, output_col);
680}
681