/*
 * simd/jsimd.h
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 */

/*
 * Bitmask for supported acceleration methods.
 *
 * Each method has its own bit, so several methods can be OR-ed together
 * into a single value; JSIMD_NONE is the empty mask.
 */

#define JSIMD_NONE    0x00
#define JSIMD_MMX     0x01
#define JSIMD_3DNOW   0x02
#define JSIMD_SSE     0x04

/*
 * Short forms of external names for systems with brain-damaged linkers.
 *
 * When NEED_SHORT_EXTERNAL_NAMES is defined, every external symbol declared
 * in this header is renamed to a short alias.  Each symbol must appear
 * exactly once on the left-hand side: jconst_fdct_float_sse and
 * jconst_idct_float_sse are two separate objects and therefore need two
 * separate aliases (jSCFDSF and jSCIDSF respectively).
 */

#ifdef NEED_SHORT_EXTERNAL_NAMES
#define jpeg_simd_cpu_support           jSiCpuSupport
#define jsimd_rgb_ycc_convert_mmx       jSRGBYCCM
#define jsimd_ycc_rgb_convert_mmx       jSYCCRGBM
#define jsimd_h2v2_downsample_mmx       jSDnH2V2M
#define jsimd_h2v1_downsample_mmx       jSDnH2V1M
#define jsimd_h2v2_upsample_mmx         jSUpH2V2M
#define jsimd_h2v1_upsample_mmx         jSUpH2V1M
#define jsimd_h2v2_fancy_upsample_mmx   jSFUpH2V2M
#define jsimd_h2v1_fancy_upsample_mmx   jSFUpH2V1M
#define jsimd_h2v2_merged_upsample_mmx  jSMUpH2V2M
#define jsimd_h2v1_merged_upsample_mmx  jSMUpH2V1M
#define jsimd_convsamp_mmx              jSConvM
#define jsimd_convsamp_float_3dnow      jSConvF3D
#define jsimd_convsamp_float_sse        jSConvFS
#define jsimd_fdct_islow_mmx            jSFDMIS
#define jsimd_fdct_ifast_mmx            jSFDMIF
#define jsimd_fdct_float_3dnow          jSFD3DF
#define jconst_fdct_float_sse           jSCFDSF
#define jsimd_fdct_float_sse            jSFDSF
#define jsimd_quantize_mmx              jSQuantM
#define jsimd_quantize_float_3dnow      jSQuantF3D
#define jsimd_quantize_float_sse        jSQuantFS
#define jsimd_idct_2x2_mmx              jSIDM22
#define jsimd_idct_4x4_mmx              jSIDM44
#define jsimd_idct_islow_mmx            jSIDMIS
#define jsimd_idct_ifast_mmx            jSIDMIF
#define jsimd_idct_float_3dnow          jSID3DF
#define jconst_idct_float_sse           jSCIDSF
#define jsimd_idct_float_sse            jSIDSF
#endif /* NEED_SHORT_EXTERNAL_NAMES */

52/* SIMD Ext: retrieve SIMD/CPU information */
53EXTERN(unsigned int) jpeg_simd_cpu_support JPP((void));
54
Pierre Ossman3e0e2de2009-03-09 13:25:30 +000055/* SIMD Color Space Conversion */
56EXTERN(void) jsimd_rgb_ycc_convert_mmx
57 JPP((JDIMENSION img_width,
58 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
59 JDIMENSION output_row, int num_rows));
60EXTERN(void) jsimd_ycc_rgb_convert_mmx
61 JPP((JDIMENSION out_width,
62 JSAMPIMAGE input_buf, JDIMENSION input_row,
63 JSAMPARRAY output_buf, int num_rows));
64
65/* SIMD Downsample */
66EXTERN(void) jsimd_h2v2_downsample_mmx
67 JPP((JDIMENSION image_width, int max_v_samp_factor,
68 JDIMENSION v_samp_factor, JDIMENSION width_blocks,
69 JSAMPARRAY input_data, JSAMPARRAY output_data));
70EXTERN(void) jsimd_h2v1_downsample_mmx
71 JPP((JDIMENSION image_width, int max_v_samp_factor,
72 JDIMENSION v_samp_factor, JDIMENSION width_blocks,
73 JSAMPARRAY input_data, JSAMPARRAY output_data));
74
75/* SIMD Upsample */
76EXTERN(void) jsimd_h2v2_upsample_mmx
77 JPP((int max_v_samp_factor, JDIMENSION output_width,
78 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
79EXTERN(void) jsimd_h2v1_upsample_mmx
80 JPP((int max_v_samp_factor, JDIMENSION output_width,
81 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
82
83EXTERN(void) jsimd_h2v2_fancy_upsample_mmx
84 JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
85 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
86EXTERN(void) jsimd_h2v1_fancy_upsample_mmx
87 JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
88 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
89
90EXTERN(void) jsimd_h2v2_merged_upsample_mmx
91 JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
92 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));
93EXTERN(void) jsimd_h2v1_merged_upsample_mmx
94 JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
95 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));
96
97/* SIMD Sample Conversion */
98EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data,
99 JDIMENSION start_col,
100 DCTELEM * workspace));
101
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000102EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data,
103 JDIMENSION start_col,
104 FAST_FLOAT * workspace));
105
Pierre Ossman0d37c572009-03-09 13:31:56 +0000106EXTERN(void) jsimd_convsamp_float_sse JPP((JSAMPARRAY sample_data,
107 JDIMENSION start_col,
108 FAST_FLOAT * workspace));
109
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000110/* SIMD Forward DCT */
111EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM * data));
112EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM * data));
113
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000114EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data));
115
Pierre Ossman0d37c572009-03-09 13:31:56 +0000116extern const int jconst_fdct_float_sse[];
117EXTERN(void) jsimd_fdct_float_sse JPP((FAST_FLOAT * data));
118
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000119/* SIMD Quantization */
120EXTERN(void) jsimd_quantize_mmx JPP((JCOEFPTR coef_block,
121 DCTELEM * divisors,
122 DCTELEM * workspace));
123
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000124EXTERN(void) jsimd_quantize_float_3dnow JPP((JCOEFPTR coef_block,
125 FAST_FLOAT * divisors,
126 FAST_FLOAT * workspace));
127
Pierre Ossman0d37c572009-03-09 13:31:56 +0000128EXTERN(void) jsimd_quantize_float_sse JPP((JCOEFPTR coef_block,
129 FAST_FLOAT * divisors,
130 FAST_FLOAT * workspace));
131
Pierre Ossman3e0e2de2009-03-09 13:25:30 +0000132/* SIMD Reduced Inverse DCT */
133EXTERN(void) jsimd_idct_2x2_mmx JPP((void * dct_table,
134 JCOEFPTR coef_block,
135 JSAMPARRAY output_buf,
136 JDIMENSION output_col));
137EXTERN(void) jsimd_idct_4x4_mmx JPP((void * dct_table,
138 JCOEFPTR coef_block,
139 JSAMPARRAY output_buf,
140 JDIMENSION output_col));
141
142/* SIMD Inverse DCT */
143EXTERN(void) jsimd_idct_islow_mmx JPP((void * dct_table,
144 JCOEFPTR coef_block,
145 JSAMPARRAY output_buf,
146 JDIMENSION output_col));
147EXTERN(void) jsimd_idct_ifast_mmx JPP((void * dct_table,
148 JCOEFPTR coef_block,
149 JSAMPARRAY output_buf,
150 JDIMENSION output_col));
151
Pierre Ossman2c2e54b2009-03-09 13:28:10 +0000152EXTERN(void) jsimd_idct_float_3dnow JPP((void * dct_table,
153 JCOEFPTR coef_block,
154 JSAMPARRAY output_buf,
155 JDIMENSION output_col));
156
Pierre Ossman0d37c572009-03-09 13:31:56 +0000157extern const int jconst_idct_float_sse[];
158EXTERN(void) jsimd_idct_float_sse JPP((void * dct_table,
159 JCOEFPTR coef_block,
160 JSAMPARRAY output_buf,
161 JDIMENSION output_col));
162