blob: 539ccc41c8c2898f23ab0daf5a2808299a224656 [file] [log] [blame]
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +00001/*
2 * jcdctmgr.c
3 *
4 * Copyright (C) 1994-1996, Thomas G. Lane.
Pierre Ossman4aa24292009-03-09 13:23:04 +00005 * Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman9ad52342009-03-09 13:15:56 +00006 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +00007 * This file is part of the Independent JPEG Group's software.
8 * For conditions of distribution and use, see the accompanying README file.
9 *
10 * This file contains the forward-DCT management logic.
11 * This code selects a particular DCT implementation to be used,
12 * and it performs related housekeeping chores including coefficient
13 * quantization.
14 */
15
16#define JPEG_INTERNALS
17#include "jinclude.h"
18#include "jpeglib.h"
19#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman9ad52342009-03-09 13:15:56 +000020#include "jsimddct.h"
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +000021
22
23/* Private subobject for this module */
24
Pierre Ossmanb85c2f82009-03-09 10:37:20 +000025typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
26typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
27
28typedef JMETHOD(void, convsamp_method_ptr,
29 (JSAMPARRAY sample_data, JDIMENSION start_col,
30 DCTELEM * workspace));
31typedef JMETHOD(void, float_convsamp_method_ptr,
32 (JSAMPARRAY sample_data, JDIMENSION start_col,
33 FAST_FLOAT *workspace));
34
35typedef JMETHOD(void, quantize_method_ptr,
36 (JCOEFPTR coef_block, DCTELEM * divisors,
37 DCTELEM * workspace));
38typedef JMETHOD(void, float_quantize_method_ptr,
39 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
40 FAST_FLOAT * workspace));
41
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +000042typedef struct {
43 struct jpeg_forward_dct pub; /* public fields */
44
45 /* Pointer to the DCT routine actually in use */
Pierre Ossmanb85c2f82009-03-09 10:37:20 +000046 forward_DCT_method_ptr dct;
47 convsamp_method_ptr convsamp;
48 quantize_method_ptr quantize;
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +000049
50 /* The actual post-DCT divisors --- not identical to the quant table
51 * entries, because of scaling (especially for an unnormalized DCT).
52 * Each table is given in normal array order.
53 */
54 DCTELEM * divisors[NUM_QUANT_TBLS];
55
56#ifdef DCT_FLOAT_SUPPORTED
57 /* Same as above for the floating-point case. */
Pierre Ossmanb85c2f82009-03-09 10:37:20 +000058 float_DCT_method_ptr float_dct;
59 float_convsamp_method_ptr float_convsamp;
60 float_quantize_method_ptr float_quantize;
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +000061 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
62#endif
63} my_fdct_controller;
64
65typedef my_fdct_controller * my_fdct_ptr;
66
67
68/*
Pierre Ossman4aa24292009-03-09 13:23:04 +000069 * Find the highest bit in an integer through binary search.
70 */
71LOCAL(int)
72fls (UINT16 val)
73{
74 int bit;
75
76 bit = 16;
77
78 if (!val)
79 return 0;
80
81 if (!(val & 0xff00)) {
82 bit -= 8;
83 val <<= 8;
84 }
85 if (!(val & 0xf000)) {
86 bit -= 4;
87 val <<= 4;
88 }
89 if (!(val & 0xc000)) {
90 bit -= 2;
91 val <<= 2;
92 }
93 if (!(val & 0x8000)) {
94 bit -= 1;
95 val <<= 1;
96 }
97
98 return bit;
99}
100
101/*
102 * Compute values to do a division using reciprocal.
103 *
104 * This implementation is based on an algorithm described in
105 * "How to optimize for the Pentium family of microprocessors"
106 * (http://www.agner.org/assem/).
107 * More information about the basic algorithm can be found in
108 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
109 *
110 * The basic idea is to replace x/d by x * d^-1. In order to store
111 * d^-1 with enough precision we shift it left a few places. It turns
112 * out that this algoright gives just enough precision, and also fits
113 * into DCTELEM:
114 *
115 * b = (the number of significant bits in divisor) - 1
116 * r = (word size) + b
117 * f = 2^r / divisor
118 *
119 * f will not be an integer for most cases, so we need to compensate
120 * for the rounding error introduced:
121 *
122 * no fractional part:
123 *
124 * result = input >> r
125 *
126 * fractional part of f < 0.5:
127 *
128 * round f down to nearest integer
129 * result = ((input + 1) * f) >> r
130 *
131 * fractional part of f > 0.5:
132 *
133 * round f up to nearest integer
134 * result = (input * f) >> r
135 *
136 * This is the original algorithm that gives truncated results. But we
137 * want properly rounded results, so we replace "input" with
138 * "input + divisor/2".
139 *
140 * In order to allow SIMD implementations we also tweak the values to
141 * allow the same calculation to be made at all times:
142 *
143 * dctbl[0] = f rounded to nearest integer
144 * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
145 * dctbl[2] = 1 << ((word size) * 2 - r)
146 * dctbl[3] = r - (word size)
147 *
148 * dctbl[2] is for stupid instruction sets where the shift operation
149 * isn't member wise (e.g. MMX).
150 *
151 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
152 * is that most SIMD implementations have a "multiply and store top
153 * half" operation.
154 *
155 * Lastly, we store each of the values in their own table instead
156 * of in a consecutive manner, yet again in order to allow SIMD
157 * routines.
158 */
159LOCAL(void)
160compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
161{
162 UDCTELEM2 fq, fr;
163 UDCTELEM c;
164 int b, r;
165
166 b = fls(divisor) - 1;
167 r = sizeof(DCTELEM) * 8 + b;
168
169 fq = ((UDCTELEM2)1 << r) / divisor;
170 fr = ((UDCTELEM2)1 << r) % divisor;
171
172 c = divisor / 2; /* for rounding */
173
174 if (fr == 0) { /* divisor is power of two */
175 /* fq will be one bit too large to fit in DCTELEM, so adjust */
176 fq >>= 1;
177 r--;
178 } else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
179 c++;
180 } else { /* fractional part is > 0.5 */
181 fq++;
182 }
183
184 dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
185 dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
186 dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
187 dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
188}
189
190/*
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000191 * Initialize for a processing pass.
192 * Verify that all referenced Q-tables are present, and set up
193 * the divisor table for each one.
194 * In the current implementation, DCT of all components is done during
195 * the first pass, even if only some components will be output in the
196 * first scan. Hence all components should be examined here.
197 */
198
199METHODDEF(void)
200start_pass_fdctmgr (j_compress_ptr cinfo)
201{
202 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
203 int ci, qtblno, i;
204 jpeg_component_info *compptr;
205 JQUANT_TBL * qtbl;
206 DCTELEM * dtbl;
207
208 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
209 ci++, compptr++) {
210 qtblno = compptr->quant_tbl_no;
211 /* Make sure specified quantization table is present */
212 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
213 cinfo->quant_tbl_ptrs[qtblno] == NULL)
214 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
215 qtbl = cinfo->quant_tbl_ptrs[qtblno];
216 /* Compute divisors for this quant table */
217 /* We may do this more than once for same table, but it's not a big deal */
218 switch (cinfo->dct_method) {
219#ifdef DCT_ISLOW_SUPPORTED
220 case JDCT_ISLOW:
221 /* For LL&M IDCT method, divisors are equal to raw quantization
222 * coefficients multiplied by 8 (to counteract scaling).
223 */
224 if (fdct->divisors[qtblno] == NULL) {
225 fdct->divisors[qtblno] = (DCTELEM *)
226 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossman4aa24292009-03-09 13:23:04 +0000227 (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000228 }
229 dtbl = fdct->divisors[qtblno];
230 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman4aa24292009-03-09 13:23:04 +0000231 compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000232 }
233 break;
234#endif
235#ifdef DCT_IFAST_SUPPORTED
236 case JDCT_IFAST:
237 {
238 /* For AA&N IDCT method, divisors are equal to quantization
239 * coefficients scaled by scalefactor[row]*scalefactor[col], where
240 * scalefactor[0] = 1
241 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
242 * We apply a further scale factor of 8.
243 */
244#define CONST_BITS 14
245 static const INT16 aanscales[DCTSIZE2] = {
246 /* precomputed values scaled up by 14 bits */
247 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
248 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
249 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
250 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
251 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
252 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
253 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
254 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
255 };
256 SHIFT_TEMPS
257
258 if (fdct->divisors[qtblno] == NULL) {
259 fdct->divisors[qtblno] = (DCTELEM *)
260 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossman4aa24292009-03-09 13:23:04 +0000261 (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000262 }
263 dtbl = fdct->divisors[qtblno];
264 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman4aa24292009-03-09 13:23:04 +0000265 compute_reciprocal(
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000266 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
267 (INT32) aanscales[i]),
Pierre Ossman4aa24292009-03-09 13:23:04 +0000268 CONST_BITS-3), &dtbl[i]);
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000269 }
270 }
271 break;
272#endif
273#ifdef DCT_FLOAT_SUPPORTED
274 case JDCT_FLOAT:
275 {
276 /* For float AA&N IDCT method, divisors are equal to quantization
277 * coefficients scaled by scalefactor[row]*scalefactor[col], where
278 * scalefactor[0] = 1
279 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
280 * We apply a further scale factor of 8.
281 * What's actually stored is 1/divisor so that the inner loop can
282 * use a multiplication rather than a division.
283 */
284 FAST_FLOAT * fdtbl;
285 int row, col;
286 static const double aanscalefactor[DCTSIZE] = {
287 1.0, 1.387039845, 1.306562965, 1.175875602,
288 1.0, 0.785694958, 0.541196100, 0.275899379
289 };
290
291 if (fdct->float_divisors[qtblno] == NULL) {
292 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
293 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
294 DCTSIZE2 * SIZEOF(FAST_FLOAT));
295 }
296 fdtbl = fdct->float_divisors[qtblno];
297 i = 0;
298 for (row = 0; row < DCTSIZE; row++) {
299 for (col = 0; col < DCTSIZE; col++) {
300 fdtbl[i] = (FAST_FLOAT)
301 (1.0 / (((double) qtbl->quantval[i] *
302 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
303 i++;
304 }
305 }
306 }
307 break;
308#endif
309 default:
310 ERREXIT(cinfo, JERR_NOT_COMPILED);
311 break;
312 }
313 }
314}
315
316
317/*
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000318 * Load data into workspace, applying unsigned->signed conversion.
319 */
320
321METHODDEF(void)
322convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
323{
324 register DCTELEM *workspaceptr;
325 register JSAMPROW elemptr;
326 register int elemr;
327
328 workspaceptr = workspace;
329 for (elemr = 0; elemr < DCTSIZE; elemr++) {
330 elemptr = sample_data[elemr] + start_col;
331
332#if DCTSIZE == 8 /* unroll the inner loop */
333 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
334 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
335 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
336 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
337 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
338 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
339 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
340 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
341#else
342 {
343 register int elemc;
344 for (elemc = DCTSIZE; elemc > 0; elemc--)
345 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
346 }
347#endif
348 }
349}
350
351
352/*
353 * Quantize/descale the coefficients, and store into coef_blocks[].
354 */
355
356METHODDEF(void)
357quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
358{
Pierre Ossman4aa24292009-03-09 13:23:04 +0000359 int i;
360 DCTELEM temp;
361 UDCTELEM recip, corr, shift;
362 UDCTELEM2 product;
363 JCOEFPTR output_ptr = coef_block;
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000364
365 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000366 temp = workspace[i];
Pierre Ossman4aa24292009-03-09 13:23:04 +0000367 recip = divisors[i + DCTSIZE2 * 0];
368 corr = divisors[i + DCTSIZE2 * 1];
369 shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000370
371 if (temp < 0) {
372 temp = -temp;
Pierre Ossman4aa24292009-03-09 13:23:04 +0000373 product = (UDCTELEM2)(temp + corr) * recip;
374 product >>= shift + sizeof(DCTELEM)*8;
375 temp = product;
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000376 temp = -temp;
377 } else {
Pierre Ossman4aa24292009-03-09 13:23:04 +0000378 product = (UDCTELEM2)(temp + corr) * recip;
379 product >>= shift + sizeof(DCTELEM)*8;
380 temp = product;
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000381 }
Pierre Ossman4aa24292009-03-09 13:23:04 +0000382
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000383 output_ptr[i] = (JCOEF) temp;
384 }
385}
386
387
388/*
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000389 * Perform forward DCT on one or more blocks of a component.
390 *
391 * The input samples are taken from the sample_data[] array starting at
392 * position start_row/start_col, and moving to the right for any additional
393 * blocks. The quantized coefficients are returned in coef_blocks[].
394 */
395
396METHODDEF(void)
397forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
398 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
399 JDIMENSION start_row, JDIMENSION start_col,
400 JDIMENSION num_blocks)
401/* This version is used for integer DCT implementations. */
402{
403 /* This routine is heavily used, so it's worth coding it tightly. */
404 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000405 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
406 DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
407 JDIMENSION bi;
408
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000409 /* Make sure the compiler doesn't look up these every pass */
410 forward_DCT_method_ptr do_dct = fdct->dct;
411 convsamp_method_ptr do_convsamp = fdct->convsamp;
412 quantize_method_ptr do_quantize = fdct->quantize;
413
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000414 sample_data += start_row; /* fold in the vertical offset once */
415
416 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
417 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000418 (*do_convsamp) (sample_data, start_col, workspace);
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000419
420 /* Perform the DCT */
421 (*do_dct) (workspace);
422
423 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000424 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000425 }
426}
427
428
429#ifdef DCT_FLOAT_SUPPORTED
430
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000431
432METHODDEF(void)
433convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
434{
435 register FAST_FLOAT *workspaceptr;
436 register JSAMPROW elemptr;
437 register int elemr;
438
439 workspaceptr = workspace;
440 for (elemr = 0; elemr < DCTSIZE; elemr++) {
441 elemptr = sample_data[elemr] + start_col;
442#if DCTSIZE == 8 /* unroll the inner loop */
443 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
444 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
445 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
446 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
447 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
448 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
449 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
450 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
451#else
452 {
453 register int elemc;
454 for (elemc = DCTSIZE; elemc > 0; elemc--)
455 *workspaceptr++ = (FAST_FLOAT)
456 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
457 }
458#endif
459 }
460}
461
462
463METHODDEF(void)
464quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
465{
466 register FAST_FLOAT temp;
467 register int i;
468 register JCOEFPTR output_ptr = coef_block;
469
470 for (i = 0; i < DCTSIZE2; i++) {
471 /* Apply the quantization and scaling factor */
472 temp = workspace[i] * divisors[i];
473
474 /* Round to nearest integer.
475 * Since C does not specify the direction of rounding for negative
476 * quotients, we have to force the dividend positive for portability.
477 * The maximum coefficient size is +-16K (for 12-bit data), so this
478 * code should work for either 16-bit or 32-bit ints.
479 */
480 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
481 }
482}
483
484
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000485METHODDEF(void)
486forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
487 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
488 JDIMENSION start_row, JDIMENSION start_col,
489 JDIMENSION num_blocks)
490/* This version is used for floating-point DCT implementations. */
491{
492 /* This routine is heavily used, so it's worth coding it tightly. */
493 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000494 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
495 FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
496 JDIMENSION bi;
497
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000498 /* Make sure the compiler doesn't look up these every pass */
499 float_DCT_method_ptr do_dct = fdct->float_dct;
500 float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
501 float_quantize_method_ptr do_quantize = fdct->float_quantize;
502
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000503 sample_data += start_row; /* fold in the vertical offset once */
504
505 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
506 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000507 (*do_convsamp) (sample_data, start_col, workspace);
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000508
509 /* Perform the DCT */
510 (*do_dct) (workspace);
511
512 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000513 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000514 }
515}
516
517#endif /* DCT_FLOAT_SUPPORTED */
518
519
520/*
521 * Initialize FDCT manager.
522 */
523
524GLOBAL(void)
525jinit_forward_dct (j_compress_ptr cinfo)
526{
527 my_fdct_ptr fdct;
528 int i;
529
530 fdct = (my_fdct_ptr)
531 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
532 SIZEOF(my_fdct_controller));
533 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
534 fdct->pub.start_pass = start_pass_fdctmgr;
535
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000536 /* First determine the DCT... */
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000537 switch (cinfo->dct_method) {
538#ifdef DCT_ISLOW_SUPPORTED
539 case JDCT_ISLOW:
540 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman9ad52342009-03-09 13:15:56 +0000541 if (jsimd_can_fdct_islow())
542 fdct->dct = jsimd_fdct_islow;
543 else
544 fdct->dct = jpeg_fdct_islow;
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000545 break;
546#endif
547#ifdef DCT_IFAST_SUPPORTED
548 case JDCT_IFAST:
549 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman9ad52342009-03-09 13:15:56 +0000550 if (jsimd_can_fdct_ifast())
551 fdct->dct = jsimd_fdct_ifast;
552 else
553 fdct->dct = jpeg_fdct_ifast;
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000554 break;
555#endif
556#ifdef DCT_FLOAT_SUPPORTED
557 case JDCT_FLOAT:
558 fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman9ad52342009-03-09 13:15:56 +0000559 if (jsimd_can_fdct_float())
560 fdct->float_dct = jsimd_fdct_float;
561 else
562 fdct->float_dct = jpeg_fdct_float;
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000563 break;
564#endif
565 default:
566 ERREXIT(cinfo, JERR_NOT_COMPILED);
567 break;
568 }
569
570 /* ...then the supporting stages. */
571 switch (cinfo->dct_method) {
572#ifdef DCT_ISLOW_SUPPORTED
573 case JDCT_ISLOW:
574#endif
575#ifdef DCT_IFAST_SUPPORTED
576 case JDCT_IFAST:
577#endif
578#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
Pierre Ossman9ad52342009-03-09 13:15:56 +0000579 if (jsimd_can_convsamp())
580 fdct->convsamp = jsimd_convsamp;
581 else
582 fdct->convsamp = convsamp;
583 if (jsimd_can_quantize())
584 fdct->quantize = jsimd_quantize;
585 else
586 fdct->quantize = quantize;
Pierre Ossmanb85c2f82009-03-09 10:37:20 +0000587 break;
588#endif
589#ifdef DCT_FLOAT_SUPPORTED
590 case JDCT_FLOAT:
Pierre Ossman9ad52342009-03-09 13:15:56 +0000591 if (jsimd_can_convsamp_float())
592 fdct->float_convsamp = jsimd_convsamp_float;
593 else
594 fdct->float_convsamp = convsamp_float;
595 if (jsimd_can_quantize_float())
596 fdct->float_quantize = jsimd_quantize_float;
597 else
598 fdct->float_quantize = quantize_float;
Constantin Kaplinskya2adc8d2006-05-25 05:01:55 +0000599 break;
600#endif
601 default:
602 ERREXIT(cinfo, JERR_NOT_COMPILED);
603 break;
604 }
605
606 /* Mark divisor tables unallocated */
607 for (i = 0; i < NUM_QUANT_TBLS; i++) {
608 fdct->divisors[i] = NULL;
609#ifdef DCT_FLOAT_SUPPORTED
610 fdct->float_divisors[i] = NULL;
611#endif
612 }
613}