Blame - common/jpeg/jcdctmgr.c - android_external_tigervnc

blob: 539ccc41c8c2898f23ab0daf5a2808299a224656 [file] [log] [blame]

Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	1	/*
				2	* jcdctmgr.c
				3	*
				4	* Copyright (C) 1994-1996, Thomas G. Lane.
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	5	* Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman	9ad5234	2009-03-09 13:15:56 +0000	[diff] [blame]	6	* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	7	* This file is part of the Independent JPEG Group's software.
				8	* For conditions of distribution and use, see the accompanying README file.
				9	*
				10	* This file contains the forward-DCT management logic.
				11	* This code selects a particular DCT implementation to be used,
				12	* and it performs related housekeeping chores including coefficient
				13	* quantization.
				14	*/
				15
				16	#define JPEG_INTERNALS
				17	#include "jinclude.h"
				18	#include "jpeglib.h"
				19	#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman	9ad5234	2009-03-09 13:15:56 +0000	[diff] [blame]	20	#include "jsimddct.h"
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	21
				22
				23	/* Private subobject for this module */
				24
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	25	typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
				26	typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
				27
				28	typedef JMETHOD(void, convsamp_method_ptr,
				29	(JSAMPARRAY sample_data, JDIMENSION start_col,
				30	DCTELEM * workspace));
				31	typedef JMETHOD(void, float_convsamp_method_ptr,
				32	(JSAMPARRAY sample_data, JDIMENSION start_col,
				33	FAST_FLOAT *workspace));
				34
				35	typedef JMETHOD(void, quantize_method_ptr,
				36	(JCOEFPTR coef_block, DCTELEM * divisors,
				37	DCTELEM * workspace));
				38	typedef JMETHOD(void, float_quantize_method_ptr,
				39	(JCOEFPTR coef_block, FAST_FLOAT * divisors,
				40	FAST_FLOAT * workspace));
				41
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	42	typedef struct {
				43	struct jpeg_forward_dct pub; /* public fields */
				44
				45	/* Pointer to the DCT routine actually in use */
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	46	forward_DCT_method_ptr dct;
				47	convsamp_method_ptr convsamp;
				48	quantize_method_ptr quantize;
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	49
				50	/* The actual post-DCT divisors --- not identical to the quant table
				51	* entries, because of scaling (especially for an unnormalized DCT).
				52	* Each table is given in normal array order.
				53	*/
				54	DCTELEM * divisors[NUM_QUANT_TBLS];
				55
				56	#ifdef DCT_FLOAT_SUPPORTED
				57	/* Same as above for the floating-point case. */
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	58	float_DCT_method_ptr float_dct;
				59	float_convsamp_method_ptr float_convsamp;
				60	float_quantize_method_ptr float_quantize;
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	61	FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
				62	#endif
				63	} my_fdct_controller;
				64
				65	typedef my_fdct_controller * my_fdct_ptr;
				66
				67
				68	/*
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	69	* Find the highest bit in an integer through binary search.
				70	*/
				71	LOCAL(int)
				72	fls (UINT16 val)
				73	{
				74	int bit;
				75
				76	bit = 16;
				77
				78	if (!val)
				79	return 0;
				80
				81	if (!(val & 0xff00)) {
				82	bit -= 8;
				83	val <<= 8;
				84	}
				85	if (!(val & 0xf000)) {
				86	bit -= 4;
				87	val <<= 4;
				88	}
				89	if (!(val & 0xc000)) {
				90	bit -= 2;
				91	val <<= 2;
				92	}
				93	if (!(val & 0x8000)) {
				94	bit -= 1;
				95	val <<= 1;
				96	}
				97
				98	return bit;
				99	}
				100
				101	/*
				102	* Compute values to do a division using reciprocal.
				103	*
				104	* This implementation is based on an algorithm described in
				105	* "How to optimize for the Pentium family of microprocessors"
				106	* (http://www.agner.org/assem/).
				107	* More information about the basic algorithm can be found in
				108	* the paper "Integer Division Using Reciprocals" by Robert Alverson.
				109	*
				110	* The basic idea is to replace x/d by x * d^-1. In order to store
				111	* d^-1 with enough precision we shift it left a few places. It turns
				112	* out that this algorithm gives just enough precision, and also fits
				113	* into DCTELEM:
				114	*
				115	* b = (the number of significant bits in divisor) - 1
				116	* r = (word size) + b
				117	* f = 2^r / divisor
				118	*
				119	* f will not be an integer for most cases, so we need to compensate
				120	* for the rounding error introduced:
				121	*
				122	* no fractional part:
				123	*
				124	* result = input >> r
				125	*
				126	* fractional part of f < 0.5:
				127	*
				128	* round f down to nearest integer
				129	* result = ((input + 1) * f) >> r
				130	*
				131	* fractional part of f > 0.5:
				132	*
				133	* round f up to nearest integer
				134	* result = (input * f) >> r
				135	*
				136	* This is the original algorithm that gives truncated results. But we
				137	* want properly rounded results, so we replace "input" with
				138	* "input + divisor/2".
				139	*
				140	* In order to allow SIMD implementations we also tweak the values to
				141	* allow the same calculation to be made at all times:
				142	*
				143	* dctbl[0] = f rounded to nearest integer
				144	* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
				145	* dctbl[2] = 1 << ((word size) * 2 - r)
				146	* dctbl[3] = r - (word size)
				147	*
				148	* dctbl[2] is for stupid instruction sets where the shift operation
				149	* isn't member wise (e.g. MMX).
				150	*
				151	* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
				152	* is that most SIMD implementations have a "multiply and store top
				153	* half" operation.
				154	*
				155	* Lastly, we store each of the values in their own table instead
				156	* of in a consecutive manner, yet again in order to allow SIMD
				157	* routines.
				158	*/
				159	LOCAL(void)
				160	compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
				161	{
				162	UDCTELEM2 fq, fr;
				163	UDCTELEM c;
				164	int b, r;
				165
				166	b = fls(divisor) - 1;
				167	r = sizeof(DCTELEM) * 8 + b;
				168
				169	fq = ((UDCTELEM2)1 << r) / divisor;
				170	fr = ((UDCTELEM2)1 << r) % divisor;
				171
				172	c = divisor / 2; /* for rounding */
				173
				174	if (fr == 0) { /* divisor is power of two */
				175	/* fq will be one bit too large to fit in DCTELEM, so adjust */
				176	fq >>= 1;
				177	r--;
				178	} else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
				179	c++;
				180	} else { /* fractional part is > 0.5 */
				181	fq++;
				182	}
				183
				184	dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
				185	dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
				186	dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)82 - r)); /* scale */
				187	dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)8; / shift */
				188	}
				189
				190	/*
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	191	* Initialize for a processing pass.
				192	* Verify that all referenced Q-tables are present, and set up
				193	* the divisor table for each one.
				194	* In the current implementation, DCT of all components is done during
				195	* the first pass, even if only some components will be output in the
				196	* first scan. Hence all components should be examined here.
				197	*/
				198
				199	METHODDEF(void)
				200	start_pass_fdctmgr (j_compress_ptr cinfo)
				201	{
				202	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				203	int ci, qtblno, i;
				204	jpeg_component_info *compptr;
				205	JQUANT_TBL * qtbl;
				206	DCTELEM * dtbl;
				207
				208	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
				209	ci++, compptr++) {
				210	qtblno = compptr->quant_tbl_no;
				211	/* Make sure specified quantization table is present */
				212	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
				213	cinfo->quant_tbl_ptrs[qtblno] == NULL)
				214	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
				215	qtbl = cinfo->quant_tbl_ptrs[qtblno];
				216	/* Compute divisors for this quant table */
				217	/* We may do this more than once for same table, but it's not a big deal */
				218	switch (cinfo->dct_method) {
				219	#ifdef DCT_ISLOW_SUPPORTED
				220	case JDCT_ISLOW:
				221	/* For LL&M IDCT method, divisors are equal to raw quantization
				222	* coefficients multiplied by 8 (to counteract scaling).
				223	*/
				224	if (fdct->divisors[qtblno] == NULL) {
				225	fdct->divisors[qtblno] = (DCTELEM *)
				226	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	227	(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	228	}
				229	dtbl = fdct->divisors[qtblno];
				230	for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	231	compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	232	}
				233	break;
				234	#endif
				235	#ifdef DCT_IFAST_SUPPORTED
				236	case JDCT_IFAST:
				237	{
				238	/* For AA&N IDCT method, divisors are equal to quantization
				239	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				240	* scalefactor[0] = 1
				241	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				242	* We apply a further scale factor of 8.
				243	*/
				244	#define CONST_BITS 14
				245	static const INT16 aanscales[DCTSIZE2] = {
				246	/* precomputed values scaled up by 14 bits */
				247	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				248	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
				249	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
				250	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
				251	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				252	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
				253	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
				254	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
				255	};
				256	SHIFT_TEMPS
				257
				258	if (fdct->divisors[qtblno] == NULL) {
				259	fdct->divisors[qtblno] = (DCTELEM *)
				260	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	261	(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	262	}
				263	dtbl = fdct->divisors[qtblno];
				264	for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	265	compute_reciprocal(
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	266	DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
				267	(INT32) aanscales[i]),
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	268	CONST_BITS-3), &dtbl[i]);
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	269	}
				270	}
				271	break;
				272	#endif
				273	#ifdef DCT_FLOAT_SUPPORTED
				274	case JDCT_FLOAT:
				275	{
				276	/* For float AA&N IDCT method, divisors are equal to quantization
				277	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				278	* scalefactor[0] = 1
				279	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				280	* We apply a further scale factor of 8.
				281	* What's actually stored is 1/divisor so that the inner loop can
				282	* use a multiplication rather than a division.
				283	*/
				284	FAST_FLOAT * fdtbl;
				285	int row, col;
				286	static const double aanscalefactor[DCTSIZE] = {
				287	1.0, 1.387039845, 1.306562965, 1.175875602,
				288	1.0, 0.785694958, 0.541196100, 0.275899379
				289	};
				290
				291	if (fdct->float_divisors[qtblno] == NULL) {
				292	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
				293	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				294	DCTSIZE2 * SIZEOF(FAST_FLOAT));
				295	}
				296	fdtbl = fdct->float_divisors[qtblno];
				297	i = 0;
				298	for (row = 0; row < DCTSIZE; row++) {
				299	for (col = 0; col < DCTSIZE; col++) {
				300	fdtbl[i] = (FAST_FLOAT)
				301	(1.0 / (((double) qtbl->quantval[i] *
				302	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
				303	i++;
				304	}
				305	}
				306	}
				307	break;
				308	#endif
				309	default:
				310	ERREXIT(cinfo, JERR_NOT_COMPILED);
				311	break;
				312	}
				313	}
				314	}
				315
				316
				317	/*
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	318	* Load data into workspace, applying unsigned->signed conversion.
				319	*/
				320
				321	METHODDEF(void)
				322	convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
				323	{
				324	register DCTELEM *workspaceptr;
				325	register JSAMPROW elemptr;
				326	register int elemr;
				327
				328	workspaceptr = workspace;
				329	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				330	elemptr = sample_data[elemr] + start_col;
				331
				332	#if DCTSIZE == 8 /* unroll the inner loop */
				333	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				334	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				335	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				336	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				337	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				338	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				339	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				340	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				341	#else
				342	{
				343	register int elemc;
				344	for (elemc = DCTSIZE; elemc > 0; elemc--)
				345	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				346	}
				347	#endif
				348	}
				349	}
				350
				351
				352	/*
				353	* Quantize/descale the coefficients, and store into coef_blocks[].
				354	*/
				355
				356	METHODDEF(void)
				357	quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
				358	{
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	359	int i;
				360	DCTELEM temp;
				361	UDCTELEM recip, corr, shift;
				362	UDCTELEM2 product;
				363	JCOEFPTR output_ptr = coef_block;
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	364
				365	for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	366	temp = workspace[i];
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	367	recip = divisors[i + DCTSIZE2 * 0];
				368	corr = divisors[i + DCTSIZE2 * 1];
				369	shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	370
				371	if (temp < 0) {
				372	temp = -temp;
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	373	product = (UDCTELEM2)(temp + corr) * recip;
				374	product >>= shift + sizeof(DCTELEM)*8;
				375	temp = product;
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	376	temp = -temp;
				377	} else {
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	378	product = (UDCTELEM2)(temp + corr) * recip;
				379	product >>= shift + sizeof(DCTELEM)*8;
				380	temp = product;
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	381	}
Pierre Ossman	4aa2429	2009-03-09 13:23:04 +0000	[diff] [blame^]	382
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	383	output_ptr[i] = (JCOEF) temp;
				384	}
				385	}
				386
				387
				388	/*
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	389	* Perform forward DCT on one or more blocks of a component.
				390	*
				391	* The input samples are taken from the sample_data[] array starting at
				392	* position start_row/start_col, and moving to the right for any additional
				393	* blocks. The quantized coefficients are returned in coef_blocks[].
				394	*/
				395
				396	METHODDEF(void)
				397	forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
				398	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				399	JDIMENSION start_row, JDIMENSION start_col,
				400	JDIMENSION num_blocks)
				401	/* This version is used for integer DCT implementations. */
				402	{
				403	/* This routine is heavily used, so it's worth coding it tightly. */
				404	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	405	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
				406	DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
				407	JDIMENSION bi;
				408
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	409	/* Make sure the compiler doesn't look up these every pass */
				410	forward_DCT_method_ptr do_dct = fdct->dct;
				411	convsamp_method_ptr do_convsamp = fdct->convsamp;
				412	quantize_method_ptr do_quantize = fdct->quantize;
				413
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	414	sample_data += start_row; /* fold in the vertical offset once */
				415
				416	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				417	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	418	(*do_convsamp) (sample_data, start_col, workspace);
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	419
				420	/* Perform the DCT */
				421	(*do_dct) (workspace);
				422
				423	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	424	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	425	}
				426	}
				427
				428
				429	#ifdef DCT_FLOAT_SUPPORTED
				430
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	431
				432	METHODDEF(void)
				433	convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
				434	{
				435	register FAST_FLOAT *workspaceptr;
				436	register JSAMPROW elemptr;
				437	register int elemr;
				438
				439	workspaceptr = workspace;
				440	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				441	elemptr = sample_data[elemr] + start_col;
				442	#if DCTSIZE == 8 /* unroll the inner loop */
				443	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				444	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				445	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				446	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				447	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				448	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				449	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				450	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				451	#else
				452	{
				453	register int elemc;
				454	for (elemc = DCTSIZE; elemc > 0; elemc--)
				455	*workspaceptr++ = (FAST_FLOAT)
				456	(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
				457	}
				458	#endif
				459	}
				460	}
				461
				462
				463	METHODDEF(void)
				464	quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
				465	{
				466	register FAST_FLOAT temp;
				467	register int i;
				468	register JCOEFPTR output_ptr = coef_block;
				469
				470	for (i = 0; i < DCTSIZE2; i++) {
				471	/* Apply the quantization and scaling factor */
				472	temp = workspace[i] * divisors[i];
				473
				474	/* Round to nearest integer.
				475	* Since C does not specify the direction of rounding for negative
				476	* quotients, we have to force the dividend positive for portability.
				477	* The maximum coefficient size is +-16K (for 12-bit data), so this
				478	* code should work for either 16-bit or 32-bit ints.
				479	*/
				480	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
				481	}
				482	}
				483
				484
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	485	METHODDEF(void)
				486	forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
				487	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				488	JDIMENSION start_row, JDIMENSION start_col,
				489	JDIMENSION num_blocks)
				490	/* This version is used for floating-point DCT implementations. */
				491	{
				492	/* This routine is heavily used, so it's worth coding it tightly. */
				493	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	494	FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
				495	FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
				496	JDIMENSION bi;
				497
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	498	/* Make sure the compiler doesn't look up these every pass */
				499	float_DCT_method_ptr do_dct = fdct->float_dct;
				500	float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
				501	float_quantize_method_ptr do_quantize = fdct->float_quantize;
				502
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	503	sample_data += start_row; /* fold in the vertical offset once */
				504
				505	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				506	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	507	(*do_convsamp) (sample_data, start_col, workspace);
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	508
				509	/* Perform the DCT */
				510	(*do_dct) (workspace);
				511
				512	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	513	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	514	}
				515	}
				516
				517	#endif /* DCT_FLOAT_SUPPORTED */
				518
				519
				520	/*
				521	* Initialize FDCT manager.
				522	*/
				523
				524	GLOBAL(void)
				525	jinit_forward_dct (j_compress_ptr cinfo)
				526	{
				527	my_fdct_ptr fdct;
				528	int i;
				529
				530	fdct = (my_fdct_ptr)
				531	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				532	SIZEOF(my_fdct_controller));
				533	cinfo->fdct = (struct jpeg_forward_dct *) fdct;
				534	fdct->pub.start_pass = start_pass_fdctmgr;
				535
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	536	/* First determine the DCT... */
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	537	switch (cinfo->dct_method) {
				538	#ifdef DCT_ISLOW_SUPPORTED
				539	case JDCT_ISLOW:
				540	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	9ad5234	2009-03-09 13:15:56 +0000	[diff] [blame]	541	if (jsimd_can_fdct_islow())
				542	fdct->dct = jsimd_fdct_islow;
				543	else
				544	fdct->dct = jpeg_fdct_islow;
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	545	break;
				546	#endif
				547	#ifdef DCT_IFAST_SUPPORTED
				548	case JDCT_IFAST:
				549	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	9ad5234	2009-03-09 13:15:56 +0000	[diff] [blame]	550	if (jsimd_can_fdct_ifast())
				551	fdct->dct = jsimd_fdct_ifast;
				552	else
				553	fdct->dct = jpeg_fdct_ifast;
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	554	break;
				555	#endif
				556	#ifdef DCT_FLOAT_SUPPORTED
				557	case JDCT_FLOAT:
				558	fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman	9ad5234	2009-03-09 13:15:56 +0000	[diff] [blame]	559	if (jsimd_can_fdct_float())
				560	fdct->float_dct = jsimd_fdct_float;
				561	else
				562	fdct->float_dct = jpeg_fdct_float;
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	563	break;
				564	#endif
				565	default:
				566	ERREXIT(cinfo, JERR_NOT_COMPILED);
				567	break;
				568	}
				569
				570	/* ...then the supporting stages. */
				571	switch (cinfo->dct_method) {
				572	#ifdef DCT_ISLOW_SUPPORTED
				573	case JDCT_ISLOW:
				574	#endif
				575	#ifdef DCT_IFAST_SUPPORTED
				576	case JDCT_IFAST:
				577	#endif
				578	#if defined(DCT_ISLOW_SUPPORTED) \|\| defined(DCT_IFAST_SUPPORTED)
Pierre Ossman	9ad5234	2009-03-09 13:15:56 +0000	[diff] [blame]	579	if (jsimd_can_convsamp())
				580	fdct->convsamp = jsimd_convsamp;
				581	else
				582	fdct->convsamp = convsamp;
				583	if (jsimd_can_quantize())
				584	fdct->quantize = jsimd_quantize;
				585	else
				586	fdct->quantize = quantize;
Pierre Ossman	b85c2f8	2009-03-09 10:37:20 +0000	[diff] [blame]	587	break;
				588	#endif
				589	#ifdef DCT_FLOAT_SUPPORTED
				590	case JDCT_FLOAT:
Pierre Ossman	9ad5234	2009-03-09 13:15:56 +0000	[diff] [blame]	591	if (jsimd_can_convsamp_float())
				592	fdct->float_convsamp = jsimd_convsamp_float;
				593	else
				594	fdct->float_convsamp = convsamp_float;
				595	if (jsimd_can_quantize_float())
				596	fdct->float_quantize = jsimd_quantize_float;
				597	else
				598	fdct->float_quantize = quantize_float;
Constantin Kaplinsky	a2adc8d	2006-05-25 05:01:55 +0000	[diff] [blame]	599	break;
				600	#endif
				601	default:
				602	ERREXIT(cinfo, JERR_NOT_COMPILED);
				603	break;
				604	}
				605
				606	/* Mark divisor tables unallocated */
				607	for (i = 0; i < NUM_QUANT_TBLS; i++) {
				608	fdct->divisors[i] = NULL;
				609	#ifdef DCT_FLOAT_SUPPORTED
				610	fdct->float_divisors[i] = NULL;
				611	#endif
				612	}
				613	}