Blame - libpixelflinger/fixed.cpp - android_system_core

blob: de6b4794ff50fd417cf02e2c1c2c20ef28fd4456 [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/* libs/pixelflinger/fixed.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18	#include <stdio.h>
				19
				20	#include <private/pixelflinger/ggl_context.h>
				21	#include <private/pixelflinger/ggl_fixed.h>
				22
				23
				24	// ------------------------------------------------------------------------
				25
				26	int32_t gglRecipQNormalized(int32_t x, int* exponent)
				27	{
				28	const int32_t s = x>>31;
				29	uint32_t a = s ? -x : x;
				30
				31	// the result will overflow, so just set it to the biggest/inf value
				32	if (ggl_unlikely(a <= 2LU)) {
				33	*exponent = 0;
				34	return s ? FIXED_MIN : FIXED_MAX;
				35	}
				36
				37	// Newton-Raphson iteration:
				38	// x = r(2 - ar)
				39
				40	const int32_t lz = gglClz(a);
				41	a <<= lz; // 0.32
				42	uint32_t r = a;
				43	// note: if a == 0x80000000, this means x was a power-of-2, in this
				44	// case we don't need to compute anything. We get the reciprocal for
				45	// (almost) free.
				46	if (a != 0x80000000) {
				47	r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
				48	// 0.32 + 2.30 = 2.62 -> 2.30
				49	// 2.30 + 2.30 = 4.60 -> 2.30
				50	r = (((2LU<<30) - uint32_t((uint64_t(a)r) >> 32)) uint64_t(r)) >> 30;
				51	r = (((2LU<<30) - uint32_t((uint64_t(a)r) >> 32)) uint64_t(r)) >> 30;
				52	}
				53
				54	// shift right 1-bit to make room for the sign bit
				55	*exponent = 30-lz-1;
				56	r >>= 1;
				57	return s ? -r : r;
				58	}
				59
				60	int32_t gglRecipQ(GGLfixed x, int q)
				61	{
				62	int shift;
				63	x = gglRecipQNormalized(x, &shift);
				64	shift += 16-q;
Bhanu Chetlapalli	65026f9	2012-01-25 14:45:30 -0800	[diff] [blame]	65	if (shift > 0)
				66	x += 1L << (shift-1); // rounding
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	67	x >>= shift;
				68	return x;
				69	}
				70
				71	// ------------------------------------------------------------------------
				72
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	73	static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
				74	// 1/sqrt(x) with x = 1-N/16, N=[8...1]
				75	0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
				76	};
				77
				78	GGLfixed gglSqrtRecipx(GGLfixed x)
				79	{
				80	if (x == 0) return FIXED_MAX;
				81	if (x == FIXED_ONE) return x;
				82	const GGLfixed a = x;
				83	const int32_t lz = gglClz(x);
				84	x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
				85	const int32_t exp = lz - 16;
				86	if (exp <= 0) x >>= -exp>>1;
				87	else x <<= (exp>>1) + (exp & 1);
				88	if (exp & 1) {
				89	x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
				90	}
				91	// 2 Newton-Raphson iterations: x = x/2(3-(ax)*x)
				92	x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
				93	x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
				94	return x;
				95	}
				96
				97	GGLfixed gglSqrtx(GGLfixed a)
				98	{
				99	// Compute a full precision square-root (24 bits accuracy)
				100	GGLfixed r = 0;
				101	GGLfixed bit = 0x800000;
				102	int32_t bshift = 15;
				103	do {
				104	GGLfixed temp = bit + (r<<1);
				105	if (bshift >= 8) temp <<= (bshift-8);
				106	else temp >>= (8-bshift);
				107	if (a >= temp) {
				108	r += bit;
				109	a -= temp;
				110	}
				111	bshift--;
				112	} while (bit>>=1);
				113	return r;
				114	}
				115
				116	// ------------------------------------------------------------------------
				117
				118	static const GGLfixed ggl_log_approx_tab[] = {
				119	// -ln(x)/ln(2) with x = N/16, N=[8...16]
				120	0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
				121	};
				122
				123	static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
				124	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
				125	};
				126
				127	GGLfixed gglPowx(GGLfixed x, GGLfixed y)
				128	{
				129	// prerequisite: 0 <= x <= 1, and y >=0
				130
				131	// pow(x,y) = 2^(y*log2(x))
				132	// = 2^(ylog2(x(2^exp)*(2^-exp))))
				133	// = 2^(y*(log2(X)-exp))
				134	// = 2^(log2(X)y - yexp)
				135	// = 2^( - (-log2(X)y + yexp) )
				136
				137	int32_t exp = gglClz(x) - 16;
				138	GGLfixed f = x << exp;
				139	x = (f & 0x0FFF)<<4;
				140	f = (f >> 12) & 0x7;
				141	GGLfixed p = gglMulAddx(
				142	ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
				143	ggl_log_approx_tab[f]);
				144	p = gglMulAddx(p, y, y*exp);
				145	exp = gglFixedToIntFloor(p);
				146	if (exp < 31) {
				147	p = gglFracx(p);
				148	x = (p & 0x1FFF)<<3;
				149	p >>= 13;
				150	p = gglMulAddx(
				151	ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
				152	ggl_alog_approx_tab[p]);
				153	p >>= exp;
				154	} else {
				155	p = 0;
				156	}
				157	return p;
				158	// ( powf((a65536.0f), (b65536.0f)) ) * 65536.0f;
				159	}
				160
				161	// ------------------------------------------------------------------------
				162
				163	int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
				164	{
				165	//int32_t r =int32_t((int64_t(n)<<i)/d);
				166	const int32_t ds = n^d;
				167	if (n<0) n = -n;
				168	if (d<0) d = -d;
				169	int nd = gglClz(d) - gglClz(n);
				170	i += nd + 1;
				171	if (nd > 0) d <<= nd;
				172	else n <<= -nd;
				173	uint32_t q = 0;
				174
				175	int j = i & 7;
				176	i >>= 3;
				177
				178	// gcc deals with the code below pretty well.
				179	// we get 3.75 cycles per bit in the main loop
				180	// and 8 cycles per bit in the termination loop
				181	if (ggl_likely(i)) {
				182	n -= d;
				183	do {
				184	q <<= 8;
				185	if (n>=0) q \|= 128;
				186	else n += d;
				187	n = n*2 - d;
				188	if (n>=0) q \|= 64;
				189	else n += d;
				190	n = n*2 - d;
				191	if (n>=0) q \|= 32;
				192	else n += d;
				193	n = n*2 - d;
				194	if (n>=0) q \|= 16;
				195	else n += d;
				196	n = n*2 - d;
				197	if (n>=0) q \|= 8;
				198	else n += d;
				199	n = n*2 - d;
				200	if (n>=0) q \|= 4;
				201	else n += d;
				202	n = n*2 - d;
				203	if (n>=0) q \|= 2;
				204	else n += d;
				205	n = n*2 - d;
				206	if (n>=0) q \|= 1;
				207	else n += d;
				208
				209	if (--i == 0)
				210	goto finish;
				211
				212	n = n*2 - d;
				213	} while(true);
				214	do {
				215	q <<= 1;
				216	n = n*2 - d;
				217	if (n>=0) q \|= 1;
				218	else n += d;
				219	finish: ;
				220	} while (j--);
				221	return (ds<0) ? -q : q;
				222	}
				223
				224	n -= d;
				225	if (n>=0) q \|= 1;
				226	else n += d;
				227	j--;
				228	goto finish;
				229	}
				230
				231	// ------------------------------------------------------------------------
				232
				233	// assumes that the int32_t values of a, b, and c are all positive
				234	// use when both a and b are larger than c
				235
				236	template <typename T>
				237	static inline void swap(T& a, T& b) {
				238	T t(a);
				239	a = b;
				240	b = t;
				241	}
				242
				243	static __attribute__((noinline))
				244	int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
				245	{
				246	// first we compute a*b as a 64-bit integer
				247	// (GCC generates umull with the code below)
				248	uint64_t ab = uint64_t(a)*b;
				249	uint32_t hi = ab>>32;
				250	uint32_t lo = ab;
				251	uint32_t result;
				252
				253	// now perform the division
				254	if (hi >= c) {
				255	overflow:
				256	result = 0x7fffffff; // basic overflow
				257	} else if (hi == 0) {
				258	result = lo/c; // note: c can't be 0
				259	if ((result >> 31) != 0) // result must fit in 31 bits
				260	goto overflow;
				261	} else {
				262	uint32_t r = hi;
				263	int bits = 31;
				264	result = 0;
				265	do {
				266	r = (r << 1) \| (lo >> 31);
				267	lo <<= 1;
				268	result <<= 1;
				269	if (r >= c) {
				270	r -= c;
				271	result \|= 1;
				272	}
				273	} while (bits--);
				274	}
				275	return int32_t(result);
				276	}
				277
				278	// assumes a >= 0 and c >= b >= 0
				279	static inline
				280	int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
				281	{
				282	int32_t r = 0, q = 0, i;
				283	int leading = gglClz(a);
				284	i = 32 - leading;
				285	a <<= leading;
				286	do {
				287	r <<= 1;
				288	if (a < 0)
				289	r += b;
				290	a <<= 1;
				291	q <<= 1;
				292	if (r >= c) {
				293	r -= c;
				294	q++;
				295	}
				296	asm(""::); // gcc generates better code this way
				297	if (r >= c) {
				298	r -= c;
				299	q++;
				300	}
				301	}
				302	while (--i);
				303	return q;
				304	}
				305
				306	// this function computes a*b/c with 64-bit intermediate accuracy
				307	// overflows (e.g. division by 0) are handled and return INT_MAX
				308
				309	int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
				310	{
				311	int32_t result;
				312	int32_t sign = a^b^c;
				313
				314	if (a < 0) a = -a;
				315	if (b < 0) b = -b;
				316	if (c < 0) c = -c;
				317
				318	if (a < b) {
				319	swap(a, b);
				320	}
				321
				322	if (b <= c) result = quick_muldiv(a, b, c);
				323	else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
				324
				325	if (sign < 0)
				326	result = -result;
				327
				328	return result;
				329	}