Blame - libpixelflinger/fixed.cpp - android_system_core

blob: 5094537766a50de0f828839d156ba2ce245c0f13 [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/* libs/pixelflinger/fixed.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18	#include <stdio.h>
				19
				20	#include <private/pixelflinger/ggl_context.h>
				21	#include <private/pixelflinger/ggl_fixed.h>
				22
				23
				24	// ------------------------------------------------------------------------
				25
				26	int32_t gglRecipQNormalized(int32_t x, int* exponent)
				27	{
				28	const int32_t s = x>>31;
				29	uint32_t a = s ? -x : x;
				30
				31	// the result will overflow, so just set it to the biggest/inf value
				32	if (ggl_unlikely(a <= 2LU)) {
				33	*exponent = 0;
				34	return s ? FIXED_MIN : FIXED_MAX;
				35	}
				36
				37	// Newton-Raphson iteration:
				38	// x = r(2 - ar)
				39
				40	const int32_t lz = gglClz(a);
				41	a <<= lz; // 0.32
				42	uint32_t r = a;
				43	// note: if a == 0x80000000, this means x was a power-of-2, in this
				44	// case we don't need to compute anything. We get the reciprocal for
				45	// (almost) free.
				46	if (a != 0x80000000) {
				47	r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
				48	// 0.32 + 2.30 = 2.62 -> 2.30
				49	// 2.30 + 2.30 = 4.60 -> 2.30
				50	r = (((2LU<<30) - uint32_t((uint64_t(a)r) >> 32)) uint64_t(r)) >> 30;
				51	r = (((2LU<<30) - uint32_t((uint64_t(a)r) >> 32)) uint64_t(r)) >> 30;
				52	}
				53
				54	// shift right 1-bit to make room for the sign bit
				55	*exponent = 30-lz-1;
				56	r >>= 1;
				57	return s ? -r : r;
				58	}
				59
				60	int32_t gglRecipQ(GGLfixed x, int q)
				61	{
				62	int shift;
				63	x = gglRecipQNormalized(x, &shift);
				64	shift += 16-q;
Bhanu Chetlapalli	65026f9	2012-01-25 14:45:30 -0800	[diff] [blame]	65	if (shift > 0)
				66	x += 1L << (shift-1); // rounding
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	67	x >>= shift;
				68	return x;
				69	}
				70
				71	// ------------------------------------------------------------------------
				72
				73	GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
				74	{
				75	if ((d>>24) && ((d>>24)+1)) {
				76	n >>= 8;
				77	d >>= 8;
				78	}
				79	return gglMulx(n, gglRecip(d));
				80	}
				81
				82	// ------------------------------------------------------------------------
				83
				84	static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
				85	// 1/sqrt(x) with x = 1-N/16, N=[8...1]
				86	0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
				87	};
				88
				89	GGLfixed gglSqrtRecipx(GGLfixed x)
				90	{
				91	if (x == 0) return FIXED_MAX;
				92	if (x == FIXED_ONE) return x;
				93	const GGLfixed a = x;
				94	const int32_t lz = gglClz(x);
				95	x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
				96	const int32_t exp = lz - 16;
				97	if (exp <= 0) x >>= -exp>>1;
				98	else x <<= (exp>>1) + (exp & 1);
				99	if (exp & 1) {
				100	x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
				101	}
				102	// 2 Newton-Raphson iterations: x = x/2(3-(ax)*x)
				103	x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
				104	x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
				105	return x;
				106	}
				107
				108	GGLfixed gglSqrtx(GGLfixed a)
				109	{
				110	// Compute a full precision square-root (24 bits accuracy)
				111	GGLfixed r = 0;
				112	GGLfixed bit = 0x800000;
				113	int32_t bshift = 15;
				114	do {
				115	GGLfixed temp = bit + (r<<1);
				116	if (bshift >= 8) temp <<= (bshift-8);
				117	else temp >>= (8-bshift);
				118	if (a >= temp) {
				119	r += bit;
				120	a -= temp;
				121	}
				122	bshift--;
				123	} while (bit>>=1);
				124	return r;
				125	}
				126
				127	// ------------------------------------------------------------------------
				128
				129	static const GGLfixed ggl_log_approx_tab[] = {
				130	// -ln(x)/ln(2) with x = N/16, N=[8...16]
				131	0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
				132	};
				133
				134	static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
				135	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
				136	};
				137
				138	GGLfixed gglPowx(GGLfixed x, GGLfixed y)
				139	{
				140	// prerequisite: 0 <= x <= 1, and y >=0
				141
				142	// pow(x,y) = 2^(y*log2(x))
				143	// = 2^(ylog2(x(2^exp)*(2^-exp))))
				144	// = 2^(y*(log2(X)-exp))
				145	// = 2^(log2(X)y - yexp)
				146	// = 2^( - (-log2(X)y + yexp) )
				147
				148	int32_t exp = gglClz(x) - 16;
				149	GGLfixed f = x << exp;
				150	x = (f & 0x0FFF)<<4;
				151	f = (f >> 12) & 0x7;
				152	GGLfixed p = gglMulAddx(
				153	ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
				154	ggl_log_approx_tab[f]);
				155	p = gglMulAddx(p, y, y*exp);
				156	exp = gglFixedToIntFloor(p);
				157	if (exp < 31) {
				158	p = gglFracx(p);
				159	x = (p & 0x1FFF)<<3;
				160	p >>= 13;
				161	p = gglMulAddx(
				162	ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
				163	ggl_alog_approx_tab[p]);
				164	p >>= exp;
				165	} else {
				166	p = 0;
				167	}
				168	return p;
				169	// ( powf((a65536.0f), (b65536.0f)) ) * 65536.0f;
				170	}
				171
				172	// ------------------------------------------------------------------------
				173
				174	int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
				175	{
				176	//int32_t r =int32_t((int64_t(n)<<i)/d);
				177	const int32_t ds = n^d;
				178	if (n<0) n = -n;
				179	if (d<0) d = -d;
				180	int nd = gglClz(d) - gglClz(n);
				181	i += nd + 1;
				182	if (nd > 0) d <<= nd;
				183	else n <<= -nd;
				184	uint32_t q = 0;
				185
				186	int j = i & 7;
				187	i >>= 3;
				188
				189	// gcc deals with the code below pretty well.
				190	// we get 3.75 cycles per bit in the main loop
				191	// and 8 cycles per bit in the termination loop
				192	if (ggl_likely(i)) {
				193	n -= d;
				194	do {
				195	q <<= 8;
				196	if (n>=0) q \|= 128;
				197	else n += d;
				198	n = n*2 - d;
				199	if (n>=0) q \|= 64;
				200	else n += d;
				201	n = n*2 - d;
				202	if (n>=0) q \|= 32;
				203	else n += d;
				204	n = n*2 - d;
				205	if (n>=0) q \|= 16;
				206	else n += d;
				207	n = n*2 - d;
				208	if (n>=0) q \|= 8;
				209	else n += d;
				210	n = n*2 - d;
				211	if (n>=0) q \|= 4;
				212	else n += d;
				213	n = n*2 - d;
				214	if (n>=0) q \|= 2;
				215	else n += d;
				216	n = n*2 - d;
				217	if (n>=0) q \|= 1;
				218	else n += d;
				219
				220	if (--i == 0)
				221	goto finish;
				222
				223	n = n*2 - d;
				224	} while(true);
				225	do {
				226	q <<= 1;
				227	n = n*2 - d;
				228	if (n>=0) q \|= 1;
				229	else n += d;
				230	finish: ;
				231	} while (j--);
				232	return (ds<0) ? -q : q;
				233	}
				234
				235	n -= d;
				236	if (n>=0) q \|= 1;
				237	else n += d;
				238	j--;
				239	goto finish;
				240	}
				241
				242	// ------------------------------------------------------------------------
				243
				244	// assumes that the int32_t values of a, b, and c are all positive
				245	// use when both a and b are larger than c
				246
				247	template <typename T>
				248	static inline void swap(T& a, T& b) {
				249	T t(a);
				250	a = b;
				251	b = t;
				252	}
				253
				254	static __attribute__((noinline))
				255	int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
				256	{
				257	// first we compute a*b as a 64-bit integer
				258	// (GCC generates umull with the code below)
				259	uint64_t ab = uint64_t(a)*b;
				260	uint32_t hi = ab>>32;
				261	uint32_t lo = ab;
				262	uint32_t result;
				263
				264	// now perform the division
				265	if (hi >= c) {
				266	overflow:
				267	result = 0x7fffffff; // basic overflow
				268	} else if (hi == 0) {
				269	result = lo/c; // note: c can't be 0
				270	if ((result >> 31) != 0) // result must fit in 31 bits
				271	goto overflow;
				272	} else {
				273	uint32_t r = hi;
				274	int bits = 31;
				275	result = 0;
				276	do {
				277	r = (r << 1) \| (lo >> 31);
				278	lo <<= 1;
				279	result <<= 1;
				280	if (r >= c) {
				281	r -= c;
				282	result \|= 1;
				283	}
				284	} while (bits--);
				285	}
				286	return int32_t(result);
				287	}
				288
				289	// assumes a >= 0 and c >= b >= 0
				290	static inline
				291	int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
				292	{
				293	int32_t r = 0, q = 0, i;
				294	int leading = gglClz(a);
				295	i = 32 - leading;
				296	a <<= leading;
				297	do {
				298	r <<= 1;
				299	if (a < 0)
				300	r += b;
				301	a <<= 1;
				302	q <<= 1;
				303	if (r >= c) {
				304	r -= c;
				305	q++;
				306	}
				307	asm(""::); // gcc generates better code this way
				308	if (r >= c) {
				309	r -= c;
				310	q++;
				311	}
				312	}
				313	while (--i);
				314	return q;
				315	}
				316
				317	// this function computes a*b/c with 64-bit intermediate accuracy
				318	// overflows (e.g. division by 0) are handled and return INT_MAX
				319
				320	int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
				321	{
				322	int32_t result;
				323	int32_t sign = a^b^c;
				324
				325	if (a < 0) a = -a;
				326	if (b < 0) b = -b;
				327	if (c < 0) c = -c;
				328
				329	if (a < b) {
				330	swap(a, b);
				331	}
				332
				333	if (b <= c) result = quick_muldiv(a, b, c);
				334	else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
				335
				336	if (sign < 0)
				337	result = -result;
				338
				339	return result;
				340	}