Blame - libpixelflinger/fixed.cpp - android_system_core

blob: 5b920628bad1e6d5a9c6a871e6e81df906d0b8a6 [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/* libs/pixelflinger/fixed.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18	#include <stdio.h>
				19
				20	#include <private/pixelflinger/ggl_context.h>
				21	#include <private/pixelflinger/ggl_fixed.h>
				22
				23
				24	// ------------------------------------------------------------------------
				25
				26	int32_t gglRecipQNormalized(int32_t x, int* exponent)
				27	{
				28	const int32_t s = x>>31;
				29	uint32_t a = s ? -x : x;
				30
				31	// the result will overflow, so just set it to the biggest/inf value
				32	if (ggl_unlikely(a <= 2LU)) {
				33	*exponent = 0;
				34	return s ? FIXED_MIN : FIXED_MAX;
				35	}
				36
				37	// Newton-Raphson iteration:
				38	// x = r(2 - ar)
				39
				40	const int32_t lz = gglClz(a);
				41	a <<= lz; // 0.32
				42	uint32_t r = a;
				43	// note: if a == 0x80000000, this means x was a power-of-2, in this
				44	// case we don't need to compute anything. We get the reciprocal for
				45	// (almost) free.
				46	if (a != 0x80000000) {
				47	r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
				48	// 0.32 + 2.30 = 2.62 -> 2.30
				49	// 2.30 + 2.30 = 4.60 -> 2.30
				50	r = (((2LU<<30) - uint32_t((uint64_t(a)r) >> 32)) uint64_t(r)) >> 30;
				51	r = (((2LU<<30) - uint32_t((uint64_t(a)r) >> 32)) uint64_t(r)) >> 30;
				52	}
				53
				54	// shift right 1-bit to make room for the sign bit
				55	*exponent = 30-lz-1;
				56	r >>= 1;
				57	return s ? -r : r;
				58	}
				59
				60	int32_t gglRecipQ(GGLfixed x, int q)
				61	{
				62	int shift;
				63	x = gglRecipQNormalized(x, &shift);
				64	shift += 16-q;
				65	x += 1L << (shift-1); // rounding
				66	x >>= shift;
				67	return x;
				68	}
				69
				70	// ------------------------------------------------------------------------
				71
				72	GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
				73	{
				74	if ((d>>24) && ((d>>24)+1)) {
				75	n >>= 8;
				76	d >>= 8;
				77	}
				78	return gglMulx(n, gglRecip(d));
				79	}
				80
				81	// ------------------------------------------------------------------------
				82
				83	static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
				84	// 1/sqrt(x) with x = 1-N/16, N=[8...1]
				85	0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
				86	};
				87
				88	GGLfixed gglSqrtRecipx(GGLfixed x)
				89	{
				90	if (x == 0) return FIXED_MAX;
				91	if (x == FIXED_ONE) return x;
				92	const GGLfixed a = x;
				93	const int32_t lz = gglClz(x);
				94	x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
				95	const int32_t exp = lz - 16;
				96	if (exp <= 0) x >>= -exp>>1;
				97	else x <<= (exp>>1) + (exp & 1);
				98	if (exp & 1) {
				99	x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
				100	}
				101	// 2 Newton-Raphson iterations: x = x/2(3-(ax)*x)
				102	x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
				103	x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
				104	return x;
				105	}
				106
				107	GGLfixed gglSqrtx(GGLfixed a)
				108	{
				109	// Compute a full precision square-root (24 bits accuracy)
				110	GGLfixed r = 0;
				111	GGLfixed bit = 0x800000;
				112	int32_t bshift = 15;
				113	do {
				114	GGLfixed temp = bit + (r<<1);
				115	if (bshift >= 8) temp <<= (bshift-8);
				116	else temp >>= (8-bshift);
				117	if (a >= temp) {
				118	r += bit;
				119	a -= temp;
				120	}
				121	bshift--;
				122	} while (bit>>=1);
				123	return r;
				124	}
				125
				126	// ------------------------------------------------------------------------
				127
				128	static const GGLfixed ggl_log_approx_tab[] = {
				129	// -ln(x)/ln(2) with x = N/16, N=[8...16]
				130	0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
				131	};
				132
				133	static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
				134	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
				135	};
				136
				137	GGLfixed gglPowx(GGLfixed x, GGLfixed y)
				138	{
				139	// prerequisite: 0 <= x <= 1, and y >=0
				140
				141	// pow(x,y) = 2^(y*log2(x))
				142	// = 2^(ylog2(x(2^exp)*(2^-exp))))
				143	// = 2^(y*(log2(X)-exp))
				144	// = 2^(log2(X)y - yexp)
				145	// = 2^( - (-log2(X)y + yexp) )
				146
				147	int32_t exp = gglClz(x) - 16;
				148	GGLfixed f = x << exp;
				149	x = (f & 0x0FFF)<<4;
				150	f = (f >> 12) & 0x7;
				151	GGLfixed p = gglMulAddx(
				152	ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
				153	ggl_log_approx_tab[f]);
				154	p = gglMulAddx(p, y, y*exp);
				155	exp = gglFixedToIntFloor(p);
				156	if (exp < 31) {
				157	p = gglFracx(p);
				158	x = (p & 0x1FFF)<<3;
				159	p >>= 13;
				160	p = gglMulAddx(
				161	ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
				162	ggl_alog_approx_tab[p]);
				163	p >>= exp;
				164	} else {
				165	p = 0;
				166	}
				167	return p;
				168	// ( powf((a65536.0f), (b65536.0f)) ) * 65536.0f;
				169	}
				170
				171	// ------------------------------------------------------------------------
				172
				173	int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
				174	{
				175	//int32_t r =int32_t((int64_t(n)<<i)/d);
				176	const int32_t ds = n^d;
				177	if (n<0) n = -n;
				178	if (d<0) d = -d;
				179	int nd = gglClz(d) - gglClz(n);
				180	i += nd + 1;
				181	if (nd > 0) d <<= nd;
				182	else n <<= -nd;
				183	uint32_t q = 0;
				184
				185	int j = i & 7;
				186	i >>= 3;
				187
				188	// gcc deals with the code below pretty well.
				189	// we get 3.75 cycles per bit in the main loop
				190	// and 8 cycles per bit in the termination loop
				191	if (ggl_likely(i)) {
				192	n -= d;
				193	do {
				194	q <<= 8;
				195	if (n>=0) q \|= 128;
				196	else n += d;
				197	n = n*2 - d;
				198	if (n>=0) q \|= 64;
				199	else n += d;
				200	n = n*2 - d;
				201	if (n>=0) q \|= 32;
				202	else n += d;
				203	n = n*2 - d;
				204	if (n>=0) q \|= 16;
				205	else n += d;
				206	n = n*2 - d;
				207	if (n>=0) q \|= 8;
				208	else n += d;
				209	n = n*2 - d;
				210	if (n>=0) q \|= 4;
				211	else n += d;
				212	n = n*2 - d;
				213	if (n>=0) q \|= 2;
				214	else n += d;
				215	n = n*2 - d;
				216	if (n>=0) q \|= 1;
				217	else n += d;
				218
				219	if (--i == 0)
				220	goto finish;
				221
				222	n = n*2 - d;
				223	} while(true);
				224	do {
				225	q <<= 1;
				226	n = n*2 - d;
				227	if (n>=0) q \|= 1;
				228	else n += d;
				229	finish: ;
				230	} while (j--);
				231	return (ds<0) ? -q : q;
				232	}
				233
				234	n -= d;
				235	if (n>=0) q \|= 1;
				236	else n += d;
				237	j--;
				238	goto finish;
				239	}
				240
				241	// ------------------------------------------------------------------------
				242
				243	// assumes that the int32_t values of a, b, and c are all positive
				244	// use when both a and b are larger than c
				245
				246	template <typename T>
				247	static inline void swap(T& a, T& b) {
				248	T t(a);
				249	a = b;
				250	b = t;
				251	}
				252
				253	static __attribute__((noinline))
				254	int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
				255	{
				256	// first we compute a*b as a 64-bit integer
				257	// (GCC generates umull with the code below)
				258	uint64_t ab = uint64_t(a)*b;
				259	uint32_t hi = ab>>32;
				260	uint32_t lo = ab;
				261	uint32_t result;
				262
				263	// now perform the division
				264	if (hi >= c) {
				265	overflow:
				266	result = 0x7fffffff; // basic overflow
				267	} else if (hi == 0) {
				268	result = lo/c; // note: c can't be 0
				269	if ((result >> 31) != 0) // result must fit in 31 bits
				270	goto overflow;
				271	} else {
				272	uint32_t r = hi;
				273	int bits = 31;
				274	result = 0;
				275	do {
				276	r = (r << 1) \| (lo >> 31);
				277	lo <<= 1;
				278	result <<= 1;
				279	if (r >= c) {
				280	r -= c;
				281	result \|= 1;
				282	}
				283	} while (bits--);
				284	}
				285	return int32_t(result);
				286	}
				287
				288	// assumes a >= 0 and c >= b >= 0
				289	static inline
				290	int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
				291	{
				292	int32_t r = 0, q = 0, i;
				293	int leading = gglClz(a);
				294	i = 32 - leading;
				295	a <<= leading;
				296	do {
				297	r <<= 1;
				298	if (a < 0)
				299	r += b;
				300	a <<= 1;
				301	q <<= 1;
				302	if (r >= c) {
				303	r -= c;
				304	q++;
				305	}
				306	asm(""::); // gcc generates better code this way
				307	if (r >= c) {
				308	r -= c;
				309	q++;
				310	}
				311	}
				312	while (--i);
				313	return q;
				314	}
				315
				316	// this function computes a*b/c with 64-bit intermediate accuracy
				317	// overflows (e.g. division by 0) are handled and return INT_MAX
				318
				319	int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
				320	{
				321	int32_t result;
				322	int32_t sign = a^b^c;
				323
				324	if (a < 0) a = -a;
				325	if (b < 0) b = -b;
				326	if (c < 0) c = -c;
				327
				328	if (a < b) {
				329	swap(a, b);
				330	}
				331
				332	if (b <= c) result = quick_muldiv(a, b, c);
				333	else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
				334
				335	if (sign < 0)
				336	result = -result;
				337
				338	return result;
				339	}