Initial Contribution
diff --git a/libpixelflinger/fixed.cpp b/libpixelflinger/fixed.cpp
new file mode 100644
index 0000000..5b92062
--- /dev/null
+++ b/libpixelflinger/fixed.cpp
@@ -0,0 +1,339 @@
+/* libs/pixelflinger/fixed.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <stdio.h>
+
+#include <private/pixelflinger/ggl_context.h>
+#include <private/pixelflinger/ggl_fixed.h>
+
+
+// ------------------------------------------------------------------------
+
+int32_t gglRecipQNormalized(int32_t x, int* exponent)
+{
+ const int32_t s = x>>31;
+ uint32_t a = s ? -x : x;
+
+ // the result will overflow, so just set it to the biggest/inf value
+ if (ggl_unlikely(a <= 2LU)) {
+ *exponent = 0;
+ return s ? FIXED_MIN : FIXED_MAX;
+ }
+
+ // Newton-Raphson iteration:
+ // x = r*(2 - a*r)
+
+ const int32_t lz = gglClz(a);
+ a <<= lz; // 0.32
+ uint32_t r = a;
+ // note: if a == 0x80000000, this means x was a power-of-2, in this
+ // case we don't need to compute anything. We get the reciprocal for
+ // (almost) free.
+ if (a != 0x80000000) {
+ r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
+ // 0.32 + 2.30 = 2.62 -> 2.30
+ // 2.30 + 2.30 = 4.60 -> 2.30
+ r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
+ r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
+ }
+
+ // shift right 1-bit to make room for the sign bit
+ *exponent = 30-lz-1;
+ r >>= 1;
+ return s ? -r : r;
+}
+
+int32_t gglRecipQ(GGLfixed x, int q)
+{
+ int shift;
+ x = gglRecipQNormalized(x, &shift);
+ shift += 16-q;
+ x += 1L << (shift-1); // rounding
+ x >>= shift;
+ return x;
+}
+
+// ------------------------------------------------------------------------
+
+GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
+{
+ if ((d>>24) && ((d>>24)+1)) {
+ n >>= 8;
+ d >>= 8;
+ }
+ return gglMulx(n, gglRecip(d));
+}
+
+// ------------------------------------------------------------------------
+
+static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
+ // 1/sqrt(x) with x = 1-N/16, N=[8...1]
+ 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
+};
+
+GGLfixed gglSqrtRecipx(GGLfixed x)
+{
+ if (x == 0) return FIXED_MAX;
+ if (x == FIXED_ONE) return x;
+ const GGLfixed a = x;
+ const int32_t lz = gglClz(x);
+ x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
+ const int32_t exp = lz - 16;
+ if (exp <= 0) x >>= -exp>>1;
+ else x <<= (exp>>1) + (exp & 1);
+ if (exp & 1) {
+ x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
+ }
+ // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
+ x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
+ x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
+ return x;
+}
+
+GGLfixed gglSqrtx(GGLfixed a)
+{
+ // Compute a full precision square-root (24 bits accuracy)
+ GGLfixed r = 0;
+ GGLfixed bit = 0x800000;
+ int32_t bshift = 15;
+ do {
+ GGLfixed temp = bit + (r<<1);
+ if (bshift >= 8) temp <<= (bshift-8);
+ else temp >>= (8-bshift);
+ if (a >= temp) {
+ r += bit;
+ a -= temp;
+ }
+ bshift--;
+ } while (bit>>=1);
+ return r;
+}
+
+// ------------------------------------------------------------------------
+
+static const GGLfixed ggl_log_approx_tab[] = {
+ // -ln(x)/ln(2) with x = N/16, N=[8...16]
+ 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
+};
+
+static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
+ 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
+};
+
+GGLfixed gglPowx(GGLfixed x, GGLfixed y)
+{
+ // prerequisite: 0 <= x <= 1, and y >=0
+
+ // pow(x,y) = 2^(y*log2(x))
+ // = 2^(y*log2(x*(2^exp)*(2^-exp))))
+ // = 2^(y*(log2(X)-exp))
+ // = 2^(log2(X)*y - y*exp)
+ // = 2^( - (-log2(X)*y + y*exp) )
+
+ int32_t exp = gglClz(x) - 16;
+ GGLfixed f = x << exp;
+ x = (f & 0x0FFF)<<4;
+ f = (f >> 12) & 0x7;
+ GGLfixed p = gglMulAddx(
+ ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
+ ggl_log_approx_tab[f]);
+ p = gglMulAddx(p, y, y*exp);
+ exp = gglFixedToIntFloor(p);
+ if (exp < 31) {
+ p = gglFracx(p);
+ x = (p & 0x1FFF)<<3;
+ p >>= 13;
+ p = gglMulAddx(
+ ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
+ ggl_alog_approx_tab[p]);
+ p >>= exp;
+ } else {
+ p = 0;
+ }
+ return p;
+ // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
+}
+
+// ------------------------------------------------------------------------
+
+int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
+{
+ //int32_t r =int32_t((int64_t(n)<<i)/d);
+ const int32_t ds = n^d;
+ if (n<0) n = -n;
+ if (d<0) d = -d;
+ int nd = gglClz(d) - gglClz(n);
+ i += nd + 1;
+ if (nd > 0) d <<= nd;
+ else n <<= -nd;
+ uint32_t q = 0;
+
+ int j = i & 7;
+ i >>= 3;
+
+ // gcc deals with the code below pretty well.
+ // we get 3.75 cycles per bit in the main loop
+ // and 8 cycles per bit in the termination loop
+ if (ggl_likely(i)) {
+ n -= d;
+ do {
+ q <<= 8;
+ if (n>=0) q |= 128;
+ else n += d;
+ n = n*2 - d;
+ if (n>=0) q |= 64;
+ else n += d;
+ n = n*2 - d;
+ if (n>=0) q |= 32;
+ else n += d;
+ n = n*2 - d;
+ if (n>=0) q |= 16;
+ else n += d;
+ n = n*2 - d;
+ if (n>=0) q |= 8;
+ else n += d;
+ n = n*2 - d;
+ if (n>=0) q |= 4;
+ else n += d;
+ n = n*2 - d;
+ if (n>=0) q |= 2;
+ else n += d;
+ n = n*2 - d;
+ if (n>=0) q |= 1;
+ else n += d;
+
+ if (--i == 0)
+ goto finish;
+
+ n = n*2 - d;
+ } while(true);
+ do {
+ q <<= 1;
+ n = n*2 - d;
+ if (n>=0) q |= 1;
+ else n += d;
+ finish: ;
+ } while (j--);
+ return (ds<0) ? -q : q;
+ }
+
+ n -= d;
+ if (n>=0) q |= 1;
+ else n += d;
+ j--;
+ goto finish;
+}
+
+// ------------------------------------------------------------------------
+
+// assumes that the int32_t values of a, b, and c are all positive
+// use when both a and b are larger than c
+
+template <typename T>
+static inline void swap(T& a, T& b) {
+ T t(a);
+ a = b;
+ b = t;
+}
+
+static __attribute__((noinline))
+int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
+{
+ // first we compute a*b as a 64-bit integer
+ // (GCC generates umull with the code below)
+ uint64_t ab = uint64_t(a)*b;
+ uint32_t hi = ab>>32;
+ uint32_t lo = ab;
+ uint32_t result;
+
+ // now perform the division
+ if (hi >= c) {
+ overflow:
+ result = 0x7fffffff; // basic overflow
+ } else if (hi == 0) {
+ result = lo/c; // note: c can't be 0
+ if ((result >> 31) != 0) // result must fit in 31 bits
+ goto overflow;
+ } else {
+ uint32_t r = hi;
+ int bits = 31;
+ result = 0;
+ do {
+ r = (r << 1) | (lo >> 31);
+ lo <<= 1;
+ result <<= 1;
+ if (r >= c) {
+ r -= c;
+ result |= 1;
+ }
+ } while (bits--);
+ }
+ return int32_t(result);
+}
+
+// assumes a >= 0 and c >= b >= 0
+static inline
+int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
+{
+ int32_t r = 0, q = 0, i;
+ int leading = gglClz(a);
+ i = 32 - leading;
+ a <<= leading;
+ do {
+ r <<= 1;
+ if (a < 0)
+ r += b;
+ a <<= 1;
+ q <<= 1;
+ if (r >= c) {
+ r -= c;
+ q++;
+ }
+ asm(""::); // gcc generates better code this way
+ if (r >= c) {
+ r -= c;
+ q++;
+ }
+ }
+ while (--i);
+ return q;
+}
+
+// this function computes a*b/c with 64-bit intermediate accuracy
+// overflows (e.g. division by 0) are handled and return INT_MAX
+
+int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
+{
+ int32_t result;
+ int32_t sign = a^b^c;
+
+ if (a < 0) a = -a;
+ if (b < 0) b = -b;
+ if (c < 0) c = -c;
+
+ if (a < b) {
+ swap(a, b);
+ }
+
+ if (b <= c) result = quick_muldiv(a, b, c);
+ else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
+
+ if (sign < 0)
+ result = -result;
+
+ return result;
+}