pixelflinger: make self-contained

pixelflinger should not be used for new projects and should be moved out
of system/core at some point.  As the first step, move all its headers
under system/core/libpixelflinger/include and update its Android.mk
files so they're not referring to the absolute system/core path anymore.

Change-Id: Idead273ab2c0450409d770f5402c4dba916192a9
Signed-off-by: Greg Hackmann <ghackmann@google.com>
diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk
index aa614bc..697db25 100644
--- a/libpixelflinger/Android.mk
+++ b/libpixelflinger/Android.mk
@@ -62,6 +62,8 @@
 LOCAL_SRC_FILES_arm64 := $(PIXELFLINGER_SRC_FILES_arm64)
 LOCAL_SRC_FILES_mips := $(PIXELFLINGER_SRC_FILES_mips)
 LOCAL_CFLAGS := $(PIXELFLINGER_CFLAGS)
+LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include
+LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS)
 LOCAL_SHARED_LIBRARIES := libcutils liblog
 
 # Really this should go away entirely or at least not depend on
diff --git a/libpixelflinger/include/pixelflinger/format.h b/libpixelflinger/include/pixelflinger/format.h
new file mode 100644
index 0000000..82eeca4
--- /dev/null
+++ b/libpixelflinger/include/pixelflinger/format.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2005 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_PIXELFLINGER_FORMAT_H
+#define ANDROID_PIXELFLINGER_FORMAT_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+enum GGLPixelFormat {
+    // these constants need to match those
+    // in graphics/PixelFormat.java, ui/PixelFormat.h, BlitHardware.h
+    GGL_PIXEL_FORMAT_UNKNOWN    =   0,
+    GGL_PIXEL_FORMAT_NONE       =   0,
+
+    GGL_PIXEL_FORMAT_RGBA_8888   =   1,  // 4x8-bit ARGB
+    GGL_PIXEL_FORMAT_RGBX_8888   =   2,  // 3x8-bit RGB stored in 32-bit chunks
+    GGL_PIXEL_FORMAT_RGB_888     =   3,  // 3x8-bit RGB
+    GGL_PIXEL_FORMAT_RGB_565     =   4,  // 16-bit RGB
+    GGL_PIXEL_FORMAT_BGRA_8888   =   5,  // 4x8-bit BGRA
+    GGL_PIXEL_FORMAT_RGBA_5551   =   6,  // 16-bit RGBA
+    GGL_PIXEL_FORMAT_RGBA_4444   =   7,  // 16-bit RGBA
+
+    GGL_PIXEL_FORMAT_A_8         =   8,  // 8-bit A
+    GGL_PIXEL_FORMAT_L_8         =   9,  // 8-bit L (R=G=B = L)
+    GGL_PIXEL_FORMAT_LA_88       = 0xA,  // 16-bit LA
+    GGL_PIXEL_FORMAT_RGB_332     = 0xB,  // 8-bit RGB (non paletted)
+
+    // reserved range. don't use.
+    GGL_PIXEL_FORMAT_RESERVED_10 = 0x10,
+    GGL_PIXEL_FORMAT_RESERVED_11 = 0x11,
+    GGL_PIXEL_FORMAT_RESERVED_12 = 0x12,
+    GGL_PIXEL_FORMAT_RESERVED_13 = 0x13,
+    GGL_PIXEL_FORMAT_RESERVED_14 = 0x14,
+    GGL_PIXEL_FORMAT_RESERVED_15 = 0x15,
+    GGL_PIXEL_FORMAT_RESERVED_16 = 0x16,
+    GGL_PIXEL_FORMAT_RESERVED_17 = 0x17,
+
+    // reserved/special formats
+    GGL_PIXEL_FORMAT_Z_16       =  0x18,
+    GGL_PIXEL_FORMAT_S_8        =  0x19,
+    GGL_PIXEL_FORMAT_SZ_24      =  0x1A,
+    GGL_PIXEL_FORMAT_SZ_8       =  0x1B,
+
+    // reserved range. don't use.
+    GGL_PIXEL_FORMAT_RESERVED_20 = 0x20,
+    GGL_PIXEL_FORMAT_RESERVED_21 = 0x21,
+};
+
+enum GGLFormatComponents {
+	GGL_STENCIL_INDEX		= 0x1901,
+	GGL_DEPTH_COMPONENT		= 0x1902,
+	GGL_ALPHA				= 0x1906,
+	GGL_RGB					= 0x1907,
+	GGL_RGBA				= 0x1908,
+	GGL_LUMINANCE			= 0x1909,
+	GGL_LUMINANCE_ALPHA		= 0x190A,
+};
+
+enum GGLFormatComponentIndex {
+    GGL_INDEX_ALPHA   = 0,
+    GGL_INDEX_RED     = 1,
+    GGL_INDEX_GREEN   = 2,
+    GGL_INDEX_BLUE    = 3,
+    GGL_INDEX_STENCIL = 0,
+    GGL_INDEX_DEPTH   = 1,
+    GGL_INDEX_Y       = 0,
+    GGL_INDEX_CB      = 1,
+    GGL_INDEX_CR      = 2,
+};
+
+typedef struct {            // describes the component layout of one pixel format
+#ifdef __cplusplus
+    enum {                  // C++-only aliases for the GGL_INDEX_* slots of c[]
+        ALPHA   = GGL_INDEX_ALPHA,
+        RED     = GGL_INDEX_RED,
+        GREEN   = GGL_INDEX_GREEN,
+        BLUE    = GGL_INDEX_BLUE,
+        STENCIL = GGL_INDEX_STENCIL,
+        DEPTH   = GGL_INDEX_DEPTH,
+        LUMA    = GGL_INDEX_Y,
+        CHROMAB = GGL_INDEX_CB,
+        CHROMAR = GGL_INDEX_CR,
+    };
+    inline uint32_t mask(int i) const {   // mask with bits [l, h) of component i set
+            return uint32_t(((uint64_t(1) << (c[i].h - c[i].l)) - 1) << c[i].l); // 64-bit: no signed overflow, 32-bit-wide OK
+    }
+    inline uint32_t bits(int i) const {   // width in bits of component i
+            return c[i].h - c[i].l;
+    }
+#endif
+    uint8_t     size;           // bytes per pixel
+    uint8_t     bitsPerPixel;
+    union {                     // the same 8 bytes, viewed by name or by index
+        struct {
+            uint8_t     ah;     // alpha high bit position + 1
+            uint8_t     al;     // alpha low bit position
+            uint8_t     rh;     // red high bit position + 1
+            uint8_t     rl;     // red low bit position
+            uint8_t     gh;     // green high bit position + 1
+            uint8_t     gl;     // green low bit position
+            uint8_t     bh;     // blue high bit position + 1
+            uint8_t     bl;     // blue low bit position
+        };
+        struct {
+            uint8_t h;          // high bit position + 1
+            uint8_t l;          // low bit position
+        } __attribute__((__packed__)) c[4];   // indexed by GGL_INDEX_* (0 = alpha ... 3 = blue)
+    } __attribute__((__packed__));
+    uint16_t    components;     // GGLFormatComponents
+} GGLFormat;
+
+
+#ifdef __cplusplus
+extern "C" const GGLFormat* gglGetPixelFormatTable(size_t* numEntries = 0);
+#else
+const GGLFormat* gglGetPixelFormatTable(size_t* numEntries);
+#endif
+
+
+// ----------------------------------------------------------------------------
+
+#endif // ANDROID_PIXELFLINGER_FORMAT_H
diff --git a/libpixelflinger/include/pixelflinger/pixelflinger.h b/libpixelflinger/include/pixelflinger/pixelflinger.h
new file mode 100644
index 0000000..8a2b442
--- /dev/null
+++ b/libpixelflinger/include/pixelflinger/pixelflinger.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (C) 2007 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_PIXELFLINGER_H
+#define ANDROID_PIXELFLINGER_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <pixelflinger/format.h>
+
+// GGL types
+
+typedef int8_t			GGLbyte;		// b
+typedef int16_t			GGLshort;		// s
+typedef int32_t			GGLint;			// i
+typedef ssize_t			GGLsizei;		// i
+typedef int32_t			GGLfixed;		// x
+typedef int32_t			GGLclampx;		// x
+typedef float			GGLfloat;		// f
+typedef float			GGLclampf;		// f
+typedef double			GGLdouble;		// d
+typedef double			GGLclampd;		// d
+typedef uint8_t			GGLubyte;		// ub
+typedef uint8_t			GGLboolean;		// ub
+typedef uint16_t		GGLushort;		// us
+typedef uint32_t		GGLuint;		// ui
+typedef unsigned int	GGLenum;		// ui
+typedef unsigned int	GGLbitfield;	// ui
+typedef void			GGLvoid;
+typedef int32_t         GGLfixed32;
+typedef	int32_t         GGLcolor;
+typedef int32_t         GGLcoord;
+
+// ----------------------------------------------------------------------------
+
+#define GGL_MAX_VIEWPORT_DIMS           4096
+#define GGL_MAX_TEXTURE_SIZE            4096
+#define GGL_MAX_ALIASED_POINT_SIZE      0x7FFFFFF
+#define GGL_MAX_SMOOTH_POINT_SIZE       2048
+#define GGL_MAX_SMOOTH_LINE_WIDTH       2048
+
+// ----------------------------------------------------------------------------
+
+// All these names are compatible with their OpenGL equivalents
+// some of them are listed only for completeness
+enum GGLNames {
+	GGL_FALSE						= 0,
+	GGL_TRUE						= 1,
+
+	// enable/disable
+    GGL_SCISSOR_TEST                = 0x0C11,
+	GGL_TEXTURE_2D					= 0x0DE1,
+	GGL_ALPHA_TEST					= 0x0BC0,
+	GGL_BLEND						= 0x0BE2,
+	GGL_COLOR_LOGIC_OP				= 0x0BF2,
+	GGL_DITHER						= 0x0BD0,
+	GGL_STENCIL_TEST				= 0x0B90,
+	GGL_DEPTH_TEST					= 0x0B71,
+    GGL_AA                          = 0x80000001,
+    GGL_W_LERP                      = 0x80000004,
+    GGL_POINT_SMOOTH_NICE           = 0x80000005,
+
+    // buffers, pixel drawing/reading
+    GGL_COLOR                       = 0x1800,
+    
+    // fog
+    GGL_FOG                         = 0x0B60,
+    
+	// shade model
+	GGL_FLAT						= 0x1D00,
+	GGL_SMOOTH						= 0x1D01,
+
+	// Texture parameter name
+	GGL_TEXTURE_MIN_FILTER			= 0x2801,
+	GGL_TEXTURE_MAG_FILTER			= 0x2800,
+	GGL_TEXTURE_WRAP_S				= 0x2802,
+	GGL_TEXTURE_WRAP_T				= 0x2803,
+	GGL_TEXTURE_WRAP_R				= 0x2804,
+
+	// Texture Filter	
+	GGL_NEAREST						= 0x2600,
+	GGL_LINEAR						= 0x2601,
+	GGL_NEAREST_MIPMAP_NEAREST		= 0x2700,
+	GGL_LINEAR_MIPMAP_NEAREST		= 0x2701,
+	GGL_NEAREST_MIPMAP_LINEAR		= 0x2702,
+	GGL_LINEAR_MIPMAP_LINEAR		= 0x2703,
+
+	// Texture Wrap Mode
+	GGL_CLAMP						= 0x2900,
+	GGL_REPEAT						= 0x2901,
+    GGL_CLAMP_TO_EDGE               = 0x812F,
+
+	// Texture Env Mode
+	GGL_REPLACE						= 0x1E01,
+	GGL_MODULATE					= 0x2100,
+	GGL_DECAL						= 0x2101,
+	GGL_ADD							= 0x0104,
+
+	// Texture Env Parameter
+	GGL_TEXTURE_ENV_MODE			= 0x2200,
+	GGL_TEXTURE_ENV_COLOR			= 0x2201,
+
+	// Texture Env Target
+	GGL_TEXTURE_ENV					= 0x2300,
+
+    // Texture coord generation
+    GGL_TEXTURE_GEN_MODE            = 0x2500,
+    GGL_S                           = 0x2000,
+    GGL_T                           = 0x2001,
+    GGL_R                           = 0x2002,
+    GGL_Q                           = 0x2003,
+    GGL_ONE_TO_ONE                  = 0x80000002,
+    GGL_AUTOMATIC                   = 0x80000003,
+
+    // AlphaFunction
+    GGL_NEVER                       = 0x0200,
+    GGL_LESS                        = 0x0201,
+    GGL_EQUAL                       = 0x0202,
+    GGL_LEQUAL                      = 0x0203,
+    GGL_GREATER                     = 0x0204,
+    GGL_NOTEQUAL                    = 0x0205,
+    GGL_GEQUAL                      = 0x0206,
+    GGL_ALWAYS                      = 0x0207,
+
+    // LogicOp
+    GGL_CLEAR                       = 0x1500,   // 0
+    GGL_AND                         = 0x1501,   // s & d
+    GGL_AND_REVERSE                 = 0x1502,   // s & ~d
+    GGL_COPY                        = 0x1503,   // s
+    GGL_AND_INVERTED                = 0x1504,   // ~s & d
+    GGL_NOOP                        = 0x1505,   // d
+    GGL_XOR                         = 0x1506,   // s ^ d
+    GGL_OR                          = 0x1507,   // s | d
+    GGL_NOR                         = 0x1508,   // ~(s | d)
+    GGL_EQUIV                       = 0x1509,   // ~(s ^ d)
+    GGL_INVERT                      = 0x150A,   // ~d
+    GGL_OR_REVERSE                  = 0x150B,   // s | ~d
+    GGL_COPY_INVERTED               = 0x150C,   // ~s 
+    GGL_OR_INVERTED                 = 0x150D,   // ~s | d
+    GGL_NAND                        = 0x150E,   // ~(s & d)
+    GGL_SET                         = 0x150F,   // 1
+
+	// blending equation & function
+	GGL_ZERO                        = 0,		// SD
+	GGL_ONE                         = 1,		// SD
+	GGL_SRC_COLOR                   = 0x0300,	//  D
+	GGL_ONE_MINUS_SRC_COLOR         = 0x0301,	//	D
+	GGL_SRC_ALPHA                   = 0x0302,	// SD
+	GGL_ONE_MINUS_SRC_ALPHA			= 0x0303,	// SD
+	GGL_DST_ALPHA					= 0x0304,	// SD
+	GGL_ONE_MINUS_DST_ALPHA			= 0x0305,	// SD
+	GGL_DST_COLOR					= 0x0306,	// S
+	GGL_ONE_MINUS_DST_COLOR			= 0x0307,	// S
+	GGL_SRC_ALPHA_SATURATE			= 0x0308,	// S
+    
+    // clear bits
+    GGL_DEPTH_BUFFER_BIT            = 0x00000100,
+    GGL_STENCIL_BUFFER_BIT          = 0x00000400,
+    GGL_COLOR_BUFFER_BIT            = 0x00004000,
+
+    // errors
+    GGL_NO_ERROR                    = 0,
+    GGL_INVALID_ENUM                = 0x0500,
+    GGL_INVALID_VALUE               = 0x0501,
+    GGL_INVALID_OPERATION           = 0x0502,
+    GGL_STACK_OVERFLOW              = 0x0503,
+    GGL_STACK_UNDERFLOW             = 0x0504,
+    GGL_OUT_OF_MEMORY               = 0x0505
+};
+
+// ----------------------------------------------------------------------------
+
+typedef struct {
+    GGLsizei    version;    // always set to sizeof(GGLSurface)
+    GGLuint     width;      // width in pixels
+    GGLuint     height;     // height in pixels
+    GGLint      stride;     // stride in pixels
+    GGLubyte*   data;       // pointer to the bits
+    GGLubyte    format;     // pixel format
+    GGLubyte    rfu[3];     // must be zero
+    // these values are dependent on the used format
+    union {
+        GGLint  compressedFormat;
+        GGLint  vstride;
+    };
+    void*       reserved;
+} GGLSurface;
+
+
+typedef struct {
+    // immediate rendering
+    void (*pointx)(void *con, const GGLcoord* v, GGLcoord r);
+    void (*linex)(void *con, 
+            const GGLcoord* v0, const GGLcoord* v1, GGLcoord width);
+    void (*recti)(void* c, GGLint l, GGLint t, GGLint r, GGLint b); 
+    void (*trianglex)(void* c,
+            GGLcoord const* v0, GGLcoord const* v1, GGLcoord const* v2);
+
+    // scissor
+    void (*scissor)(void* c, GGLint x, GGLint y, GGLsizei width, GGLsizei height);
+
+    // Set the textures and color buffers
+    void (*activeTexture)(void* c, GGLuint tmu);
+    void (*bindTexture)(void* c, const GGLSurface* surface);
+    void (*colorBuffer)(void* c, const GGLSurface* surface);
+    void (*readBuffer)(void* c, const GGLSurface* surface);
+    void (*depthBuffer)(void* c, const GGLSurface* surface);
+    void (*bindTextureLod)(void* c, GGLuint tmu, const GGLSurface* surface);
+
+    // enable/disable features
+    void (*enable)(void* c, GGLenum name);
+    void (*disable)(void* c, GGLenum name);
+    void (*enableDisable)(void* c, GGLenum name, GGLboolean en);
+
+    // specify the fragment's color
+    void (*shadeModel)(void* c, GGLenum mode);
+    void (*color4xv)(void* c, const GGLclampx* color);
+    // specify color iterators (16.16)
+    void (*colorGrad12xv)(void* c, const GGLcolor* grad);
+
+    // specify Z coordinate iterators (0.32)
+    void (*zGrad3xv)(void* c, const GGLfixed32* grad);
+
+    // specify W coordinate iterators (16.16)
+    void (*wGrad3xv)(void* c, const GGLfixed* grad);
+
+    // specify fog iterator & color (16.16)
+    void (*fogGrad3xv)(void* c, const GGLfixed* grad);
+    void (*fogColor3xv)(void* c, const GGLclampx* color);
+
+    // specify blending parameters
+    void (*blendFunc)(void* c, GGLenum src, GGLenum dst);
+    void (*blendFuncSeparate)(void* c,  GGLenum src, GGLenum dst,
+                                        GGLenum srcAlpha, GGLenum dstAplha);
+
+    // texture environment (REPLACE / MODULATE / DECAL / BLEND)
+    void (*texEnvi)(void* c,    GGLenum target,
+                                GGLenum pname,
+                                GGLint param);
+
+    void (*texEnvxv)(void* c, GGLenum target,
+            GGLenum pname, const GGLfixed* params);
+
+    // texture parameters (Wrapping, filter)
+    void (*texParameteri)(void* c,  GGLenum target,
+                                    GGLenum pname,
+                                    GGLint param);
+
+    // texture iterators (16.16)
+    void (*texCoord2i)(void* c, GGLint s, GGLint t);
+    void (*texCoord2x)(void* c, GGLfixed s, GGLfixed t);
+    
+    // s, dsdx, dsdy, scale, t, dtdx, dtdy, tscale
+    // This api uses block floating-point for S and T texture coordinates.
+    // All values are given in 16.16, scaled by 'scale'. In other words,
+    // set scale to 0, for 16.16 values.
+    void (*texCoordGradScale8xv)(void* c, GGLint tmu, const int32_t* grad8);
+    
+    void (*texGeni)(void* c, GGLenum coord, GGLenum pname, GGLint param);
+
+    // masking
+    void (*colorMask)(void* c,  GGLboolean red,
+                                GGLboolean green,
+                                GGLboolean blue,
+                                GGLboolean alpha);
+
+    void (*depthMask)(void* c, GGLboolean flag);
+
+    void (*stencilMask)(void* c, GGLuint mask);
+
+    // alpha func
+    void (*alphaFuncx)(void* c, GGLenum func, GGLclampx ref);
+
+    // depth func
+    void (*depthFunc)(void* c, GGLenum func);
+
+    // logic op
+    void (*logicOp)(void* c, GGLenum opcode); 
+
+    // clear
+    void (*clear)(void* c, GGLbitfield mask);
+    void (*clearColorx)(void* c,
+            GGLclampx r, GGLclampx g, GGLclampx b, GGLclampx a);
+    void (*clearDepthx)(void* c, GGLclampx depth);
+    void (*clearStencil)(void* c, GGLint s);
+
+    // framebuffer operations
+    void (*copyPixels)(void* c, GGLint x, GGLint y,
+            GGLsizei width, GGLsizei height, GGLenum type);
+    void (*rasterPos2x)(void* c, GGLfixed x, GGLfixed y);
+    void (*rasterPos2i)(void* c, GGLint x, GGLint y);
+} GGLContext;
+
+// ----------------------------------------------------------------------------
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// construct / destroy the context
+ssize_t gglInit(GGLContext** context);
+ssize_t gglUninit(GGLContext* context);
+
+GGLint gglBitBlit(
+        GGLContext* c,
+        int tmu,
+        GGLint crop[4],
+        GGLint where[4]);
+
+#ifdef __cplusplus
+}   // extern "C" (a linkage block takes no trailing ';')
+#endif
+
+// ----------------------------------------------------------------------------
+
+#endif // ANDROID_PIXELFLINGER_H
diff --git a/libpixelflinger/include/private/pixelflinger/ggl_context.h b/libpixelflinger/include/private/pixelflinger/ggl_context.h
new file mode 100644
index 0000000..d43655c
--- /dev/null
+++ b/libpixelflinger/include/private/pixelflinger/ggl_context.h
@@ -0,0 +1,565 @@
+/*
+ * Copyright (C) 2006 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_GGL_CONTEXT_H
+#define ANDROID_GGL_CONTEXT_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+#include <endian.h>
+
+#include <pixelflinger/pixelflinger.h>
+#include <private/pixelflinger/ggl_fixed.h>
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+
+inline uint32_t GGL_RGBA_TO_HOST(uint32_t v) {
+    return v;
+}
+inline uint32_t GGL_HOST_TO_RGBA(uint32_t v) {
+    return v;
+}
+
+#else
+
+inline uint32_t GGL_RGBA_TO_HOST(uint32_t v) {
+#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2
+    uint32_t r;
+    __asm__("wsbh %0, %1;"
+        "rotr %0, %0, 16"
+        : "=r" (r)
+        : "r" (v)
+        );
+    return r;
+#else
+    return (v<<24) | (v>>24) | ((v<<8)&0xff0000) | ((v>>8)&0xff00);
+#endif
+}
+inline uint32_t GGL_HOST_TO_RGBA(uint32_t v) {
+#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2
+    uint32_t r;
+    __asm__("wsbh %0, %1;"
+        "rotr %0, %0, 16"
+        : "=r" (r)
+        : "r" (v)
+        );
+    return r;
+#else
+    return (v<<24) | (v>>24) | ((v<<8)&0xff0000) | ((v>>8)&0xff00);
+#endif
+}
+
+#endif
+
+// ----------------------------------------------------------------------------
+
+const int GGL_DITHER_BITS = 6;  // dither weights stored on 6 bits
+const int GGL_DITHER_ORDER_SHIFT= 3;
+const int GGL_DITHER_ORDER      = (1<<GGL_DITHER_ORDER_SHIFT);
+const int GGL_DITHER_SIZE       = GGL_DITHER_ORDER * GGL_DITHER_ORDER;
+const int GGL_DITHER_MASK       = GGL_DITHER_ORDER-1;
+
+// ----------------------------------------------------------------------------
+
+const int GGL_SUBPIXEL_BITS = 4;
+
+// TRI_FRACTION_BITS defines the number of bits we want to use
+// for the sub-pixel coordinates during the edge stepping, the
+// value shouldn't be more than 7, or bad things are going to
+// happen when drawing large triangles (8 doesn't work because
+// 32-bit muls will lose the sign bit)
+
+#define  TRI_FRACTION_BITS  (GGL_SUBPIXEL_BITS)
+#define  TRI_ONE            (1 << TRI_FRACTION_BITS)
+#define  TRI_HALF           (1 << (TRI_FRACTION_BITS-1))
+#define  TRI_FROM_INT(x)    ((x) << TRI_FRACTION_BITS)
+#define  TRI_FRAC(x)        ((x)                 &  (TRI_ONE-1))
+#define  TRI_FLOOR(x)       ((x)                 & ~(TRI_ONE-1))
+#define  TRI_CEIL(x)        (((x) + (TRI_ONE-1)) & ~(TRI_ONE-1))
+#define  TRI_ROUND(x)       (((x) +  TRI_HALF  ) & ~(TRI_ONE-1))
+
+#define  TRI_ROUDNING       (1 << (16 - TRI_FRACTION_BITS - 1))
+#define  TRI_FROM_FIXED(x)  (((x)+TRI_ROUDNING) >> (16-TRI_FRACTION_BITS))
+
+#define  TRI_SNAP_NEXT_HALF(x)   (TRI_CEIL((x)+TRI_HALF) - TRI_HALF)
+#define  TRI_SNAP_PREV_HALF(x)   (TRI_CEIL((x)-TRI_HALF) - TRI_HALF)
+
+// ----------------------------------------------------------------------------
+
+const int GGL_COLOR_BITS = 24;
+
+// To maintain 8-bit color channels, with a maximum GGLSurface
+// size of 4096 and GGL_SUBPIXEL_BITS=4, we need 8 + 12 + 4 = 24 bits
+// for encoding the color iterators
+
+inline GGLcolor gglFixedToIteratedColor(GGLfixed c) {   // scale a fixed-point color for the color iterators
+    return (c << 8) - c;   // 256*c - c, i.e. c scaled by 255
+}
+
+// ----------------------------------------------------------------------------
+
+template<bool> struct CTA;
+template<> struct CTA<true> { };
+
+#define GGL_CONTEXT(con, c)         context_t *con = static_cast<context_t *>(c)
+#define GGL_OFFSETOF(field)         uintptr_t(&(((context_t*)0)->field))
+#define GGL_INIT_PROC(p, f)         p.f = ggl_ ## f;
+#define GGL_BETWEEN(x, L, H)        (uint32_t((x)-(L)) <= ((H)-(L)))
+
+#define ggl_likely(x)	__builtin_expect(!!(x), 1)
+#define ggl_unlikely(x)	__builtin_expect(!!(x), 0)
+
+const int GGL_TEXTURE_UNIT_COUNT    = 2;
+const int GGL_TMU_STATE             = 0x00000001;
+const int GGL_CB_STATE              = 0x00000002;
+const int GGL_PIXEL_PIPELINE_STATE  = 0x00000004;
+
+// ----------------------------------------------------------------------------
+
+#define GGL_RESERVE_NEEDS(name, l, s) /* declare MASK and SHIFT for an s-bit field at bit l */ \
+    const uint32_t  GGL_NEEDS_##name##_MASK = (((1LU<<(s))-1)<<(l)); \
+    const uint32_t  GGL_NEEDS_##name##_SHIFT = (l);
+
+#define GGL_BUILD_NEEDS(val, name)                                  \
+    (((val)<<(GGL_NEEDS_##name##_SHIFT)) & GGL_NEEDS_##name##_MASK)
+
+#define GGL_READ_NEEDS(name, n)                                     \
+    (uint32_t(n & GGL_NEEDS_##name##_MASK) >> GGL_NEEDS_##name##_SHIFT)
+
+#define GGL_NEED_MASK(name)     (uint32_t(GGL_NEEDS_##name##_MASK))
+#define GGL_NEED(name, val)     GGL_BUILD_NEEDS(val, name)
+
+GGL_RESERVE_NEEDS( CB_FORMAT,       0, 6 )
+GGL_RESERVE_NEEDS( SHADE,           6, 1 )
+GGL_RESERVE_NEEDS( W,               7, 1 )
+GGL_RESERVE_NEEDS( BLEND_SRC,       8, 4 )
+GGL_RESERVE_NEEDS( BLEND_DST,      12, 4 )
+GGL_RESERVE_NEEDS( BLEND_SRCA,     16, 4 )
+GGL_RESERVE_NEEDS( BLEND_DSTA,     20, 4 )
+GGL_RESERVE_NEEDS( LOGIC_OP,       24, 4 )
+GGL_RESERVE_NEEDS( MASK_ARGB,      28, 4 )
+
+GGL_RESERVE_NEEDS( P_ALPHA_TEST,    0, 3 )
+GGL_RESERVE_NEEDS( P_AA,            3, 1 )
+GGL_RESERVE_NEEDS( P_DEPTH_TEST,    4, 3 )
+GGL_RESERVE_NEEDS( P_MASK_Z,        7, 1 )
+GGL_RESERVE_NEEDS( P_DITHER,        8, 1 )
+GGL_RESERVE_NEEDS( P_FOG,           9, 1 )
+GGL_RESERVE_NEEDS( P_RESERVED1,    10,22 )
+
+GGL_RESERVE_NEEDS( T_FORMAT,        0, 6 )
+GGL_RESERVE_NEEDS( T_RESERVED0,     6, 1 )
+GGL_RESERVE_NEEDS( T_POT,           7, 1 )
+GGL_RESERVE_NEEDS( T_S_WRAP,        8, 2 )
+GGL_RESERVE_NEEDS( T_T_WRAP,       10, 2 )
+GGL_RESERVE_NEEDS( T_ENV,          12, 3 )
+GGL_RESERVE_NEEDS( T_LINEAR,       15, 1 )
+
+const int GGL_NEEDS_WRAP_CLAMP_TO_EDGE  = 0;
+const int GGL_NEEDS_WRAP_REPEAT         = 1;
+const int GGL_NEEDS_WRAP_11             = 2;
+
+inline uint32_t ggl_wrap_to_needs(uint32_t e) {
+    // GGL_REPEAT is the only mode with a non-zero encoding; GGL_CLAMP and
+    // anything unrecognized map to GGL_NEEDS_WRAP_CLAMP_TO_EDGE (0).
+    if (e == GGL_REPEAT)
+        return GGL_NEEDS_WRAP_REPEAT;
+    return GGL_NEEDS_WRAP_CLAMP_TO_EDGE;
+}
+
+inline uint32_t ggl_blendfactor_to_needs(uint32_t b) {
+    // 0 and 1 (GGL_ZERO / GGL_ONE) pass through; others pack to low nibble + 2.
+    return (b > 1) ? ((b & 0xF) + 2) : b;
+}
+
+inline uint32_t ggl_needs_to_blendfactor(uint32_t n) {
+    // Codes 0 and 1 pass through; any other code k decodes to 0x300 + (k - 2).
+    return (n > 1) ? (0x300 + (n - 2)) : n;
+}
+
+inline uint32_t ggl_env_to_needs(uint32_t e) {   // map a texture env mode enum to its compact code (0..4)
+    switch (e) {
+    case GGL_REPLACE:   return 0;
+    case GGL_MODULATE:  return 1;
+    case GGL_DECAL:     return 2;
+    case GGL_BLEND:     return 3;
+    case GGL_ADD:       return 4;
+    }
+    return 0;   // any other value gets the same code as GGL_REPLACE
+}
+
+inline uint32_t ggl_needs_to_env(uint32_t n) {
+    // Inverse of ggl_env_to_needs(); codes outside the table decode to GGL_REPLACE.
+    static const uint32_t envs[] = { GGL_REPLACE, GGL_MODULATE,
+            GGL_DECAL, GGL_BLEND, GGL_ADD };
+    return (n < sizeof(envs)/sizeof(envs[0])) ? envs[n] : envs[0];
+}
+
+// ----------------------------------------------------------------------------
+
+enum {
+    GGL_ENABLE_BLENDING     = 0x00000001,
+    GGL_ENABLE_SMOOTH       = 0x00000002,
+    GGL_ENABLE_AA           = 0x00000004,
+    GGL_ENABLE_LOGIC_OP     = 0x00000008,
+    GGL_ENABLE_ALPHA_TEST   = 0x00000010,
+    GGL_ENABLE_SCISSOR_TEST = 0x00000020,
+    GGL_ENABLE_TMUS         = 0x00000040,
+    GGL_ENABLE_DEPTH_TEST   = 0x00000080,
+    GGL_ENABLE_STENCIL_TEST = 0x00000100,
+    GGL_ENABLE_W            = 0x00000200,
+    GGL_ENABLE_DITHER       = 0x00000400,
+    GGL_ENABLE_FOG          = 0x00000800,
+    GGL_ENABLE_POINT_AA_NICE= 0x00001000
+};
+
+// ----------------------------------------------------------------------------
+
+struct needs_filter_t;   // forward declaration; tag now matches the struct definition
+struct needs_t {
+    inline int match(const needs_filter_t& filter);   // 1 if this agrees with filter on all masked bits
+    inline bool operator == (const needs_t& rhs) const {   // field-wise equality
+        return  (n==rhs.n) &&
+                (p==rhs.p) &&
+                (t[0]==rhs.t[0]) &&
+                (t[1]==rhs.t[1]);
+    }
+    inline bool operator != (const needs_t& rhs) const {
+        return !operator == (rhs);
+    }
+    uint32_t    n;   // NOTE(review): presumably the un-prefixed GGL_NEEDS_* fields -- confirm
+    uint32_t    p;   // NOTE(review): presumably the GGL_NEEDS_P_* fields -- confirm
+    uint32_t    t[GGL_TEXTURE_UNIT_COUNT];   // NOTE(review): presumably GGL_NEEDS_T_* fields, one word per TMU -- confirm
+};
+
+inline int compare_type(const needs_t& lhs, const needs_t& rhs) {
+    return memcmp(&lhs, &rhs, sizeof(needs_t));
+}
+
+struct needs_filter_t {
+    needs_t     value;
+    needs_t     mask;
+};
+
+int needs_t::match(const needs_filter_t& filter) {
+    // A word matches when it agrees with filter.value on every bit that
+    // filter.mask selects; the result is 1 only if all four words match.
+    const needs_t& want = filter.value;
+    const needs_t& care = filter.mask;
+    if (((want.n ^ n) & care.n) || ((want.p ^ p) & care.p)) return 0;
+    return !(((want.t[0] ^ t[0]) & care.t[0]) || ((want.t[1] ^ t[1]) & care.t[1]));
+}
+
+// ----------------------------------------------------------------------------
+
+struct context_t;
+class Assembly;
+
+struct blend_state_t {
+	uint32_t			src;
+	uint32_t			dst;
+	uint32_t			src_alpha;
+	uint32_t			dst_alpha;
+	uint8_t				reserved;
+	uint8_t				alpha_separate;
+	uint8_t				operation;
+	uint8_t				equation;
+};
+
+struct mask_state_t {
+    uint8_t             color;
+    uint8_t             depth;
+    uint32_t            stencil;
+};
+
+struct clear_state_t {
+    GGLclampx           r;
+    GGLclampx           g;
+    GGLclampx           b;
+    GGLclampx           a;
+    GGLclampx           depth;
+    GGLint              stencil;
+    uint32_t            colorPacked;
+    uint32_t            depthPacked;
+    uint32_t            stencilPacked;
+    uint32_t            dirty;
+};
+
+struct fog_state_t {
+    uint8_t     color[4];
+};
+
+struct logic_op_state_t {
+    uint16_t            opcode;
+};
+
+struct alpha_test_state_t {
+    uint16_t            func;
+    GGLcolor            ref;
+};
+
+struct depth_test_state_t {
+    uint16_t            func;
+    GGLclampx           clearValue;
+};
+
+struct scissor_t {
+    uint32_t            user_left;
+    uint32_t            user_right;
+    uint32_t            user_top;
+    uint32_t            user_bottom;
+    uint32_t            left;
+    uint32_t            right;
+    uint32_t            top;
+    uint32_t            bottom;
+};
+
+struct pixel_t {
+    uint32_t    c[4];
+    uint8_t     s[4];
+};
+
+struct surface_t {
+    union {
+        GGLSurface          s;
+        // Keep the following struct field types in line with the corresponding
+        // GGLSurface fields to avoid mismatches leading to errors.
+        struct {
+            GGLsizei        reserved;
+            GGLuint         width;
+            GGLuint         height;
+            GGLint          stride;
+            GGLubyte*       data;
+            GGLubyte        format;
+            GGLubyte        dirty;
+            GGLubyte        pad[2];
+        };
+    };
+    void                (*read) (const surface_t* s, context_t* c,
+                                uint32_t x, uint32_t y, pixel_t* pixel);
+    void                (*write)(const surface_t* s, context_t* c,
+                                uint32_t x, uint32_t y, const pixel_t* pixel);
+};
+
+// ----------------------------------------------------------------------------
+
+struct texture_shade_t {
+    union {
+        struct {
+            int32_t             is0;
+            int32_t             idsdx;
+            int32_t             idsdy;
+            int                 sscale;
+            int32_t             it0;
+            int32_t             idtdx;
+            int32_t             idtdy;
+            int                 tscale;
+        };
+        struct {
+            int32_t             v;
+            int32_t             dx;
+            int32_t             dy;
+            int                 scale;
+        } st[2];
+    };
+};
+
+struct texture_iterators_t {
+    // these are not encoded in the same way as in the
+    // texture_shade_t structure
+    union {
+        struct {
+            GGLfixed			ydsdy;
+            GGLfixed            dsdx;
+            GGLfixed            dsdy;
+            int                 sscale;
+            GGLfixed			ydtdy;
+            GGLfixed            dtdx;
+            GGLfixed            dtdy;
+            int                 tscale;
+        };
+        struct {
+            GGLfixed			ydvdy;
+            GGLfixed            dvdx;
+            GGLfixed            dvdy;
+            int                 scale;
+        } st[2];
+    };
+};
+
+struct texture_t {
+	surface_t			surface;
+	texture_iterators_t	iterators;
+    texture_shade_t     shade;
+	uint32_t			s_coord;
+	uint32_t            t_coord;
+	uint16_t			s_wrap;
+	uint16_t            t_wrap;
+	uint16_t            min_filter;
+	uint16_t            mag_filter;
+    uint16_t            env;
+    uint8_t             env_color[4];
+	uint8_t				enable;
+	uint8_t				dirty;
+};
+
+struct raster_t {
+    GGLfixed            x;
+    GGLfixed            y;
+};
+
+struct framebuffer_t {
+    surface_t           color;
+    surface_t           read;
+	surface_t			depth;
+	surface_t			stencil;
+    int16_t             *coverage;
+    size_t              coverageBufferSize;
+};
+
+// ----------------------------------------------------------------------------
+
+struct iterators_t {
+	int32_t             xl;
+	int32_t             xr;
+    int32_t             y;
+	GGLcolor			ydady;
+	GGLcolor			ydrdy;
+	GGLcolor			ydgdy;
+	GGLcolor			ydbdy;
+	GGLfixed			ydzdy;
+	GGLfixed			ydwdy;
+	GGLfixed			ydfdy;
+};
+
+struct shade_t {
+	GGLcolor			a0;
+    GGLcolor            dadx;
+    GGLcolor            dady;
+	GGLcolor			r0;
+    GGLcolor            drdx;
+    GGLcolor            drdy;
+	GGLcolor			g0;
+    GGLcolor            dgdx;
+    GGLcolor            dgdy;
+	GGLcolor			b0;
+    GGLcolor            dbdx;
+    GGLcolor            dbdy;
+	uint32_t            z0;
+    GGLfixed32          dzdx;
+    GGLfixed32          dzdy;
+	GGLfixed            w0;
+    GGLfixed            dwdx;
+    GGLfixed            dwdy;
+	uint32_t			f0;
+    GGLfixed            dfdx;
+    GGLfixed            dfdy;
+};
+
+// these are used in the generated code
+// we use this mirror structure to improve
+// data locality in the pixel pipeline
+struct generated_tex_vars_t {
+    uint32_t    width;
+    uint32_t    height;
+    uint32_t    stride;
+    uintptr_t   data;
+    int32_t     dsdx;
+    int32_t     dtdx;
+    int32_t     spill[2];
+};
+
+struct generated_vars_t {
+    struct {
+        int32_t c;
+        int32_t dx;
+    } argb[4];
+    int32_t     aref;
+    int32_t     dzdx;
+    int32_t     zbase;
+    int32_t     f;
+    int32_t     dfdx;
+    int32_t     spill[3];
+    generated_tex_vars_t    texture[GGL_TEXTURE_UNIT_COUNT];
+    int32_t     rt;
+    int32_t     lb;
+};
+
+// ----------------------------------------------------------------------------
+
+struct state_t {
+	framebuffer_t		buffers;
+	texture_t			texture[GGL_TEXTURE_UNIT_COUNT];
+    scissor_t           scissor;
+    raster_t            raster;
+	blend_state_t		blend;
+    alpha_test_state_t  alpha_test;
+    depth_test_state_t  depth_test;
+    mask_state_t        mask;
+    clear_state_t       clear;
+    fog_state_t         fog;
+    logic_op_state_t    logic_op;
+    uint32_t            enables;
+    uint32_t            enabled_tmu;
+    needs_t             needs;
+};
+
+// ----------------------------------------------------------------------------
+
+struct context_t {
+	GGLContext          procs;
+	state_t             state;
+    shade_t             shade;
+	iterators_t         iterators;
+    generated_vars_t    generated_vars                __attribute__((aligned(32)));
+    uint8_t             ditherMatrix[GGL_DITHER_SIZE] __attribute__((aligned(32)));
+    uint32_t            packed;
+    uint32_t            packed8888;
+    const GGLFormat*    formats;
+    uint32_t            dirty;
+    texture_t*          activeTMU;
+    uint32_t            activeTMUIndex;
+
+    void                (*init_y)(context_t* c, int32_t y);
+	void                (*step_y)(context_t* c);
+	void                (*scanline)(context_t* c);
+    void                (*span)(context_t* c);
+    void                (*rect)(context_t* c, size_t yc);
+    
+    void*               base;
+    Assembly*           scanline_as;
+    GGLenum             error;
+};
+
+// ----------------------------------------------------------------------------
+
+void ggl_init_context(context_t* context);
+void ggl_uninit_context(context_t* context);
+void ggl_error(context_t* c, GGLenum error);
+int64_t ggl_system_time();
+
+// ----------------------------------------------------------------------------
+
+};
+
+#endif // ANDROID_GGL_CONTEXT_H
+
diff --git a/libpixelflinger/include/private/pixelflinger/ggl_fixed.h b/libpixelflinger/include/private/pixelflinger/ggl_fixed.h
new file mode 100644
index 0000000..787f620
--- /dev/null
+++ b/libpixelflinger/include/private/pixelflinger/ggl_fixed.h
@@ -0,0 +1,633 @@
+/*
+ * Copyright (C) 2005 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_GGL_FIXED_H
+#define ANDROID_GGL_FIXED_H
+
+#include <math.h>
+#include <pixelflinger/pixelflinger.h>
+
+// ----------------------------------------------------------------------------
+
+#define CONST           __attribute__((const))  // GCC function attribute shorthand, applied to the prototypes below
+#define ALWAYS_INLINE   __attribute__((always_inline))  // GCC function attribute shorthand
+
+const GGLfixed FIXED_BITS = 16;               // number of fractional bits
+const GGLfixed FIXED_EPSILON  = 1;            // smallest representable step
+const GGLfixed FIXED_ONE  = 1L<<FIXED_BITS;   // 1.0
+const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1);  // 0.5
+const GGLfixed FIXED_MIN  = 0x80000000L;      // bit pattern with only bit 31 set
+const GGLfixed FIXED_MAX  = 0x7FFFFFFFL;      // bit pattern with every bit except bit 31 set
+
+// Integer <-> fixed-point conversions and fixed-point rounding helpers.
+// The prototypes are listed first because they carry the ALWAYS_INLINE attribute.
+inline GGLfixed gglIntToFixed(GGLfixed i)       ALWAYS_INLINE ;
+inline GGLfixed gglFixedToIntRound(GGLfixed f)  ALWAYS_INLINE ;
+inline GGLfixed gglFixedToIntFloor(GGLfixed f)  ALWAYS_INLINE ;
+inline GGLfixed gglFixedToIntCeil(GGLfixed f)   ALWAYS_INLINE ;
+inline GGLfixed gglFracx(GGLfixed v)            ALWAYS_INLINE ;
+inline GGLfixed gglFloorx(GGLfixed v)           ALWAYS_INLINE ;
+inline GGLfixed gglCeilx(GGLfixed v)            ALWAYS_INLINE ;
+inline GGLfixed gglCenterx(GGLfixed v)          ALWAYS_INLINE ;
+inline GGLfixed gglRoundx(GGLfixed v)           ALWAYS_INLINE ;
+
+// Integer -> fixed: shift the value up by FIXED_BITS.
+GGLfixed gglIntToFixed(GGLfixed i) { return i << FIXED_BITS; }
+
+// Fixed -> integer, adding FIXED_HALF before the shift so the result rounds to nearest.
+GGLfixed gglFixedToIntRound(GGLfixed f) { return (f + FIXED_HALF) >> FIXED_BITS; }
+
+// Fixed -> integer, simply shifting the fractional bits away.
+GGLfixed gglFixedToIntFloor(GGLfixed f) { return f >> FIXED_BITS; }
+
+// Fixed -> integer, adding (2^FIXED_BITS - 1) first so any fraction carries into the integer part.
+GGLfixed gglFixedToIntCeil(GGLfixed f) { return (f + ((1 << FIXED_BITS) - 1)) >> FIXED_BITS; }
+
+// Low FIXED_BITS bits of v (its fractional part).
+GGLfixed gglFracx(GGLfixed v) { return v & ((1 << FIXED_BITS) - 1); }
+
+// Floor of v, returned as a fixed-point value (fractional bits cleared).
+GGLfixed gglFloorx(GGLfixed v) { return gglFixedToIntFloor(v) << FIXED_BITS; }
+
+// Ceiling of v, returned as a fixed-point value.
+GGLfixed gglCeilx(GGLfixed v) { return gglFixedToIntCeil(v) << FIXED_BITS; }
+
+// Floor of (v + 0.5) with the FIXED_HALF bit then forced on.
+GGLfixed gglCenterx(GGLfixed v) { return gglFloorx(v + FIXED_HALF) | FIXED_HALF; }
+
+// v rounded to the nearest integer, returned as a fixed-point value.
+GGLfixed gglRoundx(GGLfixed v) { return gglFixedToIntRound(v) << FIXED_BITS; }
+
+// Normalizing conversions from (unsigned) byte, short and int to fixed; each maps the type's maximum to 0x10000.
+#define GGL_B_TO_X(_x)      GGLfixed( ((int32_t(_x)+1)>>1)<<10 )  // signed byte:  127        -> 0x10000
+#define GGL_S_TO_X(_x)      GGLfixed( ((int32_t(_x)+1)>>1)<<2 )   // signed short: 32767      -> 0x10000
+#define GGL_I_TO_X(_x)      GGLfixed( ((int32_t(_x)>>1)+1)>>14 )  // signed int:   0x7FFFFFFF -> 0x10000
+#define GGL_UB_TO_X(_x)     GGLfixed(   uint32_t(_x) +      \
+                                        (uint32_t(_x)<<8) + \
+                                        (uint32_t(_x)>>7) )  // unsigned byte: 255 -> 255 + 65280 + 1 = 0x10000
+#define GGL_US_TO_X(_x)     GGLfixed( (_x) + ((_x)>>15) )  // unsigned short: 65535 -> 0x10000; _x is used uncast
+#define GGL_UI_TO_X(_x)     GGLfixed( (((_x)>>1)+1)>>15 )  // unsigned int: 0xFFFFFFFF -> 0x10000; _x is used uncast
+
+// ----------------------------------------------------------------------------
+
+GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST;
+GGLfixed gglSqrtx(GGLfixed a) CONST;
+GGLfixed gglSqrtRecipx(GGLfixed x) CONST;
+GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) CONST;
+int32_t gglMulDivi(int32_t a, int32_t b, int32_t c);
+
+int32_t gglRecipQNormalized(int32_t x, int* exponent);
+int32_t gglRecipQ(GGLfixed x, int q) CONST;
+
+// gglRecip: forwards to gglRecipQ() with q fixed at 16.
+// The prototype is kept separate from the definition so that it can carry CONST.
+inline GGLfixed gglRecip(GGLfixed x) CONST;
+inline GGLfixed gglRecip(GGLfixed x) { return gglRecipQ(x, 16); }
+
+// gglRecip28: forwards to gglRecipQ() with q fixed at 28.
+// Prototype and definition both read `inline GGLfixed`, the same spelling gglRecip() uses.
+inline GGLfixed gglRecip28(GGLfixed x) CONST;
+inline GGLfixed gglRecip28(GGLfixed x) { return gglRecipQ(x, 28); }
+
+// ----------------------------------------------------------------------------
+
+#if defined(__arm__) && !defined(__thumb__)
+
+// inline ARM implementations
+inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;  // ARM (non-Thumb): (x*y) >> shift, rounded via the carry flag
+inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) {
+    GGLfixed result, t;
+    if (__builtin_constant_p(shift)) {  // compile-time shift: both shift amounts are immediates ("I")
+    asm("smull  %[lo], %[hi], %[x], %[y]            \n"  // hi:lo = 64-bit signed product
+        "movs   %[lo], %[lo], lsr %[rshift]         \n"  // lo >>= shift; carry = last bit shifted out
+        "adc    %[lo], %[lo], %[hi], lsl %[lshift]  \n"  // lo += (hi << (32-shift)) + carry
+        : [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x)  // NOTE(review): no '&' here, unlike the other asm blocks — confirm intended
+        : "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift)
+        : "cc"
+        );
+    } else {  // run-time shift: same sequence with the shift amounts in registers
+    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
+        "movs   %[lo], %[lo], lsr %[rshift]         \n"
+        "adc    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
+        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
+        : "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift)
+        : "cc"
+        );
+    }
+    return result;
+}
+
+inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;  // ARM (non-Thumb): ((x*y) >> shift) + a
+inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
+    GGLfixed result, t;
+    if (__builtin_constant_p(shift)) {  // compile-time shift: immediates
+    asm("smull  %[lo], %[hi], %[x], %[y]            \n"  // hi:lo = 64-bit signed product
+        "add    %[lo], %[a],  %[lo], lsr %[rshift]  \n"  // lo = a + (lo >> shift)
+        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"  // lo += hi << (32-shift); plain add, so no rounding carry
+        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
+        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
+        );
+    } else {  // run-time shift: registers
+    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
+        "add    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
+        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
+        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
+        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
+        );
+    }
+    return result;
+}
+
+inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;  // ARM (non-Thumb): ((x*y) >> shift) - a
+inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
+    GGLfixed result, t;
+    if (__builtin_constant_p(shift)) {  // compile-time shift: immediates
+    asm("smull  %[lo], %[hi], %[x], %[y]            \n"  // hi:lo = 64-bit signed product
+        "rsb    %[lo], %[a],  %[lo], lsr %[rshift]  \n"  // reverse subtract: lo = (lo >> shift) - a
+        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"  // lo += hi << (32-shift); no rounding carry
+        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
+        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
+        );
+    } else {  // run-time shift: registers
+    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
+        "rsb    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
+        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
+        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
+        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
+        );
+    }
+    return result;
+}
+
+inline int64_t gglMulii(int32_t x, int32_t y) CONST;  // ARM (non-Thumb): full 64-bit signed product of x and y
+inline int64_t gglMulii(int32_t x, int32_t y)
+{
+    // The 64-bit result is assembled from two 32-bit halves: lo first, then hi.
+    union {
+        struct {
+            int32_t lo;
+            int32_t hi;
+        } s;
+        int64_t res;  // overlays s.lo / s.hi
+    };
+    asm("smull %0, %1, %2, %3   \n"  // %0 = low word, %1 = high word
+        : "=r"(s.lo), "=&r"(s.hi)
+        : "%r"(x), "r"(y)
+        :
+        );
+    return res;
+}
+#elif defined(__mips__) && __mips_isa_rev < 6
+
+/*inline MIPS implementations*/
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;  // MIPS: (a*b) >> shift built from mult + mfhi/mflo
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
+    GGLfixed result,tmp,tmp1,tmp2;
+
+    if (__builtin_constant_p(shift)) {  // shift known at compile time: pick a specialized sequence
+        if (shift == 0) {  // low word of the product only
+            asm ("mult %[a], %[b] \t\n"
+              "mflo  %[res]   \t\n"
+            : [res]"=&r"(result),[tmp]"=&r"(tmp)
+            : [a]"r"(a),[b]"r"(b)
+            : "%hi","%lo"
+            );
+        } else if (shift == 32)  // HI plus the carry of LO + 0x80000000, plus the sign extension (sra) of that bias
+        {  // NOTE(review): the sra term is all ones, so the net result is HI + carry - 1 — confirm this rounding is intended
+            asm ("mult %[a], %[b] \t\n"
+            "li  %[tmp],1\t\n"
+            "sll  %[tmp],%[tmp],0x1f\t\n"
+            "mflo %[res]   \t\n"
+            "addu %[tmp1],%[tmp],%[res] \t\n"
+            "sltu %[tmp1],%[tmp1],%[tmp]\t\n"   /*obit*/
+            "sra %[tmp],%[tmp],0x1f \t\n"
+            "mfhi  %[res]   \t\n"
+            "addu %[res],%[res],%[tmp]\t\n"
+            "addu %[res],%[res],%[tmp1]\t\n"
+            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
+            : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
+            : "%hi","%lo"
+            );
+        } else if ((shift >0) && (shift < 32))  // add 1<<(shift-1) to LO, carry into HI, then (HI << (32-shift)) | (LO >> shift)
+        {
+            asm ("mult %[a], %[b] \t\n"
+            "li  %[tmp],1 \t\n"
+            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
+            "mflo  %[res]   \t\n"
+            "addu %[tmp1],%[tmp],%[res] \t\n"
+            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
+            "addu  %[res],%[res],%[tmp] \t\n"
+            "mfhi  %[tmp]   \t\n"
+            "addu  %[tmp],%[tmp],%[tmp1] \t\n"
+            "sll   %[tmp],%[tmp],%[lshift] \t\n"
+            "srl   %[res],%[res],%[rshift]    \t\n"
+            "or    %[res],%[res],%[tmp] \t\n"
+            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+            : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
+            : "%hi","%lo"
+            );
+        } else {  // remaining constant shifts: same instruction sequence as the run-time path, with immediate operands
+            asm ("mult %[a], %[b] \t\n"
+            "li  %[tmp],1 \t\n"
+            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
+            "mflo  %[res]   \t\n"
+            "addu %[tmp1],%[tmp],%[res] \t\n"
+            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
+            "sra  %[tmp2],%[tmp],0x1f \t\n"
+            "addu  %[res],%[res],%[tmp] \t\n"
+            "mfhi  %[tmp]   \t\n"
+            "addu  %[tmp],%[tmp],%[tmp2] \t\n"
+            "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
+            "srl   %[tmp2],%[res],%[rshift]    \t\n"
+            "srav  %[res], %[tmp],%[rshift]\t\n"
+            "sll   %[tmp],%[tmp],1 \t\n"
+            "sll   %[tmp],%[tmp],%[norbits] \t\n"
+            "or    %[tmp],%[tmp],%[tmp2] \t\n"
+            "movz  %[res],%[tmp],%[bit5] \t\n"
+            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+            : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
+            : "%hi","%lo"
+            );
+        }
+    } else {  // shift only known at run time: every shift-derived operand is passed in a register
+        asm ("mult %[a], %[b] \t\n"
+        "li  %[tmp],1 \t\n"
+        "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
+        "mflo  %[res]   \t\n"
+        "addu %[tmp1],%[tmp],%[res] \t\n"
+        "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
+        "sra  %[tmp2],%[tmp],0x1f \t\n"
+        "addu  %[res],%[res],%[tmp] \t\n"
+        "mfhi  %[tmp]   \t\n"
+        "addu  %[tmp],%[tmp],%[tmp2] \t\n"
+        "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
+        "srl   %[tmp2],%[res],%[rshift]    \t\n"
+        "srav  %[res], %[tmp],%[rshift]\t\n"
+        "sll   %[tmp],%[tmp],1 \t\n"
+        "sll   %[tmp],%[tmp],%[norbits] \t\n"
+        "or    %[tmp],%[tmp],%[tmp2] \t\n"
+        "movz  %[res],%[tmp],%[bit5] \t\n"  // keep the srav result only when bit 5 of shift is set, else take the spliced word
+         : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+         : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
+         : "%hi","%lo"
+         );
+        }
+
+        return result;
+}
+
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;  // MIPS: ((a*b) >> shift) + c
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
+    GGLfixed result,t,tmp1,tmp2;
+
+    if (__builtin_constant_p(shift)) {  // shift known at compile time
+        if (shift == 0) {  // low word of the product, plus c
+                 asm ("mult %[a], %[b] \t\n"
+                 "mflo  %[lo]   \t\n"
+                 "addu  %[lo],%[lo],%[c]    \t\n"
+                 : [lo]"=&r"(result)
+                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+                 : "%hi","%lo"
+                 );
+                } else if (shift == 32) {  // high word of the product, plus c
+                    asm ("mult %[a], %[b] \t\n"
+                    "mfhi  %[lo]   \t\n"
+                    "addu  %[lo],%[lo],%[c]    \t\n"
+                    : [lo]"=&r"(result)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+                    : "%hi","%lo"
+                    );
+                } else if ((shift>0) && (shift<32)) {  // (LO >> shift) | (HI << (32-shift)), plus c; no rounding term
+                    asm ("mult %[a], %[b] \t\n"
+                    "mflo  %[res]   \t\n"
+                    "mfhi  %[t]   \t\n"
+                    "srl   %[res],%[res],%[rshift]    \t\n"
+                    "sll   %[t],%[t],%[lshift]     \t\n"
+                    "or  %[res],%[res],%[t]    \t\n"
+                    "addu  %[res],%[res],%[c]    \t\n"
+                    : [res]"=&r"(result),[t]"=&r"(t)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
+                    : "%hi","%lo"
+                    );
+                } else {  // remaining constant shifts: same sequence as the run-time path, shift given as an immediate
+                    asm ("mult %[a], %[b] \t\n"
+                    "nor %[tmp1],$zero,%[shift]\t\n"
+                    "mflo  %[res]   \t\n"
+                    "mfhi  %[t]   \t\n"
+                    "srl   %[res],%[res],%[shift]    \t\n"
+                    "sll   %[tmp2],%[t],1     \t\n"
+                    "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
+                    "or  %[tmp1],%[tmp2],%[res]    \t\n"
+                    "srav  %[res],%[t],%[shift]     \t\n"
+                    "andi %[tmp2],%[shift],0x20\t\n"
+                    "movz %[res],%[tmp1],%[tmp2]\t\n"
+                    "addu  %[res],%[res],%[c]    \t\n"
+                    : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
+                    : "%hi","%lo"
+                    );
+                }
+            } else {  // shift only known at run time: passed in a register
+                asm ("mult %[a], %[b] \t\n"
+                "nor %[tmp1],$zero,%[shift]\t\n"
+                "mflo  %[res]   \t\n"
+                "mfhi  %[t]   \t\n"
+                "srl   %[res],%[res],%[shift]    \t\n"
+                "sll   %[tmp2],%[t],1     \t\n"
+                "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
+                "or  %[tmp1],%[tmp2],%[res]    \t\n"
+                "srav  %[res],%[t],%[shift]     \t\n"
+                "andi %[tmp2],%[shift],0x20\t\n"
+                "movz %[res],%[tmp1],%[tmp2]\t\n"  // take the spliced word when bit 5 of shift is clear
+                "addu  %[res],%[res],%[c]    \t\n"
+                : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+                : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
+                : "%hi","%lo"
+                );
+            }
+            return result;
+}
+
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;  // MIPS: ((a*b) >> shift) - c
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
+    GGLfixed result,t,tmp1,tmp2;
+
+    if (__builtin_constant_p(shift)) {  // shift known at compile time
+        if (shift == 0) {  // low word of the product, minus c
+                 asm ("mult %[a], %[b] \t\n"
+                 "mflo  %[lo]   \t\n"
+                 "subu  %[lo],%[lo],%[c]    \t\n"
+                 : [lo]"=&r"(result)
+                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+                 : "%hi","%lo"
+                 );
+                } else if (shift == 32) {  // high word of the product, minus c
+                    asm ("mult %[a], %[b] \t\n"
+                    "mfhi  %[lo]   \t\n"
+                    "subu  %[lo],%[lo],%[c]    \t\n"
+                    : [lo]"=&r"(result)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+                    : "%hi","%lo"
+                    );
+                } else if ((shift>0) && (shift<32)) {  // (LO >> shift) | (HI << (32-shift)), minus c; no rounding term
+                    asm ("mult %[a], %[b] \t\n"
+                    "mflo  %[res]   \t\n"
+                    "mfhi  %[t]   \t\n"
+                    "srl   %[res],%[res],%[rshift]    \t\n"
+                    "sll   %[t],%[t],%[lshift]     \t\n"
+                    "or  %[res],%[res],%[t]    \t\n"
+                    "subu  %[res],%[res],%[c]    \t\n"
+                    : [res]"=&r"(result),[t]"=&r"(t)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
+                    : "%hi","%lo"
+                    );
+                } else {  // remaining constant shifts: same sequence as the run-time path, shift given as an immediate
+                    asm ("mult %[a], %[b] \t\n"
+                    "nor %[tmp1],$zero,%[shift]\t\n"
+                     "mflo  %[res]   \t\n"
+                     "mfhi  %[t]   \t\n"
+                     "srl   %[res],%[res],%[shift]    \t\n"
+                     "sll   %[tmp2],%[t],1     \t\n"
+                     "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
+                     "or  %[tmp1],%[tmp2],%[res]    \t\n"
+                     "srav  %[res],%[t],%[shift]     \t\n"
+                     "andi %[tmp2],%[shift],0x20\t\n"
+                     "movz %[res],%[tmp1],%[tmp2]\t\n"
+                     "subu  %[res],%[res],%[c]    \t\n"
+                     : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
+                     : "%hi","%lo"
+                     );
+                    }
+                } else {  // shift only known at run time: passed in a register
+                asm ("mult %[a], %[b] \t\n"
+                "nor %[tmp1],$zero,%[shift]\t\n"
+                "mflo  %[res]   \t\n"
+                "mfhi  %[t]   \t\n"
+                "srl   %[res],%[res],%[shift]    \t\n"
+                "sll   %[tmp2],%[t],1     \t\n"
+                "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
+                "or  %[tmp1],%[tmp2],%[res]    \t\n"
+                "srav  %[res],%[t],%[shift]     \t\n"
+                "andi %[tmp2],%[shift],0x20\t\n"
+                "movz %[res],%[tmp1],%[tmp2]\t\n"  // take the spliced word when bit 5 of shift is clear
+                "subu  %[res],%[res],%[c]    \t\n"
+                : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+                : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
+                : "%hi","%lo"
+                );
+            }
+    return result;
+}
+
+inline int64_t gglMulii(int32_t x, int32_t y) CONST;  // MIPS: full 64-bit signed product of x and y
+inline int64_t gglMulii(int32_t x, int32_t y) {
+    union {
+        struct {  // member order follows the target's endianness macro
+#if defined(__MIPSEL__)
+            int32_t lo;
+            int32_t hi;
+#elif defined(__MIPSEB__)
+            int32_t hi;
+            int32_t lo;
+#endif
+        } s;
+        int64_t res;  // overlays s.lo / s.hi
+    }u;
+    asm("mult %2, %3 \t\n"
+        "mfhi %1   \t\n"  // high word -> u.s.hi
+        "mflo %0   \t\n"  // low word  -> u.s.lo
+        : "=r"(u.s.lo), "=&r"(u.s.hi)
+        : "%r"(x), "r"(y)
+	: "%hi","%lo"
+        );
+    return u.res;
+}
+
+#elif defined(__aarch64__)
+
+// inline AArch64 implementations
+
+inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;  // AArch64: (x*y + round) >> shift
+inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift)
+{
+    GGLfixed result;
+    GGLfixed round;  // scratch for the rounding bias; written through its 64-bit register name
+
+    asm("mov    %x[round], #1                        \n"
+        "lsl    %x[round], %x[round], %x[shift]      \n"
+        "lsr    %x[round], %x[round], #1             \n"  // round = (1 << shift) >> 1
+        "smaddl %x[result], %w[x], %w[y],%x[round]   \n"  // result = x*y + round, as a 64-bit value
+        "lsr    %x[result], %x[result], %x[shift]    \n"  // logical shift right by shift
+        : [round]"=&r"(round), [result]"=&r"(result) \
+        : [x]"r"(x), [y]"r"(y), [shift] "r"(shift)   \
+        :
+       );
+    return result;
+}
+inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;  // AArch64: ((x*y) >> shift) + a, no rounding
+inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
+{
+    GGLfixed result;
+    asm("smull  %x[result], %w[x], %w[y]                     \n"  // 64-bit signed product
+        "lsr    %x[result], %x[result], %x[shift]            \n"  // logical shift right by shift
+        "add    %w[result], %w[result], %w[a]                \n"  // 32-bit add of a
+        : [result]"=&r"(result)                               \
+        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
+        :
+        );
+    return result;
+}
+
+// AArch64: ((x*y) >> shift) - a, with no rounding term. The product is formed in a
+// 64-bit register, shifted right logically, and `a` is then subtracted in 32 bits.
+inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
+inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
+{
+    GGLfixed result;
+
+    asm("smull  %x[result], %w[x], %w[y]                     \n"
+        "lsr    %x[result], %x[result], %x[shift]            \n"
+        "sub    %w[result], %w[result], %w[a]                \n"
+        : [result]"=&r"(result)                               \
+        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \
+        :
+        );
+    return result;
+}
+inline int64_t gglMulii(int32_t x, int32_t y) CONST;  // AArch64: full 64-bit signed product of x and y
+inline int64_t gglMulii(int32_t x, int32_t y)
+{
+    int64_t res;
+    asm("smull  %x0, %w1, %w2 \n"  // 64-bit destination, 32-bit sources
+        : "=r"(res)
+        : "%r"(x), "r"(y)
+        :
+        );
+    return res;
+}
+
+#else // ----------------------------------------------------------------------
+
+// Portable fallbacks, selected when none of the architecture-specific branches applies.
+// gglMulx rounds to nearest: its bias is built as int64_t and is 0 when shift <= 0, so
+// neither `1 << -1` (shift == 0) nor the int overflow of `1 << 31` (shift == 32) can occur.
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
+    const int64_t bias = (shift > 0) ? (int64_t(1) << (shift - 1)) : 0;
+    return GGLfixed((int64_t(a) * b + bias) >> shift);
+}
+// gglMulAddx / gglMulSubx: ((a*b) >> shift) + c and - c, computed in 64 bits without a rounding bias.
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { return GGLfixed((int64_t(a)*b)>>shift) + c; }
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { return GGLfixed((int64_t(a)*b)>>shift) - c; }
+// gglMulii: full 64-bit product of two 32-bit integers.
+inline int64_t gglMulii(int32_t a, int32_t b) CONST;
+inline int64_t gglMulii(int32_t a, int32_t b) { return int64_t(a)*b; }
+
+#endif
+
+// ------------------------------------------------------------------------
+
+// Overloads without a shift argument: each forwards to the explicit-shift version
+// with a shift of 16. The prototypes are kept separate so that they can carry CONST.
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST;
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
+
+// gglMulx(a, b, 16)
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) { return gglMulx(a, b, 16); }
+// gglMulAddx(a, b, c, 16)
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) { return gglMulAddx(a, b, c, 16); }
+// gglMulSubx(a, b, c, 16)
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) { return gglMulSubx(a, b, c, 16); }
+
+// ------------------------------------------------------------------------
+
+// Count the leading zero bits of x. Returns 32 for x == 0 on every target.
+inline int32_t gglClz(int32_t x) CONST;
+inline int32_t gglClz(int32_t x) {
+    if (!x) return 32;  // checked up front: __builtin_clz(0) has an undefined result
+#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__)
+    return __builtin_clz(x);
+#else
+    int32_t exp = 31;   // narrowed by halving the search window: 16, 8, 4, 2, 1 bits
+    if (x & 0xFFFF0000) { exp -=16; x >>= 16; }
+    if (x & 0x0000ff00) { exp -= 8; x >>= 8; }
+    if (x & 0x000000f0) { exp -= 4; x >>= 4; }
+    if (x & 0x0000000c) { exp -= 2; x >>= 2; }
+    if (x & 0x00000002) { exp -= 1; }
+    return exp;
+#endif
+}
+
+// ------------------------------------------------------------------------
+
+int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST;
+
+// Division helpers: both names forward to gglDivQ() with its third argument fixed at 16.
+inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST;
+inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST;
+
+// gglDivQ16: gglDivQ(n, d, 16).
+inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) { return gglDivQ(n, d, 16); }
+
+// gglDivx: the same call under a second name.
+inline int32_t gglDivx(GGLfixed n, GGLfixed d) { return gglDivQ(n, d, 16); }
+
+// ------------------------------------------------------------------------
+
+// Very fast but coarse approximation of 1/x; x must be strictly positive.
+// For x in [0.5, 1[ the estimate is 1/x = 3 - 2*x, evaluated in 2.30 fixed-point.
+inline GGLfixed gglRecipFast(GGLfixed x) CONST;
+inline GGLfixed gglRecipFast(GGLfixed x)
+{
+    const int32_t lz = gglClz(x);
+    const uint32_t scaled = x << (lz - 1);        // normalize: the top set bit moves to bit 30
+    const uint32_t approx = 0xC0000000 - scaled;  // 3.0 in 2.30, minus the normalized value
+    return approx >> (30 - lz);                   // unsigned shift back down by (30 - lz)
+}
+
+// ------------------------------------------------------------------------
+
+inline GGLfixed gglClampx(GGLfixed c) CONST;  // clamp c to the range [0, FIXED_ONE]
+inline GGLfixed gglClampx(GGLfixed c)
+{
+#if defined(__thumb__)
+    // Branch-free clamp: zero negatives, reflect around FIXED_ONE, zero negatives again, reflect back.
+    c &= ~(c>>31);  c = FIXED_ONE - c;
+    c &= ~(c>>31);  c = FIXED_ONE - c;
+#else
+#if defined(__arm__)
+    // The single BIC below clears c when it is negative (c & ~(c >> 31)).
+    // It is written as inline asm because gcc would otherwise emit a
+    // cmp + movlt pair for the equivalent C expression.
+    asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
+#elif defined(__aarch64__)
+    asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c));  // same operation on the 32-bit register view
+#else
+    c &= ~(c>>31);  // portable form: the mask is all zeros for negative c, all ones otherwise
+#endif
+    if (c>FIXED_ONE)  // upper bound
+        c = FIXED_ONE;
+#endif
+    return c;
+}
+
+// ------------------------------------------------------------------------
+
+#endif // ANDROID_GGL_FIXED_H
diff --git a/libpixelflinger/tests/arch-arm64/assembler/Android.mk b/libpixelflinger/tests/arch-arm64/assembler/Android.mk
index 961f323..448d298 100644
--- a/libpixelflinger/tests/arch-arm64/assembler/Android.mk
+++ b/libpixelflinger/tests/arch-arm64/assembler/Android.mk
@@ -13,7 +13,7 @@
     libpixelflinger
 
 LOCAL_C_INCLUDES := \
-    system/core/libpixelflinger
+    $(LOCAL_PATH)/../../..
 
 LOCAL_MODULE:= test-pixelflinger-arm64-assembler-test
 
diff --git a/libpixelflinger/tests/arch-arm64/disassembler/Android.mk b/libpixelflinger/tests/arch-arm64/disassembler/Android.mk
index 8f62f09..d8f7e69 100644
--- a/libpixelflinger/tests/arch-arm64/disassembler/Android.mk
+++ b/libpixelflinger/tests/arch-arm64/disassembler/Android.mk
@@ -7,9 +7,6 @@
 
 LOCAL_SHARED_LIBRARIES :=
 
-LOCAL_C_INCLUDES := \
-    system/core/libpixelflinger/codeflinger
-
 LOCAL_MODULE:= test-pixelflinger-arm64-disassembler-test
 
 LOCAL_MODULE_TAGS := tests
diff --git a/libpixelflinger/tests/codegen/Android.mk b/libpixelflinger/tests/codegen/Android.mk
index bc07015..2f9ca2f 100644
--- a/libpixelflinger/tests/codegen/Android.mk
+++ b/libpixelflinger/tests/codegen/Android.mk
@@ -9,7 +9,7 @@
     libpixelflinger
 
 LOCAL_C_INCLUDES := \
-	system/core/libpixelflinger
+	$(LOCAL_PATH)/../..
 
 LOCAL_MODULE:= test-opengl-codegen
 
diff --git a/libpixelflinger/tests/gglmul/Android.mk b/libpixelflinger/tests/gglmul/Android.mk
index f479fa1..75bd39e 100644
--- a/libpixelflinger/tests/gglmul/Android.mk
+++ b/libpixelflinger/tests/gglmul/Android.mk
@@ -7,7 +7,7 @@
 LOCAL_SHARED_LIBRARIES :=
 
 LOCAL_C_INCLUDES := \
-	system/core/libpixelflinger
+	$(LOCAL_PATH)/../../include
 
 LOCAL_MODULE:= test-pixelflinger-gglmul