Add MIPS64r6 support for libpixelflinger

Added ArmToMips64Assembler class that translates ARM instructions to MIPS64r6
instructions.
Added MIPS64r6 assembly implementations of the col32cb16blend and t32cb16blend
functions (scanline_col32cb16blend_mips64, scanline_t32cb16blend_mips64).
Added MIPS32r2 assembly implementation of the col32cb16blend function.
Added tests for MIPS64r6 (assembler, disassembler and assembly implementation
functions).
Added MIPS32 tests for assembly implementation functions.
Minor bug fixes for MIPS32r2 branch.

Change-Id: I69e49622117be5b8167628e9702db6aafb1849d7
diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp
index 3d14531..a718b02 100644
--- a/libpixelflinger/scanline.cpp
+++ b/libpixelflinger/scanline.cpp
@@ -41,6 +41,8 @@
 #include "codeflinger/Arm64Assembler.h"
 #elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
 #include "codeflinger/MIPSAssembler.h"
+#elif defined(__mips__) && defined(__LP64__)
+#include "codeflinger/MIPS64Assembler.h"
 #endif
 //#include "codeflinger/ARMAssemblerOptimizer.h"
 
@@ -59,7 +61,7 @@
 #   define ANDROID_CODEGEN      ANDROID_CODEGEN_GENERATED
 #endif
 
-#if defined(__arm__) || (defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6) || defined(__aarch64__)
+#if defined(__arm__) || (defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__))) || defined(__aarch64__)
 #   define ANDROID_ARM_CODEGEN  1
 #else
 #   define ANDROID_ARM_CODEGEN  0
@@ -73,7 +75,7 @@
  */
 #define DEBUG_NEEDS  0
 
-#if defined( __mips__) && !defined(__LP64__) && __mips_isa_rev < 6
+#if defined( __mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__))
 #define ASSEMBLY_SCRATCH_SIZE   4096
 #elif defined(__aarch64__)
 #define ASSEMBLY_SCRATCH_SIZE   8192
@@ -136,6 +138,9 @@
 extern "C" void scanline_col32cb16blend_arm64(uint16_t *dst, uint32_t col, size_t ct);
 #elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
 extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t);
+#elif defined(__mips__) && defined(__LP64__)
+extern "C" void scanline_t32cb16blend_mips64(uint16_t*, uint32_t*, size_t);
+extern "C" void scanline_col32cb16blend_mips64(uint16_t *dst, uint32_t col, size_t ct);
 #endif
 
 // ----------------------------------------------------------------------------
@@ -286,7 +291,7 @@
 
 #if ANDROID_ARM_CODEGEN
 
-#if defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
+#if defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__))
 static CodeCache gCodeCache(32 * 1024);
 #elif defined(__aarch64__)
 static CodeCache gCodeCache(48 * 1024);
@@ -406,8 +411,10 @@
         //GGLAssembler assembler(
         //        new ARMAssemblerOptimizer(new ARMAssembler(a)) );
 #endif
-#if defined(__mips__)
+#if defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
         GGLAssembler assembler( new ArmToMipsAssembler(a) );
+#elif defined(__mips__) && defined(__LP64__)
+        GGLAssembler assembler( new ArmToMips64Assembler(a) );
 #elif defined(__aarch64__)
         GGLAssembler assembler( new ArmToArm64Assembler(a) );
 #endif
@@ -2103,6 +2110,8 @@
 #endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
 #elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__aarch64__))
     scanline_col32cb16blend_arm64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
+#elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__mips__) && defined(__LP64__)))
+    scanline_col32cb16blend_mips64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
 #else
     uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
     int sA = (s>>24);
@@ -2175,7 +2184,8 @@
 
 void scanline_t32cb16blend(context_t* c)
 {
-#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || (defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6) || defined(__aarch64__)))
+#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__aarch64__) || \
+    (defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__)))))
     int32_t x = c->iterators.xl;
     size_t ct = c->iterators.xr - x;
     int32_t y = c->iterators.y;
@@ -2191,8 +2201,10 @@
     scanline_t32cb16blend_arm(dst, src, ct);
 #elif defined(__aarch64__)
     scanline_t32cb16blend_arm64(dst, src, ct);
-#elif defined(__mips__)
+#elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
     scanline_t32cb16blend_mips(dst, src, ct);
+#elif defined(__mips__) && defined(__LP64__)
+    scanline_t32cb16blend_mips64(dst, src, ct);
 #endif
 #else
     dst_iterator16  di(c);