Revert "Revert "Add MIPS64r6 support for libpixelflinger""

This reverts commit 7fd121788a892a0dfd4a9594304cad85fe366349.

Change-Id: Ic1204a8407c235b07c643764d5f2800631fecd72
diff --git a/libpixelflinger/arch-mips/col32cb16blend.S b/libpixelflinger/arch-mips/col32cb16blend.S
new file mode 100644
index 0000000..5d18e55
--- /dev/null
+++ b/libpixelflinger/arch-mips/col32cb16blend.S
@@ -0,0 +1,134 @@
+/*
+** Copyright 2015, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+       .macro pixel dreg src f sR sG sB shift
+
+#if __mips==32 && __mips_isa_rev>=2
+       /* extract red */
+       ext $t4,\src,\shift+11,5
+       mul $t4,$t4,\f
+
+       /* extract green */
+       ext $t5,\src,\shift+5,6
+       mul $t5,$t5,\f
+
+       /* extract blue */
+       ext $t6,\src,\shift,5
+       mul $t6,$t6,\f
+#else
+       /* extract red */
+       srl $t4,\src,\shift+11
+       andi $t4, 0x1f
+       mul $t4,$t4,\f
+
+       /* extract green */
+       srl $t5,\src,\shift+5
+       andi $t5, 0x3f
+       mul $t5,$t5,\f
+
+       /* extract blue */
+       srl $t6,\src,\shift
+       andi $t6, 0x1f
+       mul $t6,$t6,\f
+#endif
+
+       srl $t4,$t4,8
+       srl $t5,$t5,8
+       srl $t6,$t6,8
+       addu $t4,$t4,\sR
+       addu $t5,$t5,\sG
+       addu \dreg,$t6,\sB
+       sll $t4,$t4,11
+       sll $t5,$t5,5
+       or \dreg,\dreg,$t4
+       or \dreg,\dreg,$t5
+       andi \dreg, 0xffff
+       .endm
+
+       .text
+       .align
+
+       .global scanline_col32cb16blend_mips
+       .ent    scanline_col32cb16blend_mips
+scanline_col32cb16blend_mips:
+
+       /* check if count is zero */
+       srl     $v0,$a1,24 /* sA */
+       beqz    $a2,done
+       li      $t4, 0x100
+       srl     $v1,$v0,7
+       addu    $v0,$v1,$v0
+       subu    $v0,$t4,$v0 /* f */
+#if __mips==32 && __mips_isa_rev>=2
+       ext     $a3,$a1,3,5 /* sR */
+       ext     $t0,$a1,10,6 /* sG */
+       ext     $t1,$a1,19,5 /* sB */
+#else
+       srl     $a3, $a1, 3
+       andi    $a3, 0x1f    /* sR */
+       srl     $t0, $a1, 10
+       andi    $t0, 0x3f    /* sG */
+       srl     $t1, $a1, 19
+       andi    $t1, 0x1f    /* sB */
+#endif
+
+       /* check if cnt is at least 4 */
+       addiu   $a2,$a2,-4
+       bltz    $a2,tail
+
+loop_4pixels:
+       lw      $t7,0($a0)
+       lw      $t8,4($a0)
+       addiu   $a0,$a0,8
+       addiu   $a2,$a2,-4
+       pixel   $t2 $t7 $v0 $a3 $t0 $t1 0
+       pixel   $t3 $t7 $v0 $a3 $t0 $t1 16
+#if __mips==32 && __mips_isa_rev>=2
+       ins     $t2,$t3,16,16
+#else
+       sll $t3, 16
+       or  $t2, $t2, $t3
+#endif
+       pixel   $t7 $t8 $v0 $a3 $t0 $t1 0
+       pixel   $t3 $t8 $v0 $a3 $t0 $t1 16
+#if __mips==32 && __mips_isa_rev>=2
+       ins     $t7,$t3,16,16
+#else
+       sll $t3, 16
+       or  $t7, $t7, $t3
+#endif
+       sw      $t2,-8($a0)
+       sw      $t7,-4($a0)
+       bgez    $a2, loop_4pixels
+
+tail:
+       /* the pixel count underran, restore it now */
+       addiu   $a2,$a2,4
+
+       /* handle the last 0..3 pixels */
+       beqz    $a2,done
+
+loop_1pixel:
+       lhu     $t7,0($a0)
+       addiu   $a0,$a0,2
+       addiu   $a2,$a2,-1
+       pixel   $t2 $t7 $v0 $a3 $t0 $t1 0
+       sh      $t2, -2($a0)
+       bnez    $a2,loop_1pixel
+
+done:
+       j       $ra
+       .end    scanline_col32cb16blend_mips
diff --git a/libpixelflinger/arch-mips/t32cb16blend.S b/libpixelflinger/arch-mips/t32cb16blend.S
index c911fbb..236a2c9 100644
--- a/libpixelflinger/arch-mips/t32cb16blend.S
+++ b/libpixelflinger/arch-mips/t32cb16blend.S
@@ -33,232 +33,241 @@
  */
 
 #if __mips==32 && __mips_isa_rev>=2
-	.macro pixel dreg src fb shift
-	/*
-	 * sA = s >> 24
-	 * f = 0x100 - (sA + (sA>>7))
-	 */
-DBG	.set	noat
-DBG	rdhwr	$at,$2
-DBG	.set	at
+    .macro pixel dreg src fb shift
+    /*
+     * sA = s >> 24
+     * f = 0x100 - (sA + (sA>>7))
+     */
+DBG .set    noat
+DBG rdhwr   $at,$2
+DBG .set    at
 
-	srl	$t7,\src,24
-	srl	$t6,$t7,7
-	addu	$t7,$t6
-	li	$t6,0x100
-	subu	$t7,$t6,$t7
+    srl  $t7,\src,24
+    srl  $t6,$t7,7
+    addu $t7,$t6
+    li   $t6,0x100
+    subu $t7,$t6,$t7
 
-	/* red */
-	ext	$t8,\dreg,\shift+6+5,5			# dst[\shift:15..11]
-	mul	$t6,$t8,$t7
-	ext	$t0,\dreg,\shift+5,6			# start green extraction dst[\shift:10..5]
-	ext	$t8,\src,3,5				# src[7..3]
-	srl	$t6,8
-	addu	$t8,$t6
-	ins	\fb,$t8,\shift+6+5,5			# dst[\shift:15..11]
+    /* red */
+    ext  $t8,\dreg,\shift+6+5,5         # dst[\shift:15..11]
+    mul  $t6,$t8,$t7
+    ext  $t0,\dreg,\shift+5,6           # start green extraction dst[\shift:10..5]
+    ext  $t8,\src,3,5               # src[7..3]
+    srl  $t6,8
+    addu $t8,$t6
+.if \shift!=0
+    sll  $t8,\shift+11
+    or   \fb,$t8
+.else
+    sll  \fb,$t8,11
+.endif
 
-        /* green */
-	mul	$t8,$t0,$t7
-	ext	$t0,\dreg,\shift,5			# start blue extraction dst[\shift:4..0]
-	ext	$t6,\src,2+8,6				# src[15..10]
-	srl	$t8,8
-        addu	$t8,$t6
+    /* green */
+    mul  $t8,$t0,$t7
+    ext  $t0,\dreg,\shift,5         # start blue extraction dst[\shift:4..0]
+    ext  $t6,\src,2+8,6             # src[15..10]
+    srl  $t8,8
+    addu $t8,$t6
 
-	/* blue */
-	mul	$t0,$t0,$t7
-	ins	\fb,$t8,\shift+5,6			# finish green insertion dst[\shift:10..5]
-	ext	$t6,\src,(3+8+8),5
-	srl	$t8,$t0,8
-	addu	$t8,$t6
-	ins	\fb,$t8,\shift,5
+    /* blue */
+    mul  $t0,$t0,$t7
+    sll  $t8, $t8, \shift+5
+    or   \fb, \fb, $t8
+    ext  $t6,\src,(3+8+8),5
+    srl  $t8,$t0,8
+    addu $t8,$t6
+    sll  $t8, $t8, \shift
+    or   \fb, \fb, $t8
 
-DBG	.set	noat
-DBG	rdhwr	$t8,$2
-DBG	subu	$t8,$at
-DBG	sltu	$at,$t8,$v0
-DBG	movn	$v0,$t8,$at
-DBG	sgtu	$at,$t8,$v1
-DBG	movn	$v1,$t8,$at
-DBG	.set	at
-	.endm
+DBG .set    noat
+DBG rdhwr $t8,$2
+DBG subu  $t8,$at
+DBG sltu  $at,$t8,$v0
+DBG movn  $v0,$t8,$at
+DBG sgtu  $at,$t8,$v1
+DBG movn  $v1,$t8,$at
+DBG .set    at
+    .endm
 
 #else
 
-	.macro pixel dreg src fb shift
-	/*
-	 * sA = s >> 24
-	 * f = 0x100 - (sA + (sA>>7))
-	 */
-DBG	.set	push
-DBG	.set	noat
-DBG	.set	mips32r2
-DBG 	rdhwr	$at,$2
-DBG	.set	pop
+    .macro pixel dreg src fb shift
+    /*
+     * sA = s >> 24
+     * f = 0x100 - (sA + (sA>>7))
+     */
+DBG .set    push
+DBG .set    noat
+DBG .set    mips32r2
+DBG rdhwr   $at,$2
+DBG .set    pop
 
-	srl	$t7,\src,24
-	srl	$t6,$t7,7
-	addu	$t7,$t6
-	li	$t6,0x100
-	subu	$t7,$t6,$t7
+    srl  $t7,\src,24
+    srl  $t6,$t7,7
+    addu $t7,$t6
+    li   $t6,0x100
+    subu $t7,$t6,$t7
 
-	/*
-	 * red
-	 * dR = (d >> (6 + 5)) & 0x1f;
-	 * dR = (f*dR)>>8
-	 * sR = (s >> (   3)) & 0x1f;
-	 * sR += dR
-	 * fb |= sR << 11
-	 */
-	srl	$t8,\dreg,\shift+6+5
+    /*
+     * red
+     * dR = (d >> (6 + 5)) & 0x1f;
+     * dR = (f*dR)>>8
+     * sR = (s >> (   3)) & 0x1f;
+     * sR += dR
+     * fb |= sR << 11
+     */
+    srl  $t8,\dreg,\shift+6+5
 .if \shift==0
-	and     $t8,0x1f
+    and  $t8,0x1f
 .endif
-	mul	$t8,$t8,$t7
-	srl	$t6,\src,3
-	and	$t6,0x1f
-	srl	$t8,8
-	addu	$t8,$t6
+    mul  $t8,$t8,$t7
+    srl  $t6,\src,3
+    and  $t6,0x1f
+    srl  $t8,8
+    addu $t8,$t6
 .if \shift!=0
-	sll	$t8,\shift+11
-	or	\fb,$t8
+    sll  $t8,\shift+11
+    or   \fb,$t8
 .else
-	sll	\fb,$t8,11
+    sll  \fb,$t8,11
 .endif
 
         /*
-	 * green
-	 * dG = (d >> 5) & 0x3f
-	 * dG = (f*dG) >> 8
-	 * sG = (s >> ( 8+2))&0x3F;
-	 */
-	srl	$t8,\dreg,\shift+5
-        and	$t8,0x3f
-	mul	$t8,$t8,$t7
-        srl	$t6,\src,8+2
-        and     $t6,0x3f
-	srl	$t8,8
-        addu	$t8,$t6
-	sll	$t8,\shift + 5
-	or	\fb,$t8
+     * green
+     * dG = (d >> 5) & 0x3f
+     * dG = (f*dG) >> 8
+     * sG = (s >> ( 8+2))&0x3F;
+     */
+    srl  $t8,\dreg,\shift+5
+    and  $t8,0x3f
+    mul  $t8,$t8,$t7
+    srl  $t6,\src,8+2
+    and  $t6,0x3f
+    srl  $t8,8
+    addu $t8,$t6
+    sll  $t8,\shift + 5
+    or   \fb,$t8
 
-	/* blue */
+    /* blue */
 .if \shift!=0
-	srl	$t8,\dreg,\shift
-	and	$t8,0x1f
+    srl  $t8,\dreg,\shift
+    and  $t8,0x1f
 .else
-	and	$t8,\dreg,0x1f
+    and  $t8,\dreg,0x1f
 .endif
-	mul	$t8,$t8,$t7
-	srl	$t6,\src,(8+8+3)
-	and	$t6,0x1f
-	srl	$t8,8
-	addu	$t8,$t6
+    mul  $t8,$t8,$t7
+    srl  $t6,\src,(8+8+3)
+    and  $t6,0x1f
+    srl  $t8,8
+    addu $t8,$t6
 .if \shift!=0
-	sll	$t8,\shift
+    sll  $t8,\shift
 .endif
-	or	\fb,$t8
-DBG	.set	push
-DBG	.set	noat
-DBG	.set	mips32r2
-DBG	rdhwr	$t8,$2
-DBG	subu	$t8,$at
-DBG	sltu	$at,$t8,$v0
-DBG	movn	$v0,$t8,$at
-DBG	sgtu	$at,$t8,$v1
-DBG	movn	$v1,$t8,$at
-DBG	.set	pop
-	.endm
+    or   \fb,$t8
+DBG .set    push
+DBG .set    noat
+DBG .set    mips32r2
+DBG rdhwr   $t8,$2
+DBG subu    $t8,$at
+DBG sltu    $at,$t8,$v0
+DBG movn    $v0,$t8,$at
+DBG sgtu    $at,$t8,$v1
+DBG movn    $v1,$t8,$at
+DBG .set    pop
+    .endm
 #endif
 
-	.text
-	.align
+    .text
+    .align
 
-	.global scanline_t32cb16blend_mips
-	.ent	scanline_t32cb16blend_mips
+    .global scanline_t32cb16blend_mips
+    .ent    scanline_t32cb16blend_mips
 scanline_t32cb16blend_mips:
-DBG	li	$v0,0xffffffff
-DBG	li	$v1,0
-	/* Align the destination if necessary */
-	and	$t0,$a0,3
-	beqz	$t0,aligned
+DBG li    $v0,0xffffffff
+DBG li    $v1,0
+    /* Align the destination if necessary */
+    and   $t0,$a0,3
+    beqz  $t0,aligned
 
-	/* as long as there is at least one pixel */
-	beqz	$a2,done
+    /* as long as there is at least one pixel */
+    beqz  $a2,done
 
-	lw	$t4,($a1)
-	addu	$a0,2
-	addu	$a1,4
-	beqz	$t4,1f
-	lhu	$t3,-2($a0)
-	pixel   $t3,$t4,$t1,0
-	sh	$t1,-2($a0)
-1:	subu	$a2,1
+    lw    $t4,($a1)
+    addu  $a0,2
+    addu  $a1,4
+    beqz  $t4,1f
+    lhu   $t3,-2($a0)
+    pixel $t3,$t4,$t1,0
+    sh    $t1,-2($a0)
+1:  subu  $a2,1
 
 aligned:
-	/* Check to see if its worth unrolling the loop */
-	subu	$a2,4
-	bltz	$a2,tail
+    /* Check to see if its worth unrolling the loop */
+    subu  $a2,4
+    bltz  $a2,tail
 
-	/* Process 4 pixels at a time */
+    /* Process 4 pixels at a time */
 fourpixels:
-	/* 1st pair of pixels */
-	lw	$t4,0($a1)
-	lw	$t5,4($a1)
-	addu	$a0,8
-	addu	$a1,16
+    /* 1st pair of pixels */
+    lw    $t4,0($a1)
+    lw    $t5,4($a1)
+    addu  $a0,8
+    addu  $a1,16
 
-	/* both are zero, skip this pair */
-	or	$t3,$t4,$t5
-	beqz	$t3,1f
+    /* both are zero, skip this pair */
+    or    $t3,$t4,$t5
+    beqz  $t3,1f
 
-	/* load the destination */
-	lw	$t3,-8($a0)
+    /* load the destination */
+    lw    $t3,-8($a0)
 
-	pixel	$t3,$t4,$t1,0
-	pixel	$t3,$t5,$t1,16
-	sw	$t1,-8($a0)
+    pixel $t3,$t4,$t1,0
+    andi  $t1, 0xFFFF
+    pixel $t3,$t5,$t1,16
+    sw    $t1,-8($a0)
 
 1:
-	/* 2nd pair of pixels */
-	lw	$t4,-8($a1)
-	lw	$t5,-4($a1)
+    /* 2nd pair of pixels */
+    lw    $t4,-8($a1)
+    lw    $t5,-4($a1)
 
-	/* both are zero, skip this pair */
-	or	$t3,$t4,$t5
-	beqz	$t3,1f
+    /* both are zero, skip this pair */
+    or    $t3,$t4,$t5
+    beqz  $t3,1f
 
-	/* load the destination */
-	lw	$t3,-4($a0)
+    /* load the destination */
+    lw    $t3,-4($a0)
 
-	pixel	$t3,$t4,$t1,0
-	pixel	$t3,$t5,$t1,16
-	sw	$t1,-4($a0)
+    pixel $t3,$t4,$t1,0
+    andi  $t1, 0xFFFF
+    pixel $t3,$t5,$t1,16
+    sw    $t1,-4($a0)
 
-1:	subu    $a2,4
-	bgtz	$a2,fourpixels
+1:  subu  $a2,4
+    bgtz  $a2,fourpixels
 
 tail:
-	/* the pixel count underran, restore it now */
-	addu	$a2,4
+    /* the pixel count underran, restore it now */
+    addu  $a2,4
 
-	/* handle the last 0..3 pixels */
-	beqz	$a2,done
+    /* handle the last 0..3 pixels */
+    beqz  $a2,done
 onepixel:
-	lw	$t4,($a1)
-	addu	$a0,2
-	addu	$a1,4
-	beqz	$t4,1f
-	lhu	$t3,-2($a0)
-	pixel   $t3,$t4,$t1,0
-	sh	$t1,-2($a0)
-1:	subu	$a2,1
-	bnez	$a2,onepixel
+    lw    $t4,($a1)
+    addu  $a0,2
+    addu  $a1,4
+    beqz  $t4,1f
+    lhu   $t3,-2($a0)
+    pixel $t3,$t4,$t1,0
+    sh    $t1,-2($a0)
+1:  subu  $a2,1
+    bnez  $a2,onepixel
 done:
-DBG	.set    push
-DBG	.set    mips32r2
-DBG 	rdhwr	$a0,$3
-DBG 	mul	$v0,$a0
-DBG 	mul	$v1,$a0
-DBG	.set    pop
-	j	$ra
-	.end	scanline_t32cb16blend_mips
+DBG .set    push
+DBG .set    mips32r2
+DBG rdhwr   $a0,$3
+DBG mul     $v0,$a0
+DBG mul     $v1,$a0
+DBG .set    pop
+    j     $ra
+    .end    scanline_t32cb16blend_mips