Revert "Revert "Add MIPS64r6 support for libpixelflinger""

This reverts commit 7fd121788a892a0dfd4a9594304cad85fe366349.

Change-Id: Ic1204a8407c235b07c643764d5f2800631fecd72
diff --git a/libpixelflinger/arch-mips/t32cb16blend.S b/libpixelflinger/arch-mips/t32cb16blend.S
index c911fbb..236a2c9 100644
--- a/libpixelflinger/arch-mips/t32cb16blend.S
+++ b/libpixelflinger/arch-mips/t32cb16blend.S
@@ -33,232 +33,241 @@
  */
 
 #if __mips==32 && __mips_isa_rev>=2
-	.macro pixel dreg src fb shift
-	/*
-	 * sA = s >> 24
-	 * f = 0x100 - (sA + (sA>>7))
-	 */
-DBG	.set	noat
-DBG	rdhwr	$at,$2
-DBG	.set	at
+    .macro pixel dreg src fb shift
+    /*
+     * sA = s >> 24
+     * f = 0x100 - (sA + (sA>>7))
+     */
+DBG .set    noat
+DBG rdhwr   $at,$2
+DBG .set    at
 
-	srl	$t7,\src,24
-	srl	$t6,$t7,7
-	addu	$t7,$t6
-	li	$t6,0x100
-	subu	$t7,$t6,$t7
+    srl  $t7,\src,24
+    srl  $t6,$t7,7
+    addu $t7,$t6
+    li   $t6,0x100
+    subu $t7,$t6,$t7
 
-	/* red */
-	ext	$t8,\dreg,\shift+6+5,5			# dst[\shift:15..11]
-	mul	$t6,$t8,$t7
-	ext	$t0,\dreg,\shift+5,6			# start green extraction dst[\shift:10..5]
-	ext	$t8,\src,3,5				# src[7..3]
-	srl	$t6,8
-	addu	$t8,$t6
-	ins	\fb,$t8,\shift+6+5,5			# dst[\shift:15..11]
+    /* red */
+    ext  $t8,\dreg,\shift+6+5,5         # dst[\shift:15..11]
+    mul  $t6,$t8,$t7
+    ext  $t0,\dreg,\shift+5,6           # start green extraction dst[\shift:10..5]
+    ext  $t8,\src,3,5               # src[7..3]
+    srl  $t6,8
+    addu $t8,$t6
+.if \shift!=0
+    sll  $t8,\shift+11
+    or   \fb,$t8
+.else
+    sll  \fb,$t8,11
+.endif
 
-        /* green */
-	mul	$t8,$t0,$t7
-	ext	$t0,\dreg,\shift,5			# start blue extraction dst[\shift:4..0]
-	ext	$t6,\src,2+8,6				# src[15..10]
-	srl	$t8,8
-        addu	$t8,$t6
+    /* green */
+    mul  $t8,$t0,$t7
+    ext  $t0,\dreg,\shift,5         # start blue extraction dst[\shift:4..0]
+    ext  $t6,\src,2+8,6             # src[15..10]
+    srl  $t8,8
+    addu $t8,$t6
 
-	/* blue */
-	mul	$t0,$t0,$t7
-	ins	\fb,$t8,\shift+5,6			# finish green insertion dst[\shift:10..5]
-	ext	$t6,\src,(3+8+8),5
-	srl	$t8,$t0,8
-	addu	$t8,$t6
-	ins	\fb,$t8,\shift,5
+    /* blue */
+    mul  $t0,$t0,$t7
+    sll  $t8, $t8, \shift+5
+    or   \fb, \fb, $t8
+    ext  $t6,\src,(3+8+8),5
+    srl  $t8,$t0,8
+    addu $t8,$t6
+    sll  $t8, $t8, \shift
+    or   \fb, \fb, $t8
 
-DBG	.set	noat
-DBG	rdhwr	$t8,$2
-DBG	subu	$t8,$at
-DBG	sltu	$at,$t8,$v0
-DBG	movn	$v0,$t8,$at
-DBG	sgtu	$at,$t8,$v1
-DBG	movn	$v1,$t8,$at
-DBG	.set	at
-	.endm
+DBG .set    noat
+DBG rdhwr $t8,$2
+DBG subu  $t8,$at
+DBG sltu  $at,$t8,$v0
+DBG movn  $v0,$t8,$at
+DBG sgtu  $at,$t8,$v1
+DBG movn  $v1,$t8,$at
+DBG .set    at
+    .endm
 
 #else
 
-	.macro pixel dreg src fb shift
-	/*
-	 * sA = s >> 24
-	 * f = 0x100 - (sA + (sA>>7))
-	 */
-DBG	.set	push
-DBG	.set	noat
-DBG	.set	mips32r2
-DBG 	rdhwr	$at,$2
-DBG	.set	pop
+    .macro pixel dreg src fb shift
+    /*
+     * sA = s >> 24
+     * f = 0x100 - (sA + (sA>>7))
+     */
+DBG .set    push
+DBG .set    noat
+DBG .set    mips32r2
+DBG rdhwr   $at,$2
+DBG .set    pop
 
-	srl	$t7,\src,24
-	srl	$t6,$t7,7
-	addu	$t7,$t6
-	li	$t6,0x100
-	subu	$t7,$t6,$t7
+    srl  $t7,\src,24
+    srl  $t6,$t7,7
+    addu $t7,$t6
+    li   $t6,0x100
+    subu $t7,$t6,$t7
 
-	/*
-	 * red
-	 * dR = (d >> (6 + 5)) & 0x1f;
-	 * dR = (f*dR)>>8
-	 * sR = (s >> (   3)) & 0x1f;
-	 * sR += dR
-	 * fb |= sR << 11
-	 */
-	srl	$t8,\dreg,\shift+6+5
+    /*
+     * red
+     * dR = (d >> (6 + 5)) & 0x1f;
+     * dR = (f*dR)>>8
+     * sR = (s >> (   3)) & 0x1f;
+     * sR += dR
+     * fb |= sR << 11
+     */
+    srl  $t8,\dreg,\shift+6+5
 .if \shift==0
-	and     $t8,0x1f
+    and  $t8,0x1f
 .endif
-	mul	$t8,$t8,$t7
-	srl	$t6,\src,3
-	and	$t6,0x1f
-	srl	$t8,8
-	addu	$t8,$t6
+    mul  $t8,$t8,$t7
+    srl  $t6,\src,3
+    and  $t6,0x1f
+    srl  $t8,8
+    addu $t8,$t6
 .if \shift!=0
-	sll	$t8,\shift+11
-	or	\fb,$t8
+    sll  $t8,\shift+11
+    or   \fb,$t8
 .else
-	sll	\fb,$t8,11
+    sll  \fb,$t8,11
 .endif
 
         /*
-	 * green
-	 * dG = (d >> 5) & 0x3f
-	 * dG = (f*dG) >> 8
-	 * sG = (s >> ( 8+2))&0x3F;
-	 */
-	srl	$t8,\dreg,\shift+5
-        and	$t8,0x3f
-	mul	$t8,$t8,$t7
-        srl	$t6,\src,8+2
-        and     $t6,0x3f
-	srl	$t8,8
-        addu	$t8,$t6
-	sll	$t8,\shift + 5
-	or	\fb,$t8
+     * green
+     * dG = (d >> 5) & 0x3f
+     * dG = (f*dG) >> 8
+     * sG = (s >> ( 8+2))&0x3F;
+     */
+    srl  $t8,\dreg,\shift+5
+    and  $t8,0x3f
+    mul  $t8,$t8,$t7
+    srl  $t6,\src,8+2
+    and  $t6,0x3f
+    srl  $t8,8
+    addu $t8,$t6
+    sll  $t8,\shift + 5
+    or   \fb,$t8
 
-	/* blue */
+    /* blue */
 .if \shift!=0
-	srl	$t8,\dreg,\shift
-	and	$t8,0x1f
+    srl  $t8,\dreg,\shift
+    and  $t8,0x1f
 .else
-	and	$t8,\dreg,0x1f
+    and  $t8,\dreg,0x1f
 .endif
-	mul	$t8,$t8,$t7
-	srl	$t6,\src,(8+8+3)
-	and	$t6,0x1f
-	srl	$t8,8
-	addu	$t8,$t6
+    mul  $t8,$t8,$t7
+    srl  $t6,\src,(8+8+3)
+    and  $t6,0x1f
+    srl  $t8,8
+    addu $t8,$t6
 .if \shift!=0
-	sll	$t8,\shift
+    sll  $t8,\shift
 .endif
-	or	\fb,$t8
-DBG	.set	push
-DBG	.set	noat
-DBG	.set	mips32r2
-DBG	rdhwr	$t8,$2
-DBG	subu	$t8,$at
-DBG	sltu	$at,$t8,$v0
-DBG	movn	$v0,$t8,$at
-DBG	sgtu	$at,$t8,$v1
-DBG	movn	$v1,$t8,$at
-DBG	.set	pop
-	.endm
+    or   \fb,$t8
+DBG .set    push
+DBG .set    noat
+DBG .set    mips32r2
+DBG rdhwr   $t8,$2
+DBG subu    $t8,$at
+DBG sltu    $at,$t8,$v0
+DBG movn    $v0,$t8,$at
+DBG sgtu    $at,$t8,$v1
+DBG movn    $v1,$t8,$at
+DBG .set    pop
+    .endm
 #endif
 
-	.text
-	.align
+    .text
+    .align
 
-	.global scanline_t32cb16blend_mips
-	.ent	scanline_t32cb16blend_mips
+    .global scanline_t32cb16blend_mips
+    .ent    scanline_t32cb16blend_mips
 scanline_t32cb16blend_mips:
-DBG	li	$v0,0xffffffff
-DBG	li	$v1,0
-	/* Align the destination if necessary */
-	and	$t0,$a0,3
-	beqz	$t0,aligned
+DBG li    $v0,0xffffffff
+DBG li    $v1,0
+    /* Align the destination if necessary */
+    and   $t0,$a0,3
+    beqz  $t0,aligned
 
-	/* as long as there is at least one pixel */
-	beqz	$a2,done
+    /* as long as there is at least one pixel */
+    beqz  $a2,done
 
-	lw	$t4,($a1)
-	addu	$a0,2
-	addu	$a1,4
-	beqz	$t4,1f
-	lhu	$t3,-2($a0)
-	pixel   $t3,$t4,$t1,0
-	sh	$t1,-2($a0)
-1:	subu	$a2,1
+    lw    $t4,($a1)
+    addu  $a0,2
+    addu  $a1,4
+    beqz  $t4,1f
+    lhu   $t3,-2($a0)
+    pixel $t3,$t4,$t1,0
+    sh    $t1,-2($a0)
+1:  subu  $a2,1
 
 aligned:
-	/* Check to see if its worth unrolling the loop */
-	subu	$a2,4
-	bltz	$a2,tail
+    /* Check to see if it's worth unrolling the loop */
+    subu  $a2,4
+    bltz  $a2,tail
 
-	/* Process 4 pixels at a time */
+    /* Process 4 pixels at a time */
 fourpixels:
-	/* 1st pair of pixels */
-	lw	$t4,0($a1)
-	lw	$t5,4($a1)
-	addu	$a0,8
-	addu	$a1,16
+    /* 1st pair of pixels */
+    lw    $t4,0($a1)
+    lw    $t5,4($a1)
+    addu  $a0,8
+    addu  $a1,16
 
-	/* both are zero, skip this pair */
-	or	$t3,$t4,$t5
-	beqz	$t3,1f
+    /* both are zero, skip this pair */
+    or    $t3,$t4,$t5
+    beqz  $t3,1f
 
-	/* load the destination */
-	lw	$t3,-8($a0)
+    /* load the destination */
+    lw    $t3,-8($a0)
 
-	pixel	$t3,$t4,$t1,0
-	pixel	$t3,$t5,$t1,16
-	sw	$t1,-8($a0)
+    pixel $t3,$t4,$t1,0
+    andi  $t1, 0xFFFF
+    pixel $t3,$t5,$t1,16
+    sw    $t1,-8($a0)
 
 1:
-	/* 2nd pair of pixels */
-	lw	$t4,-8($a1)
-	lw	$t5,-4($a1)
+    /* 2nd pair of pixels */
+    lw    $t4,-8($a1)
+    lw    $t5,-4($a1)
 
-	/* both are zero, skip this pair */
-	or	$t3,$t4,$t5
-	beqz	$t3,1f
+    /* both are zero, skip this pair */
+    or    $t3,$t4,$t5
+    beqz  $t3,1f
 
-	/* load the destination */
-	lw	$t3,-4($a0)
+    /* load the destination */
+    lw    $t3,-4($a0)
 
-	pixel	$t3,$t4,$t1,0
-	pixel	$t3,$t5,$t1,16
-	sw	$t1,-4($a0)
+    pixel $t3,$t4,$t1,0
+    andi  $t1, 0xFFFF
+    pixel $t3,$t5,$t1,16
+    sw    $t1,-4($a0)
 
-1:	subu    $a2,4
-	bgtz	$a2,fourpixels
+1:  subu  $a2,4
+    bgtz  $a2,fourpixels
 
 tail:
-	/* the pixel count underran, restore it now */
-	addu	$a2,4
+    /* the pixel count underran, restore it now */
+    addu  $a2,4
 
-	/* handle the last 0..3 pixels */
-	beqz	$a2,done
+    /* handle the last 0..4 pixels (the bgtz loop exit also leaves exactly 4) */
+    beqz  $a2,done
 onepixel:
-	lw	$t4,($a1)
-	addu	$a0,2
-	addu	$a1,4
-	beqz	$t4,1f
-	lhu	$t3,-2($a0)
-	pixel   $t3,$t4,$t1,0
-	sh	$t1,-2($a0)
-1:	subu	$a2,1
-	bnez	$a2,onepixel
+    lw    $t4,($a1)
+    addu  $a0,2
+    addu  $a1,4
+    beqz  $t4,1f
+    lhu   $t3,-2($a0)
+    pixel $t3,$t4,$t1,0
+    sh    $t1,-2($a0)
+1:  subu  $a2,1
+    bnez  $a2,onepixel
 done:
-DBG	.set    push
-DBG	.set    mips32r2
-DBG 	rdhwr	$a0,$3
-DBG 	mul	$v0,$a0
-DBG 	mul	$v1,$a0
-DBG	.set    pop
-	j	$ra
-	.end	scanline_t32cb16blend_mips
+DBG .set    push
+DBG .set    mips32r2
+DBG rdhwr   $a0,$3
+DBG mul     $v0,$a0
+DBG mul     $v1,$a0
+DBG .set    pop
+    j     $ra
+    .end    scanline_t32cb16blend_mips