Revert "Revert "Add MIPS64r6 support for libpixelflinger""
This reverts commit 7fd121788a892a0dfd4a9594304cad85fe366349.
Change-Id: Ic1204a8407c235b07c643764d5f2800631fecd72
diff --git a/libpixelflinger/arch-mips/t32cb16blend.S b/libpixelflinger/arch-mips/t32cb16blend.S
index c911fbb..236a2c9 100644
--- a/libpixelflinger/arch-mips/t32cb16blend.S
+++ b/libpixelflinger/arch-mips/t32cb16blend.S
@@ -33,232 +33,241 @@
*/
#if __mips==32 && __mips_isa_rev>=2
- .macro pixel dreg src fb shift
- /*
- * sA = s >> 24
- * f = 0x100 - (sA + (sA>>7))
- */
-DBG .set noat
-DBG rdhwr $at,$2
-DBG .set at
+ .macro pixel dreg src fb shift
+ /*
+ * sA = s >> 24
+ * f = 0x100 - (sA + (sA>>7))
+ */
+DBG .set noat
+DBG rdhwr $at,$2
+DBG .set at
- srl $t7,\src,24
- srl $t6,$t7,7
- addu $t7,$t6
- li $t6,0x100
- subu $t7,$t6,$t7
+ srl $t7,\src,24
+ srl $t6,$t7,7
+ addu $t7,$t6
+ li $t6,0x100
+ subu $t7,$t6,$t7
- /* red */
- ext $t8,\dreg,\shift+6+5,5 # dst[\shift:15..11]
- mul $t6,$t8,$t7
- ext $t0,\dreg,\shift+5,6 # start green extraction dst[\shift:10..5]
- ext $t8,\src,3,5 # src[7..3]
- srl $t6,8
- addu $t8,$t6
- ins \fb,$t8,\shift+6+5,5 # dst[\shift:15..11]
+ /* red */
+ ext $t8,\dreg,\shift+6+5,5 # dst[\shift:15..11]
+ mul $t6,$t8,$t7
+ ext $t0,\dreg,\shift+5,6 # start green extraction dst[\shift:10..5]
+ ext $t8,\src,3,5 # src[7..3]
+ srl $t6,8
+ addu $t8,$t6
+.if \shift!=0
+ sll $t8,\shift+11
+ or \fb,$t8
+.else
+ sll \fb,$t8,11
+.endif
- /* green */
- mul $t8,$t0,$t7
- ext $t0,\dreg,\shift,5 # start blue extraction dst[\shift:4..0]
- ext $t6,\src,2+8,6 # src[15..10]
- srl $t8,8
- addu $t8,$t6
+ /* green */
+ mul $t8,$t0,$t7
+ ext $t0,\dreg,\shift,5 # start blue extraction dst[\shift:4..0]
+ ext $t6,\src,2+8,6 # src[15..10]
+ srl $t8,8
+ addu $t8,$t6
- /* blue */
- mul $t0,$t0,$t7
- ins \fb,$t8,\shift+5,6 # finish green insertion dst[\shift:10..5]
- ext $t6,\src,(3+8+8),5
- srl $t8,$t0,8
- addu $t8,$t6
- ins \fb,$t8,\shift,5
+ /* blue */
+ mul $t0,$t0,$t7
+ sll $t8, $t8, \shift+5
+ or \fb, \fb, $t8
+ ext $t6,\src,(3+8+8),5
+ srl $t8,$t0,8
+ addu $t8,$t6
+ sll $t8, $t8, \shift
+ or \fb, \fb, $t8
-DBG .set noat
-DBG rdhwr $t8,$2
-DBG subu $t8,$at
-DBG sltu $at,$t8,$v0
-DBG movn $v0,$t8,$at
-DBG sgtu $at,$t8,$v1
-DBG movn $v1,$t8,$at
-DBG .set at
- .endm
+DBG .set noat
+DBG rdhwr $t8,$2
+DBG subu $t8,$at
+DBG sltu $at,$t8,$v0
+DBG movn $v0,$t8,$at
+DBG sgtu $at,$t8,$v1
+DBG movn $v1,$t8,$at
+DBG .set at
+ .endm
#else
- .macro pixel dreg src fb shift
- /*
- * sA = s >> 24
- * f = 0x100 - (sA + (sA>>7))
- */
-DBG .set push
-DBG .set noat
-DBG .set mips32r2
-DBG rdhwr $at,$2
-DBG .set pop
+ .macro pixel dreg src fb shift
+ /*
+ * sA = s >> 24
+ * f = 0x100 - (sA + (sA>>7))
+ */
+DBG .set push
+DBG .set noat
+DBG .set mips32r2
+DBG rdhwr $at,$2
+DBG .set pop
- srl $t7,\src,24
- srl $t6,$t7,7
- addu $t7,$t6
- li $t6,0x100
- subu $t7,$t6,$t7
+ srl $t7,\src,24
+ srl $t6,$t7,7
+ addu $t7,$t6
+ li $t6,0x100
+ subu $t7,$t6,$t7
- /*
- * red
- * dR = (d >> (6 + 5)) & 0x1f;
- * dR = (f*dR)>>8
- * sR = (s >> ( 3)) & 0x1f;
- * sR += dR
- * fb |= sR << 11
- */
- srl $t8,\dreg,\shift+6+5
+ /*
+ * red
+ * dR = (d >> (6 + 5)) & 0x1f;
+ * dR = (f*dR)>>8
+ * sR = (s >> ( 3)) & 0x1f;
+ * sR += dR
+ * fb |= sR << 11
+ */
+ srl $t8,\dreg,\shift+6+5
.if \shift==0
- and $t8,0x1f
+ and $t8,0x1f
.endif
- mul $t8,$t8,$t7
- srl $t6,\src,3
- and $t6,0x1f
- srl $t8,8
- addu $t8,$t6
+ mul $t8,$t8,$t7
+ srl $t6,\src,3
+ and $t6,0x1f
+ srl $t8,8
+ addu $t8,$t6
.if \shift!=0
- sll $t8,\shift+11
- or \fb,$t8
+ sll $t8,\shift+11
+ or \fb,$t8
.else
- sll \fb,$t8,11
+ sll \fb,$t8,11
.endif
/*
- * green
- * dG = (d >> 5) & 0x3f
- * dG = (f*dG) >> 8
- * sG = (s >> ( 8+2))&0x3F;
- */
- srl $t8,\dreg,\shift+5
- and $t8,0x3f
- mul $t8,$t8,$t7
- srl $t6,\src,8+2
- and $t6,0x3f
- srl $t8,8
- addu $t8,$t6
- sll $t8,\shift + 5
- or \fb,$t8
+ * green
+ * dG = (d >> 5) & 0x3f
+ * dG = (f*dG) >> 8
+ * sG = (s >> ( 8+2))&0x3F;
+ */
+ srl $t8,\dreg,\shift+5
+ and $t8,0x3f
+ mul $t8,$t8,$t7
+ srl $t6,\src,8+2
+ and $t6,0x3f
+ srl $t8,8
+ addu $t8,$t6
+ sll $t8,\shift + 5
+ or \fb,$t8
- /* blue */
+ /* blue */
.if \shift!=0
- srl $t8,\dreg,\shift
- and $t8,0x1f
+ srl $t8,\dreg,\shift
+ and $t8,0x1f
.else
- and $t8,\dreg,0x1f
+ and $t8,\dreg,0x1f
.endif
- mul $t8,$t8,$t7
- srl $t6,\src,(8+8+3)
- and $t6,0x1f
- srl $t8,8
- addu $t8,$t6
+ mul $t8,$t8,$t7
+ srl $t6,\src,(8+8+3)
+ and $t6,0x1f
+ srl $t8,8
+ addu $t8,$t6
.if \shift!=0
- sll $t8,\shift
+ sll $t8,\shift
.endif
- or \fb,$t8
-DBG .set push
-DBG .set noat
-DBG .set mips32r2
-DBG rdhwr $t8,$2
-DBG subu $t8,$at
-DBG sltu $at,$t8,$v0
-DBG movn $v0,$t8,$at
-DBG sgtu $at,$t8,$v1
-DBG movn $v1,$t8,$at
-DBG .set pop
- .endm
+ or \fb,$t8
+DBG .set push
+DBG .set noat
+DBG .set mips32r2
+DBG rdhwr $t8,$2
+DBG subu $t8,$at
+DBG sltu $at,$t8,$v0
+DBG movn $v0,$t8,$at
+DBG sgtu $at,$t8,$v1
+DBG movn $v1,$t8,$at
+DBG .set pop
+ .endm
#endif
- .text
- .align
+ .text
+ .align
- .global scanline_t32cb16blend_mips
- .ent scanline_t32cb16blend_mips
+ .global scanline_t32cb16blend_mips
+ .ent scanline_t32cb16blend_mips
scanline_t32cb16blend_mips:
-DBG li $v0,0xffffffff
-DBG li $v1,0
- /* Align the destination if necessary */
- and $t0,$a0,3
- beqz $t0,aligned
+DBG li $v0,0xffffffff
+DBG li $v1,0
+ /* Align the destination if necessary */
+ and $t0,$a0,3
+ beqz $t0,aligned
- /* as long as there is at least one pixel */
- beqz $a2,done
+ /* as long as there is at least one pixel */
+ beqz $a2,done
- lw $t4,($a1)
- addu $a0,2
- addu $a1,4
- beqz $t4,1f
- lhu $t3,-2($a0)
- pixel $t3,$t4,$t1,0
- sh $t1,-2($a0)
-1: subu $a2,1
+ lw $t4,($a1)
+ addu $a0,2
+ addu $a1,4
+ beqz $t4,1f
+ lhu $t3,-2($a0)
+ pixel $t3,$t4,$t1,0
+ sh $t1,-2($a0)
+1: subu $a2,1
aligned:
- /* Check to see if its worth unrolling the loop */
- subu $a2,4
- bltz $a2,tail
+ /* Check to see if its worth unrolling the loop */
+ subu $a2,4
+ bltz $a2,tail
- /* Process 4 pixels at a time */
+ /* Process 4 pixels at a time */
fourpixels:
- /* 1st pair of pixels */
- lw $t4,0($a1)
- lw $t5,4($a1)
- addu $a0,8
- addu $a1,16
+ /* 1st pair of pixels */
+ lw $t4,0($a1)
+ lw $t5,4($a1)
+ addu $a0,8
+ addu $a1,16
- /* both are zero, skip this pair */
- or $t3,$t4,$t5
- beqz $t3,1f
+ /* both are zero, skip this pair */
+ or $t3,$t4,$t5
+ beqz $t3,1f
- /* load the destination */
- lw $t3,-8($a0)
+ /* load the destination */
+ lw $t3,-8($a0)
- pixel $t3,$t4,$t1,0
- pixel $t3,$t5,$t1,16
- sw $t1,-8($a0)
+ pixel $t3,$t4,$t1,0
+ andi $t1, 0xFFFF
+ pixel $t3,$t5,$t1,16
+ sw $t1,-8($a0)
1:
- /* 2nd pair of pixels */
- lw $t4,-8($a1)
- lw $t5,-4($a1)
+ /* 2nd pair of pixels */
+ lw $t4,-8($a1)
+ lw $t5,-4($a1)
- /* both are zero, skip this pair */
- or $t3,$t4,$t5
- beqz $t3,1f
+ /* both are zero, skip this pair */
+ or $t3,$t4,$t5
+ beqz $t3,1f
- /* load the destination */
- lw $t3,-4($a0)
+ /* load the destination */
+ lw $t3,-4($a0)
- pixel $t3,$t4,$t1,0
- pixel $t3,$t5,$t1,16
- sw $t1,-4($a0)
+ pixel $t3,$t4,$t1,0
+ andi $t1, 0xFFFF
+ pixel $t3,$t5,$t1,16
+ sw $t1,-4($a0)
-1: subu $a2,4
- bgtz $a2,fourpixels
+1: subu $a2,4
+ bgtz $a2,fourpixels
tail:
- /* the pixel count underran, restore it now */
- addu $a2,4
+ /* the pixel count underran, restore it now */
+ addu $a2,4
- /* handle the last 0..3 pixels */
- beqz $a2,done
+ /* handle the last 0..3 pixels */
+ beqz $a2,done
onepixel:
- lw $t4,($a1)
- addu $a0,2
- addu $a1,4
- beqz $t4,1f
- lhu $t3,-2($a0)
- pixel $t3,$t4,$t1,0
- sh $t1,-2($a0)
-1: subu $a2,1
- bnez $a2,onepixel
+ lw $t4,($a1)
+ addu $a0,2
+ addu $a1,4
+ beqz $t4,1f
+ lhu $t3,-2($a0)
+ pixel $t3,$t4,$t1,0
+ sh $t1,-2($a0)
+1: subu $a2,1
+ bnez $a2,onepixel
done:
-DBG .set push
-DBG .set mips32r2
-DBG rdhwr $a0,$3
-DBG mul $v0,$a0
-DBG mul $v1,$a0
-DBG .set pop
- j $ra
- .end scanline_t32cb16blend_mips
+DBG .set push
+DBG .set mips32r2
+DBG rdhwr $a0,$3
+DBG mul $v0,$a0
+DBG mul $v1,$a0
+DBG .set pop
+ j $ra
+ .end scanline_t32cb16blend_mips