Revert "Revert "Add MIPS64r6 support for libpixelflinger""
This reverts commit 7fd121788a892a0dfd4a9594304cad85fe366349.
Change-Id: Ic1204a8407c235b07c643764d5f2800631fecd72
diff --git a/libpixelflinger/arch-mips/col32cb16blend.S b/libpixelflinger/arch-mips/col32cb16blend.S
new file mode 100644
index 0000000..5d18e55
--- /dev/null
+++ b/libpixelflinger/arch-mips/col32cb16blend.S
@@ -0,0 +1,134 @@
+/*
+** Copyright 2015, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+ .macro pixel dreg src f sR sG sB shift
+ /* \dreg = 565 pixel built from the 16-bit pixel at bit \shift of \src: each field * \f >> 8, plus \sR, \sG, \sB. Scratch: $t4, $t5, $t6. */
+#if __mips==32 && __mips_isa_rev>=2
+ /* extract red: 5 bits starting at bit \shift+11, then scale by \f */
+ ext $t4,\src,\shift+11,5
+ mul $t4,$t4,\f
+
+ /* extract green: 6 bits starting at bit \shift+5, then scale by \f */
+ ext $t5,\src,\shift+5,6
+ mul $t5,$t5,\f
+
+ /* extract blue: 5 bits starting at bit \shift, then scale by \f */
+ ext $t6,\src,\shift,5
+ mul $t6,$t6,\f
+#else
+ /* extract red (shift + mask form of the ext above), then scale by \f */
+ srl $t4,\src,\shift+11
+ andi $t4, 0x1f
+ mul $t4,$t4,\f
+
+ /* extract green (shift + mask form), then scale by \f */
+ srl $t5,\src,\shift+5
+ andi $t5, 0x3f
+ mul $t5,$t5,\f
+
+ /* extract blue (shift + mask form), then scale by \f */
+ srl $t6,\src,\shift
+ andi $t6, 0x1f
+ mul $t6,$t6,\f
+#endif
+ /* common tail: drop the 8 fraction bits of each product, add the constant source fields, repack */
+ srl $t4,$t4,8
+ srl $t5,$t5,8
+ srl $t6,$t6,8
+ addu $t4,$t4,\sR
+ addu $t5,$t5,\sG
+ addu \dreg,$t6,\sB /* \dreg starts out as the blue field (bits 4..0) */
+ sll $t4,$t4,11 /* red goes to bits 15..11 */
+ sll $t5,$t5,5 /* green goes to bits 10..5 */
+ or \dreg,\dreg,$t4
+ or \dreg,\dreg,$t5
+ andi \dreg, 0xffff /* result is always in the low 16 bits, whatever \shift was */
+ .endm
+
+ .text
+ .align
+ /* scanline_col32cb16blend_mips: blends one 32-bit color ($a1) into $a2 16-bit pixels at $a0, in place */
+ .global scanline_col32cb16blend_mips
+ .ent scanline_col32cb16blend_mips
+scanline_col32cb16blend_mips:
+ /* values kept for the whole routine: $v0 = f, $a3 = sR, $t0 = sG, $t1 = sB; the pixel macro uses $t4-$t6 as scratch */
+ /* check if count is zero */
+ srl $v0,$a1,24 /* sA */
+ beqz $a2,done
+ li $t4, 0x100
+ srl $v1,$v0,7 /* sA >> 7 */
+ addu $v0,$v1,$v0 /* sA + (sA >> 7) */
+ subu $v0,$t4,$v0 /* f = 0x100 - (sA + (sA >> 7)) */
+#if __mips==32 && __mips_isa_rev>=2
+ ext $a3,$a1,3,5 /* sR = color bits 7..3 */
+ ext $t0,$a1,10,6 /* sG = color bits 15..10 */
+ ext $t1,$a1,19,5 /* sB = color bits 23..19 */
+#else
+ srl $a3, $a1, 3
+ andi $a3, 0x1f /* sR = color bits 7..3 */
+ srl $t0, $a1, 10
+ andi $t0, 0x3f /* sG = color bits 15..10 */
+ srl $t1, $a1, 19
+ andi $t1, 0x1f /* sB = color bits 23..19 */
+#endif
+
+ /* check if cnt is at least 4 */
+ addiu $a2,$a2,-4 /* from here $a2 = pixels remaining - 4 */
+ bltz $a2,tail
+ /* NOTE(review): the loop below uses lw/sw on $a0 and this routine has no alignment step - confirm $a0 is word-aligned here or that unaligned word access is acceptable on the target */
+loop_4pixels:
+ lw $t7,0($a0) /* pixels 0 and 1 (two 16-bit pixels per word) */
+ lw $t8,4($a0) /* pixels 2 and 3 */
+ addiu $a0,$a0,8
+ addiu $a2,$a2,-4
+ pixel $t2 $t7 $v0 $a3 $t0 $t1 0 /* blend the pixel in bits 15..0 of $t7 */
+ pixel $t3 $t7 $v0 $a3 $t0 $t1 16 /* blend the pixel in bits 31..16 of $t7 */
+#if __mips==32 && __mips_isa_rev>=2
+ ins $t2,$t3,16,16 /* repack: $t3 result into the upper half of $t2 */
+#else
+ sll $t3, 16
+ or $t2, $t2, $t3 /* repack: $t3 result into the upper half of $t2 */
+#endif
+ pixel $t7 $t8 $v0 $a3 $t0 $t1 0 /* $t7 is free again and reused as the destination */
+ pixel $t3 $t8 $v0 $a3 $t0 $t1 16
+#if __mips==32 && __mips_isa_rev>=2
+ ins $t7,$t3,16,16
+#else
+ sll $t3, 16
+ or $t7, $t7, $t3
+#endif
+ sw $t2,-8($a0) /* negative offsets: $a0 was already advanced by 8 */
+ sw $t7,-4($a0)
+ bgez $a2, loop_4pixels /* $a2 >= 0 means at least 4 more pixels remain */
+
+tail:
+ /* the pixel count underran, restore it now */
+ addiu $a2,$a2,4
+
+ /* handle the last 0..3 pixels */
+ beqz $a2,done
+
+loop_1pixel:
+ lhu $t7,0($a0) /* one 16-bit pixel, zero-extended */
+ addiu $a0,$a0,2
+ addiu $a2,$a2,-1
+ pixel $t2 $t7 $v0 $a3 $t0 $t1 0
+ sh $t2, -2($a0) /* $a0 was already advanced by 2 */
+ bnez $a2,loop_1pixel
+
+done:
+ j $ra
+ .end scanline_col32cb16blend_mips
diff --git a/libpixelflinger/arch-mips/t32cb16blend.S b/libpixelflinger/arch-mips/t32cb16blend.S
index c911fbb..236a2c9 100644
--- a/libpixelflinger/arch-mips/t32cb16blend.S
+++ b/libpixelflinger/arch-mips/t32cb16blend.S
@@ -33,232 +33,241 @@
*/
#if __mips==32 && __mips_isa_rev>=2
- .macro pixel dreg src fb shift
- /*
- * sA = s >> 24
- * f = 0x100 - (sA + (sA>>7))
- */
-DBG .set noat
-DBG rdhwr $at,$2
-DBG .set at
+ .macro pixel dreg src fb shift
+ /* Blend 32-bit \src over the 5/6/5 field of \dreg at bit \shift; result goes to \fb. Scratch: $t0, $t6, $t7, $t8.
+ * sA = s >> 24
+ * f = 0x100 - (sA + (sA>>7))
+ */
+DBG .set noat
+DBG rdhwr $at,$2
+DBG .set at
- srl $t7,\src,24
- srl $t6,$t7,7
- addu $t7,$t6
- li $t6,0x100
- subu $t7,$t6,$t7
+ srl $t7,\src,24 /* sA */
+ srl $t6,$t7,7 /* sA>>7 */
+ addu $t7,$t6 /* sA + (sA>>7) */
+ li $t6,0x100
+ subu $t7,$t6,$t7 /* $t7 = f, used by all three channels */
- /* red */
- ext $t8,\dreg,\shift+6+5,5 # dst[\shift:15..11]
- mul $t6,$t8,$t7
- ext $t0,\dreg,\shift+5,6 # start green extraction dst[\shift:10..5]
- ext $t8,\src,3,5 # src[7..3]
- srl $t6,8
- addu $t8,$t6
- ins \fb,$t8,\shift+6+5,5 # dst[\shift:15..11]
+ /* red: (dst red * f >> 8) + src red */
+ ext $t8,\dreg,\shift+6+5,5 # dst[\shift:15..11]
+ mul $t6,$t8,$t7
+ ext $t0,\dreg,\shift+5,6 # start green extraction dst[\shift:10..5]
+ ext $t8,\src,3,5 # src[7..3]
+ srl $t6,8 /* (dst red * f) >> 8 */
+ addu $t8,$t6
+.if \shift!=0
+ sll $t8,\shift+11
+ or \fb,$t8 /* nonzero shift: OR red into the existing \fb */
+.else
+ sll \fb,$t8,11 /* shift 0: red overwrites \fb, no prior clearing needed */
+.endif
- /* green */
- mul $t8,$t0,$t7
- ext $t0,\dreg,\shift,5 # start blue extraction dst[\shift:4..0]
- ext $t6,\src,2+8,6 # src[15..10]
- srl $t8,8
- addu $t8,$t6
+ /* green: (dst green * f >> 8) + src green; OR'ed into \fb below, in the blue section */
+ mul $t8,$t0,$t7
+ ext $t0,\dreg,\shift,5 # start blue extraction dst[\shift:4..0]
+ ext $t6,\src,2+8,6 # src[15..10]
+ srl $t8,8
+ addu $t8,$t6
- /* blue */
- mul $t0,$t0,$t7
- ins \fb,$t8,\shift+5,6 # finish green insertion dst[\shift:10..5]
- ext $t6,\src,(3+8+8),5
- srl $t8,$t0,8
- addu $t8,$t6
- ins \fb,$t8,\shift,5
+ /* blue: (dst blue * f >> 8) + src blue */
+ mul $t0,$t0,$t7
+ sll $t8, $t8, \shift+5 /* finish green: move it to bit \shift+5 */
+ or \fb, \fb, $t8 /* OR (not insert): relies on those \fb bits being zero */
+ ext $t6,\src,(3+8+8),5 /* src[23..19] */
+ srl $t8,$t0,8
+ addu $t8,$t6
+ sll $t8, $t8, \shift /* move blue to bit \shift */
+ or \fb, \fb, $t8
-DBG .set noat
-DBG rdhwr $t8,$2
-DBG subu $t8,$at
-DBG sltu $at,$t8,$v0
-DBG movn $v0,$t8,$at
-DBG sgtu $at,$t8,$v1
-DBG movn $v1,$t8,$at
-DBG .set at
- .endm
+DBG .set noat
+DBG rdhwr $t8,$2
+DBG subu $t8,$at
+DBG sltu $at,$t8,$v0
+DBG movn $v0,$t8,$at
+DBG sgtu $at,$t8,$v1
+DBG movn $v1,$t8,$at
+DBG .set at
+ .endm
#else
- .macro pixel dreg src fb shift
- /*
- * sA = s >> 24
- * f = 0x100 - (sA + (sA>>7))
- */
-DBG .set push
-DBG .set noat
-DBG .set mips32r2
-DBG rdhwr $at,$2
-DBG .set pop
+ .macro pixel dreg src fb shift
+ /* Blend 32-bit \src over the 5/6/5 field of \dreg at bit \shift; result goes to \fb. Scratch: $t6, $t7, $t8.
+ * sA = s >> 24
+ * f = 0x100 - (sA + (sA>>7))
+ */
+DBG .set push
+DBG .set noat
+DBG .set mips32r2
+DBG rdhwr $at,$2
+DBG .set pop
- srl $t7,\src,24
- srl $t6,$t7,7
- addu $t7,$t6
- li $t6,0x100
- subu $t7,$t6,$t7
+ srl $t7,\src,24 /* sA */
+ srl $t6,$t7,7 /* sA>>7 */
+ addu $t7,$t6 /* sA + (sA>>7) */
+ li $t6,0x100
+ subu $t7,$t6,$t7 /* $t7 = f, used by all three channels */
- /*
- * red
- * dR = (d >> (6 + 5)) & 0x1f;
- * dR = (f*dR)>>8
- * sR = (s >> ( 3)) & 0x1f;
- * sR += dR
- * fb |= sR << 11
- */
- srl $t8,\dreg,\shift+6+5
+ /*
+ * red
+ * dR = (d >> (6 + 5)) & 0x1f;
+ * dR = (f*dR)>>8
+ * sR = (s >> ( 3)) & 0x1f;
+ * sR += dR
+ * fb |= sR << 11
+ */
+ srl $t8,\dreg,\shift+6+5
.if \shift==0
- and $t8,0x1f
+ and $t8,0x1f /* only needed for shift 0; a 32-bit srl by 16+11 already leaves just 5 bits */
.endif
- mul $t8,$t8,$t7
- srl $t6,\src,3
- and $t6,0x1f
- srl $t8,8
- addu $t8,$t6
+ mul $t8,$t8,$t7 /* f*dR */
+ srl $t6,\src,3
+ and $t6,0x1f /* sR */
+ srl $t8,8 /* (f*dR)>>8 */
+ addu $t8,$t6 /* sR + dR */
.if \shift!=0
- sll $t8,\shift+11
- or \fb,$t8
+ sll $t8,\shift+11
+ or \fb,$t8 /* nonzero shift: OR red into the existing \fb */
.else
- sll \fb,$t8,11
+ sll \fb,$t8,11 /* shift 0: red overwrites \fb, no prior clearing needed */
.endif
/*
- * green
- * dG = (d >> 5) & 0x3f
- * dG = (f*dG) >> 8
- * sG = (s >> ( 8+2))&0x3F;
- */
- srl $t8,\dreg,\shift+5
- and $t8,0x3f
- mul $t8,$t8,$t7
- srl $t6,\src,8+2
- and $t6,0x3f
- srl $t8,8
- addu $t8,$t6
- sll $t8,\shift + 5
- or \fb,$t8
+ * green
+ * dG = (d >> 5) & 0x3f
+ * dG = (f*dG) >> 8
+ * sG = (s >> ( 8+2))&0x3F;
+ */
+ srl $t8,\dreg,\shift+5
+ and $t8,0x3f /* dG */
+ mul $t8,$t8,$t7 /* f*dG */
+ srl $t6,\src,8+2
+ and $t6,0x3f /* sG */
+ srl $t8,8
+ addu $t8,$t6 /* sG + dG */
+ sll $t8,\shift + 5 /* move green to bit \shift+5 */
+ or \fb,$t8
- /* blue */
+ /* blue: same steps on the 5-bit field at bit \shift, with src bits 23..19 */
.if \shift!=0
- srl $t8,\dreg,\shift
- and $t8,0x1f
+ srl $t8,\dreg,\shift
+ and $t8,0x1f /* dB */
.else
- and $t8,\dreg,0x1f
+ and $t8,\dreg,0x1f /* dB; shift 0 needs no srl */
.endif
- mul $t8,$t8,$t7
- srl $t6,\src,(8+8+3)
- and $t6,0x1f
- srl $t8,8
- addu $t8,$t6
+ mul $t8,$t8,$t7 /* f*dB */
+ srl $t6,\src,(8+8+3)
+ and $t6,0x1f /* sB */
+ srl $t8,8
+ addu $t8,$t6 /* sB + dB */
.if \shift!=0
- sll $t8,\shift
+ sll $t8,\shift /* move blue to bit \shift */
.endif
- or \fb,$t8
-DBG .set push
-DBG .set noat
-DBG .set mips32r2
-DBG rdhwr $t8,$2
-DBG subu $t8,$at
-DBG sltu $at,$t8,$v0
-DBG movn $v0,$t8,$at
-DBG sgtu $at,$t8,$v1
-DBG movn $v1,$t8,$at
-DBG .set pop
- .endm
+ or \fb,$t8
+DBG .set push
+DBG .set noat
+DBG .set mips32r2
+DBG rdhwr $t8,$2
+DBG subu $t8,$at
+DBG sltu $at,$t8,$v0
+DBG movn $v0,$t8,$at
+DBG sgtu $at,$t8,$v1
+DBG movn $v1,$t8,$at
+DBG .set pop
+ .endm
#endif
- .text
- .align
+ .text
+ .align
- .global scanline_t32cb16blend_mips
- .ent scanline_t32cb16blend_mips
+ .global scanline_t32cb16blend_mips
+ .ent scanline_t32cb16blend_mips
scanline_t32cb16blend_mips:
-DBG li $v0,0xffffffff
-DBG li $v1,0
- /* Align the destination if necessary */
- and $t0,$a0,3
- beqz $t0,aligned
+DBG li $v0,0xffffffff
+DBG li $v1,0
+ /* $a0 = 16-bit dst pixels, $a1 = 32-bit src pixels, $a2 = pixel count. Align the destination if necessary */
+ and $t0,$a0,3
+ beqz $t0,aligned
- /* as long as there is at least one pixel */
- beqz $a2,done
+ /* as long as there is at least one pixel */
+ beqz $a2,done
- lw $t4,($a1)
- addu $a0,2
- addu $a1,4
- beqz $t4,1f
- lhu $t3,-2($a0)
- pixel $t3,$t4,$t1,0
- sh $t1,-2($a0)
-1: subu $a2,1
+ lw $t4,($a1) /* one leading pixel so that the word accesses below see an aligned $a0 */
+ addu $a0,2
+ addu $a1,4
+ beqz $t4,1f /* src pixel is all zero: leave the destination untouched */
+ lhu $t3,-2($a0)
+ pixel $t3,$t4,$t1,0
+ sh $t1,-2($a0)
+1: subu $a2,1
aligned:
- /* Check to see if its worth unrolling the loop */
- subu $a2,4
- bltz $a2,tail
+ /* Check to see if its worth unrolling the loop */
+ subu $a2,4 /* from here $a2 = pixels remaining - 4 */
+ bltz $a2,tail
- /* Process 4 pixels at a time */
+ /* Process 4 pixels at a time */
fourpixels:
- /* 1st pair of pixels */
- lw $t4,0($a1)
- lw $t5,4($a1)
- addu $a0,8
- addu $a1,16
+ /* 1st pair of pixels */
+ lw $t4,0($a1)
+ lw $t5,4($a1)
+ addu $a0,8 /* pointers advance up front; later accesses use negative offsets */
+ addu $a1,16
- /* both are zero, skip this pair */
- or $t3,$t4,$t5
- beqz $t3,1f
+ /* both are zero, skip this pair */
+ or $t3,$t4,$t5
+ beqz $t3,1f
- /* load the destination */
- lw $t3,-8($a0)
+ /* load the destination: two 16-bit pixels; the first src is applied to bits 15..0 (NOTE(review): that is the first pixel only on little-endian - confirm target) */
+ lw $t3,-8($a0)
- pixel $t3,$t4,$t1,0
- pixel $t3,$t5,$t1,16
- sw $t1,-8($a0)
+ pixel $t3,$t4,$t1,0
+ andi $t1, 0xFFFF /* keep only the first pixel's 16 bits before the second pixel is OR'ed in */
+ pixel $t3,$t5,$t1,16
+ sw $t1,-8($a0)
1:
- /* 2nd pair of pixels */
- lw $t4,-8($a1)
- lw $t5,-4($a1)
+ /* 2nd pair of pixels */
+ lw $t4,-8($a1)
+ lw $t5,-4($a1)
- /* both are zero, skip this pair */
- or $t3,$t4,$t5
- beqz $t3,1f
+ /* both are zero, skip this pair */
+ or $t3,$t4,$t5
+ beqz $t3,1f
- /* load the destination */
- lw $t3,-4($a0)
+ /* load the destination */
+ lw $t3,-4($a0)
- pixel $t3,$t4,$t1,0
- pixel $t3,$t5,$t1,16
- sw $t1,-4($a0)
+ pixel $t3,$t4,$t1,0
+ andi $t1, 0xFFFF /* keep only the first pixel's 16 bits before the second pixel is OR'ed in */
+ pixel $t3,$t5,$t1,16
+ sw $t1,-4($a0)
-1: subu $a2,4
- bgtz $a2,fourpixels
+1: subu $a2,4
+ bgez $a2,fourpixels /* $a2 >= 0: at least 4 pixels remain (same test as the loop entry), so stay in the unrolled loop */
tail:
- /* the pixel count underran, restore it now */
- addu $a2,4
+ /* the pixel count underran, restore it now */
+ addu $a2,4
- /* handle the last 0..3 pixels */
- beqz $a2,done
+ /* handle the last 0..3 pixels */
+ beqz $a2,done
onepixel:
- lw $t4,($a1)
- addu $a0,2
- addu $a1,4
- beqz $t4,1f
- lhu $t3,-2($a0)
- pixel $t3,$t4,$t1,0
- sh $t1,-2($a0)
-1: subu $a2,1
- bnez $a2,onepixel
+ lw $t4,($a1)
+ addu $a0,2
+ addu $a1,4
+ beqz $t4,1f /* src pixel is all zero: leave the destination untouched */
+ lhu $t3,-2($a0)
+ pixel $t3,$t4,$t1,0
+ sh $t1,-2($a0)
+1: subu $a2,1
+ bnez $a2,onepixel
done:
-DBG .set push
-DBG .set mips32r2
-DBG rdhwr $a0,$3
-DBG mul $v0,$a0
-DBG mul $v1,$a0
-DBG .set pop
- j $ra
- .end scanline_t32cb16blend_mips
+DBG .set push
+DBG .set mips32r2
+DBG rdhwr $a0,$3
+DBG mul $v0,$a0
+DBG mul $v1,$a0
+DBG .set pop
+ j $ra
+ .end scanline_t32cb16blend_mips