libpixelflinger/arch-mips/t32cb16blend.S - android_system_core - Gitiles

 /* libs/pixelflinger/t32cb16blend.S
 **
 ** Copyright 2010, The Android Open Source Project
 **
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 **
 **     http://www.apache.org/licenses/LICENSE-2.0
 **
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 */

 /*
  * DBG is a line prefix for the profiling-only statements in this file.
  * Without DEBUG it expands to the assembler comment character, so every
  * DBG-prefixed line is ignored; with DEBUG it expands to nothing and
  * the line is assembled.
  */
 #ifndef DEBUG
 #define DBG #
 #else
 #define DBG
 #endif

 /*
  * blend one of 2 16bpp RGB pixels held in dreg selected by shift
  * with the 32bpp ABGR pixel held in src and store the result in fb
  *
  * Assumes that the dreg data is little endian and that
  * the second pixel (shift==16) will be merged into
  * the fb result
  *
  * Uses $t0,$t6,$t7,$t8
  */

 #if __mips==32 && __mips_isa_rev>=2
     .macro pixel dreg src fb shift
     /*
      * MIPS32R2 variant: destination and source fields are pulled out
      * with ext.
      *
      *   dreg  : register holding the 16bpp destination pixel(s)
      *   src   : register holding the 32bpp source pixel
      *   fb    : result register; overwritten when shift is 0, OR-ed
      *           into when shift is non-zero
      *   shift : bit offset of the selected pixel inside dreg
      *
      * Scratch: $t0, $t6, $t7, $t8 (DEBUG builds also use $at, $v0, $v1).
      *
      * Each channel C is computed as sC + ((f * dC) >> 8), with
      *   sA = s >> 24
      *   f  = 0x100 - (sA + (sA >> 7))
      */

     /* DEBUG only: sample hardware register 2 into $at before the blend */
 DBG .set    noat
 DBG rdhwr   $at, $2
 DBG .set    at

     srl     $t7, \src, 24               # sA
     srl     $t6, $t7, 7                 # sA >> 7
     addu    $t7, $t7, $t6               # sA + (sA >> 7)
     li      $t6, 0x100
     subu    $t7, $t6, $t7               # f

     /* red: destination bits 15..11 of the selected pixel, source bits 7..3 */
     ext     $t8, \dreg, \shift+6+5, 5   # dR
     mul     $t6, $t8, $t7               # f * dR
     ext     $t0, \dreg, \shift+5, 6     # dG, extracted ahead of its use
     ext     $t8, \src, 3, 5             # sR
     srl     $t6, $t6, 8
     addu    $t8, $t8, $t6               # sR + ((f * dR) >> 8)
 .if \shift==0
     sll     \fb, $t8, 11                # first pixel: initialise fb
 .else
     sll     $t8, $t8, \shift+11
     or      \fb, \fb, $t8               # later pixel: merge into fb
 .endif

     /* green: destination bits 10..5 of the selected pixel, source bits 15..10 */
     mul     $t8, $t0, $t7               # f * dG
     ext     $t0, \dreg, \shift, 5       # dB, extracted ahead of its use
     ext     $t6, \src, 2+8, 6           # sG
     srl     $t8, $t8, 8
     addu    $t8, $t8, $t6               # sG + ((f * dG) >> 8)

     /* blue: destination bits 4..0 of the selected pixel, source bits 23..19 */
     mul     $t0, $t0, $t7               # f * dB
     sll     $t8, $t8, \shift+5
     or      \fb, \fb, $t8               # merge green
     ext     $t6, \src, (3+8+8), 5       # sB
     srl     $t8, $t0, 8
     addu    $t8, $t8, $t6               # sB + ((f * dB) >> 8)
     sll     $t8, $t8, \shift
     or      \fb, \fb, $t8               # merge blue

     /*
      * DEBUG only: re-read hardware register 2, subtract the start value
      * and keep the unsigned minimum in $v0 and maximum in $v1.
      */
 DBG .set    noat
 DBG rdhwr   $t8, $2
 DBG subu    $t8, $t8, $at
 DBG sltu    $at, $t8, $v0
 DBG movn    $v0, $t8, $at
 DBG sgtu    $at, $t8, $v1
 DBG movn    $v1, $t8, $at
 DBG .set    at
     .endm

 #else

     .macro pixel dreg src fb shift
     /*
      * Fallback variant for targets without ext: fields are isolated
      * with shift and mask pairs.
      *
      *   dreg  : register holding the 16bpp destination pixel(s)
      *   src   : register holding the 32bpp source pixel
      *   fb    : result register; overwritten when shift is 0, OR-ed
      *           into when shift is non-zero
      *   shift : bit offset of the selected pixel inside dreg
      *
      * Scratch: $t6, $t7, $t8 (DEBUG builds also use $at, $v0, $v1).
      *
      *   sA = s >> 24
      *   f  = 0x100 - (sA + (sA >> 7))
      */

     /* DEBUG only: sample hardware register 2 into $at before the blend */
 DBG .set    push
 DBG .set    noat
 DBG .set    mips32r2
 DBG rdhwr   $at, $2
 DBG .set    pop

     srl     $t7, \src, 24               # sA
     srl     $t6, $t7, 7                 # sA >> 7
     addu    $t7, $t7, $t6               # sA + (sA >> 7)
     li      $t6, 0x100
     subu    $t7, $t6, $t7               # f

     /*
      * red
      *   dR = (d >> (6 + 5)) & 0x1f
      *   dR = (f * dR) >> 8
      *   sR = (s >> 3) & 0x1f
      *   sR += dR
      *   fb |= sR << 11
      * The mask is only emitted for shift 0; the .if below skips it for
      * any other shift value.
      */
     srl     $t8, \dreg, \shift+6+5
 .if \shift==0
     andi    $t8, $t8, 0x1f
 .endif
     mul     $t8, $t8, $t7
     srl     $t6, \src, 3
     andi    $t6, $t6, 0x1f
     srl     $t8, $t8, 8
     addu    $t8, $t8, $t6
 .if \shift==0
     sll     \fb, $t8, 11                # first pixel: initialise fb
 .else
     sll     $t8, $t8, \shift+11
     or      \fb, \fb, $t8               # later pixel: merge into fb
 .endif

     /*
      * green
      *   dG = (d >> 5) & 0x3f
      *   dG = (f * dG) >> 8
      *   sG = (s >> (8 + 2)) & 0x3f
      */
     srl     $t8, \dreg, \shift+5
     andi    $t8, $t8, 0x3f
     mul     $t8, $t8, $t7
     srl     $t6, \src, 8+2
     andi    $t6, $t6, 0x3f
     srl     $t8, $t8, 8
     addu    $t8, $t8, $t6
     sll     $t8, $t8, \shift+5
     or      \fb, \fb, $t8

     /*
      * blue
      *   dB = d & 0x1f
      *   dB = (f * dB) >> 8
      *   sB = (s >> (8 + 8 + 3)) & 0x1f
      */
 .if \shift==0
     andi    $t8, \dreg, 0x1f
 .else
     srl     $t8, \dreg, \shift
     andi    $t8, $t8, 0x1f
 .endif
     mul     $t8, $t8, $t7
     srl     $t6, \src, (8+8+3)
     andi    $t6, $t6, 0x1f
     srl     $t8, $t8, 8
     addu    $t8, $t8, $t6
 .if \shift!=0
     sll     $t8, $t8, \shift
 .endif
     or      \fb, \fb, $t8

     /*
      * DEBUG only: re-read hardware register 2, subtract the start value
      * and keep the unsigned minimum in $v0 and maximum in $v1.
      */
 DBG .set    push
 DBG .set    noat
 DBG .set    mips32r2
 DBG rdhwr   $t8, $2
 DBG subu    $t8, $t8, $at
 DBG sltu    $at, $t8, $v0
 DBG movn    $v0, $t8, $at
 DBG sgtu    $at, $t8, $v1
 DBG movn    $v1, $t8, $at
 DBG .set    pop
     .endm
 #endif

     .text
     .balign 4

     /*
      * scanline_t32cb16blend_mips
      *
      * Blends a run of 32bpp source pixels onto 16bpp destination pixels
      * using the pixel macro.
      *
      *   $a0 : destination pointer, advanced 2 bytes per pixel
      *   $a1 : source pointer, advanced 4 bytes per pixel
      *   $a2 : number of pixels to process
      *
      * Scratch: $t0, $t1, $t3, $t4, $t5 plus whatever the pixel macro
      * uses. $a0, $a1 and $a2 are modified.
      *
      * A source pixel whose 32 bits are all zero leaves its destination
      * untouched. DEBUG builds initialise $v0/$v1, which the pixel macro
      * updates, and multiply both by hardware register 3 before returning.
      */
     .global scanline_t32cb16blend_mips
     .ent    scanline_t32cb16blend_mips
 scanline_t32cb16blend_mips:
 DBG li    $v0,0xffffffff
 DBG li    $v1,0
     /* Align the destination to 4 bytes so the main loop can use lw/sw */
     and   $t0,$a0,3
     beqz  $t0,aligned

     /* as long as there is at least one pixel */
     beqz  $a2,done

     /* blend a single leading pixel through halfword accesses */
     lw    $t4,($a1)
     addu  $a0,2
     addu  $a1,4
     beqz  $t4,1f
     lhu   $t3,-2($a0)
     pixel $t3,$t4,$t1,0
     sh    $t1,-2($a0)
 1:  subu  $a2,1

 aligned:
     /*
      * Check to see if it is worth unrolling the loop.
      * From here until tail, $a2 holds (pixels remaining - 4).
      */
     subu  $a2,4
     bltz  $a2,tail

     /* Process 4 pixels at a time */
 fourpixels:
     /* 1st pair of pixels */
     lw    $t4,0($a1)
     lw    $t5,4($a1)
     addu  $a0,8
     addu  $a1,16

     /* both are zero, skip this pair */
     or    $t3,$t4,$t5
     beqz  $t3,1f

     /* load the destination: two 16bpp pixels in one word */
     lw    $t3,-8($a0)

     /* the low halfword pairs with the first source pixel */
     pixel $t3,$t4,$t1,0
     /* keep only the first result before the second is merged above it */
     andi  $t1, 0xFFFF
     pixel $t3,$t5,$t1,16
     sw    $t1,-8($a0)

 1:
     /* 2nd pair of pixels */
     lw    $t4,-8($a1)
     lw    $t5,-4($a1)

     /* both are zero, skip this pair */
     or    $t3,$t4,$t5
     beqz  $t3,1f

     /* load the destination */
     lw    $t3,-4($a0)

     pixel $t3,$t4,$t1,0
     andi  $t1, 0xFFFF
     pixel $t3,$t5,$t1,16
     sw    $t1,-4($a0)

     /*
      * Loop while at least 4 pixels remain ($a2 >= 0 after the
      * subtraction). This mirrors the bltz entry test above, so the
      * tail below only ever sees 0..3 pixels.
      */
 1:  subu  $a2,4
     bgez  $a2,fourpixels

 tail:
     /* the pixel count underran, restore it now */
     addu  $a2,4

     /* handle the last 0..3 pixels */
     beqz  $a2,done
 onepixel:
     lw    $t4,($a1)
     addu  $a0,2
     addu  $a1,4
     beqz  $t4,1f
     lhu   $t3,-2($a0)
     pixel $t3,$t4,$t1,0
     sh    $t1,-2($a0)
 1:  subu  $a2,1
     bnez  $a2,onepixel
 done:
     /* DEBUG only: scale the recorded min/max by hardware register 3 */
 DBG .set    push
 DBG .set    mips32r2
 DBG rdhwr   $a0,$3
 DBG mul     $v0,$a0
 DBG mul     $v1,$a0
 DBG .set    pop
     j     $ra
     .end    scanline_t32cb16blend_mips
	/* libs/pixelflinger/t32cb16blend.S
	**
	** Copyright 2010, The Android Open Source Project
	**
	** Licensed under the Apache License, Version 2.0 (the "License");
	** you may not use this file except in compliance with the License.
	** You may obtain a copy of the License at
	**
	** http://www.apache.org/licenses/LICENSE-2.0
	**
	** Unless required by applicable law or agreed to in writing, software
	** distributed under the License is distributed on an "AS IS" BASIS,
	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	** See the License for the specific language governing permissions and
	** limitations under the License.
	*/

	/*
	 * DBG is a line prefix for the profiling-only statements in this file.
	 * Without DEBUG it expands to the assembler comment character, so every
	 * DBG-prefixed line is ignored; with DEBUG it expands to nothing and
	 * the line is assembled.
	 */
	#ifndef DEBUG
	#define DBG #
	#else
	#define DBG
	#endif

	/*
	* blend one of 2 16bpp RGB pixels held in dreg selected by shift
	* with the 32bpp ABGR pixel held in src and store the result in fb
	*
	* Assumes that the dreg data is little endian and that
	* the second pixel (shift==16) will be merged into
	* the fb result
	*
	* Uses $t0,$t6,$t7,$t8
	*/

	#if __mips==32 && __mips_isa_rev>=2
	.macro pixel dreg src fb shift
	/*
	 * MIPS32R2 variant: destination and source fields are pulled out
	 * with ext.
	 *
	 *   dreg  : register holding the 16bpp destination pixel(s)
	 *   src   : register holding the 32bpp source pixel
	 *   fb    : result register; overwritten when shift is 0, OR-ed
	 *           into when shift is non-zero
	 *   shift : bit offset of the selected pixel inside dreg
	 *
	 * Scratch: $t0, $t6, $t7, $t8 (DEBUG builds also use $at, $v0, $v1).
	 *
	 * Each channel C is computed as sC + ((f * dC) >> 8), with
	 *   sA = s >> 24
	 *   f  = 0x100 - (sA + (sA >> 7))
	 */

	/* DEBUG only: sample hardware register 2 into $at before the blend */
	DBG .set    noat
	DBG rdhwr   $at, $2
	DBG .set    at

	    srl     $t7, \src, 24               # sA
	    srl     $t6, $t7, 7                 # sA >> 7
	    addu    $t7, $t7, $t6               # sA + (sA >> 7)
	    li      $t6, 0x100
	    subu    $t7, $t6, $t7               # f

	/* red: destination bits 15..11 of the selected pixel, source bits 7..3 */
	    ext     $t8, \dreg, \shift+6+5, 5   # dR
	    mul     $t6, $t8, $t7               # f * dR
	    ext     $t0, \dreg, \shift+5, 6     # dG, extracted ahead of its use
	    ext     $t8, \src, 3, 5             # sR
	    srl     $t6, $t6, 8
	    addu    $t8, $t8, $t6               # sR + ((f * dR) >> 8)
	.if \shift==0
	    sll     \fb, $t8, 11                # first pixel: initialise fb
	.else
	    sll     $t8, $t8, \shift+11
	    or      \fb, \fb, $t8               # later pixel: merge into fb
	.endif

	/* green: destination bits 10..5 of the selected pixel, source bits 15..10 */
	    mul     $t8, $t0, $t7               # f * dG
	    ext     $t0, \dreg, \shift, 5       # dB, extracted ahead of its use
	    ext     $t6, \src, 2+8, 6           # sG
	    srl     $t8, $t8, 8
	    addu    $t8, $t8, $t6               # sG + ((f * dG) >> 8)

	/* blue: destination bits 4..0 of the selected pixel, source bits 23..19 */
	    mul     $t0, $t0, $t7               # f * dB
	    sll     $t8, $t8, \shift+5
	    or      \fb, \fb, $t8               # merge green
	    ext     $t6, \src, (3+8+8), 5       # sB
	    srl     $t8, $t0, 8
	    addu    $t8, $t8, $t6               # sB + ((f * dB) >> 8)
	    sll     $t8, $t8, \shift
	    or      \fb, \fb, $t8               # merge blue

	/*
	 * DEBUG only: re-read hardware register 2, subtract the start value
	 * and keep the unsigned minimum in $v0 and maximum in $v1.
	 */
	DBG .set    noat
	DBG rdhwr   $t8, $2
	DBG subu    $t8, $t8, $at
	DBG sltu    $at, $t8, $v0
	DBG movn    $v0, $t8, $at
	DBG sgtu    $at, $t8, $v1
	DBG movn    $v1, $t8, $at
	DBG .set    at
	.endm

	#else

	.macro pixel dreg src fb shift
	/*
	 * Fallback variant for targets without ext: fields are isolated
	 * with shift and mask pairs.
	 *
	 *   dreg  : register holding the 16bpp destination pixel(s)
	 *   src   : register holding the 32bpp source pixel
	 *   fb    : result register; overwritten when shift is 0, OR-ed
	 *           into when shift is non-zero
	 *   shift : bit offset of the selected pixel inside dreg
	 *
	 * Scratch: $t6, $t7, $t8 (DEBUG builds also use $at, $v0, $v1).
	 *
	 *   sA = s >> 24
	 *   f  = 0x100 - (sA + (sA >> 7))
	 */

	/* DEBUG only: sample hardware register 2 into $at before the blend */
	DBG .set    push
	DBG .set    noat
	DBG .set    mips32r2
	DBG rdhwr   $at, $2
	DBG .set    pop

	    srl     $t7, \src, 24               # sA
	    srl     $t6, $t7, 7                 # sA >> 7
	    addu    $t7, $t7, $t6               # sA + (sA >> 7)
	    li      $t6, 0x100
	    subu    $t7, $t6, $t7               # f

	/*
	 * red
	 *   dR = (d >> (6 + 5)) & 0x1f
	 *   dR = (f * dR) >> 8
	 *   sR = (s >> 3) & 0x1f
	 *   sR += dR
	 *   fb |= sR << 11
	 * The mask is only emitted for shift 0; the .if below skips it for
	 * any other shift value.
	 */
	    srl     $t8, \dreg, \shift+6+5
	.if \shift==0
	    andi    $t8, $t8, 0x1f
	.endif
	    mul     $t8, $t8, $t7
	    srl     $t6, \src, 3
	    andi    $t6, $t6, 0x1f
	    srl     $t8, $t8, 8
	    addu    $t8, $t8, $t6
	.if \shift==0
	    sll     \fb, $t8, 11                # first pixel: initialise fb
	.else
	    sll     $t8, $t8, \shift+11
	    or      \fb, \fb, $t8               # later pixel: merge into fb
	.endif

	/*
	 * green
	 *   dG = (d >> 5) & 0x3f
	 *   dG = (f * dG) >> 8
	 *   sG = (s >> (8 + 2)) & 0x3f
	 */
	    srl     $t8, \dreg, \shift+5
	    andi    $t8, $t8, 0x3f
	    mul     $t8, $t8, $t7
	    srl     $t6, \src, 8+2
	    andi    $t6, $t6, 0x3f
	    srl     $t8, $t8, 8
	    addu    $t8, $t8, $t6
	    sll     $t8, $t8, \shift+5
	    or      \fb, \fb, $t8

	/*
	 * blue
	 *   dB = d & 0x1f
	 *   dB = (f * dB) >> 8
	 *   sB = (s >> (8 + 8 + 3)) & 0x1f
	 */
	.if \shift==0
	    andi    $t8, \dreg, 0x1f
	.else
	    srl     $t8, \dreg, \shift
	    andi    $t8, $t8, 0x1f
	.endif
	    mul     $t8, $t8, $t7
	    srl     $t6, \src, (8+8+3)
	    andi    $t6, $t6, 0x1f
	    srl     $t8, $t8, 8
	    addu    $t8, $t8, $t6
	.if \shift!=0
	    sll     $t8, $t8, \shift
	.endif
	    or      \fb, \fb, $t8

	/*
	 * DEBUG only: re-read hardware register 2, subtract the start value
	 * and keep the unsigned minimum in $v0 and maximum in $v1.
	 */
	DBG .set    push
	DBG .set    noat
	DBG .set    mips32r2
	DBG rdhwr   $t8, $2
	DBG subu    $t8, $t8, $at
	DBG sltu    $at, $t8, $v0
	DBG movn    $v0, $t8, $at
	DBG sgtu    $at, $t8, $v1
	DBG movn    $v1, $t8, $at
	DBG .set    pop
	.endm
	#endif

	.text
	.balign 4

	/*
	 * scanline_t32cb16blend_mips
	 *
	 * Blends a run of 32bpp source pixels onto 16bpp destination pixels
	 * using the pixel macro.
	 *
	 *   $a0 : destination pointer, advanced 2 bytes per pixel
	 *   $a1 : source pointer, advanced 4 bytes per pixel
	 *   $a2 : number of pixels to process
	 *
	 * Scratch: $t0, $t1, $t3, $t4, $t5 plus whatever the pixel macro
	 * uses. $a0, $a1 and $a2 are modified.
	 *
	 * A source pixel whose 32 bits are all zero leaves its destination
	 * untouched. DEBUG builds initialise $v0/$v1, which the pixel macro
	 * updates, and multiply both by hardware register 3 before returning.
	 */
	.global scanline_t32cb16blend_mips
	.ent scanline_t32cb16blend_mips
	scanline_t32cb16blend_mips:
	DBG li $v0,0xffffffff
	DBG li $v1,0
	/* Align the destination to 4 bytes so the main loop can use lw/sw */
	and $t0,$a0,3
	beqz $t0,aligned

	/* as long as there is at least one pixel */
	beqz $a2,done

	/* blend a single leading pixel through halfword accesses */
	lw $t4,($a1)
	addu $a0,2
	addu $a1,4
	beqz $t4,1f
	lhu $t3,-2($a0)
	pixel $t3,$t4,$t1,0
	sh $t1,-2($a0)
	1: subu $a2,1

	aligned:
	/*
	 * Check to see if it is worth unrolling the loop.
	 * From here until tail, $a2 holds (pixels remaining - 4).
	 */
	subu $a2,4
	bltz $a2,tail

	/* Process 4 pixels at a time */
	fourpixels:
	/* 1st pair of pixels */
	lw $t4,0($a1)
	lw $t5,4($a1)
	addu $a0,8
	addu $a1,16

	/* both are zero, skip this pair */
	or $t3,$t4,$t5
	beqz $t3,1f

	/* load the destination: two 16bpp pixels in one word */
	lw $t3,-8($a0)

	/* the low halfword pairs with the first source pixel */
	pixel $t3,$t4,$t1,0
	/* keep only the first result before the second is merged above it */
	andi $t1, 0xFFFF
	pixel $t3,$t5,$t1,16
	sw $t1,-8($a0)

	1:
	/* 2nd pair of pixels */
	lw $t4,-8($a1)
	lw $t5,-4($a1)

	/* both are zero, skip this pair */
	or $t3,$t4,$t5
	beqz $t3,1f

	/* load the destination */
	lw $t3,-4($a0)

	pixel $t3,$t4,$t1,0
	andi $t1, 0xFFFF
	pixel $t3,$t5,$t1,16
	sw $t1,-4($a0)

	/*
	 * Loop while at least 4 pixels remain ($a2 >= 0 after the
	 * subtraction). This mirrors the bltz entry test above, so the
	 * tail below only ever sees 0..3 pixels.
	 */
	1: subu $a2,4
	bgez $a2,fourpixels

	tail:
	/* the pixel count underran, restore it now */
	addu $a2,4

	/* handle the last 0..3 pixels */
	beqz $a2,done
	onepixel:
	lw $t4,($a1)
	addu $a0,2
	addu $a1,4
	beqz $t4,1f
	lhu $t3,-2($a0)
	pixel $t3,$t4,$t1,0
	sh $t1,-2($a0)
	1: subu $a2,1
	bnez $a2,onepixel
	done:
	/* DEBUG only: scale the recorded min/max by hardware register 3 */
	DBG .set push
	DBG .set mips32r2
	DBG rdhwr $a0,$3
	DBG mul $v0,$a0
	DBG mul $v1,$a0
	DBG .set pop
	j $ra
	.end scanline_t32cb16blend_mips