Blame - libpixelflinger/codeflinger/blending.cpp - android_system_core

blob: b20219ced4ff83ad89b56f1c55c74bd7047ebb1d [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/* libs/pixelflinger/codeflinger/blending.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18	#include <assert.h>
				19	#include <stdint.h>
				20	#include <stdlib.h>
				21	#include <stdio.h>
				22	#include <sys/types.h>
				23
				24	#include <cutils/log.h>
				25
Mathias Agopian	9857d99	2013-04-01 15:17:55 -0700	[diff] [blame]	26	#include "GGLAssembler.h"
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	27
				28
				29	namespace android {
				30
				31	void GGLAssembler::build_fog(
				32	component_t& temp, // incomming fragment / output
				33	int component,
				34	Scratch& regs)
				35	{
				36	if (mInfo[component].fog) {
				37	Scratch scratches(registerFile());
				38	comment("fog");
				39
				40	integer_t fragment(temp.reg, temp.h, temp.flags);
				41	if (!(temp.flags & CORRUPTIBLE)) {
				42	temp.reg = regs.obtain();
				43	temp.flags \|= CORRUPTIBLE;
				44	}
				45
				46	integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
				47	LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
				48	immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
				49
				50	integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
				51	CONTEXT_LOAD(factor.reg, generated_vars.f);
				52
				53	// clamp fog factor (TODO: see if there is a way to guarantee
				54	// we won't overflow, when setting the iterators)
				55	BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
				56	CMP(AL, factor.reg, imm( 0x10000 ));
				57	MOV(HS, 0, factor.reg, imm( 0x10000 ));
				58
				59	build_blendFOneMinusF(temp, factor, fragment, fogColor);
				60	}
				61	}
				62
				63	void GGLAssembler::build_blending(
				64	component_t& temp, // incomming fragment / output
				65	const pixel_t& pixel, // framebuffer
				66	int component,
				67	Scratch& regs)
				68	{
				69	if (!mInfo[component].blend)
				70	return;
				71
				72	int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
				73	int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
				74	if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
				75	fs = GGL_ONE;
				76	const int blending = blending_codes(fs, fd);
				77	if (!temp.size()) {
				78	// here, blending will produce something which doesn't depend on
				79	// that component (eg: GL_ZERO:GL_*), so the register has not been
				80	// allocated yet. Will never be used as a source.
				81	temp = component_t(regs.obtain(), CORRUPTIBLE);
				82	}
				83
				84	// we are doing real blending...
				85	// fb: extracted dst
				86	// fragment: extracted src
				87	// temp: component_t(fragment) and result
				88
				89	// scoped register allocator
				90	Scratch scratches(registerFile());
				91	comment("blending");
				92
				93	// we can optimize these cases a bit...
				94	// (1) saturation is not needed
				95	// (2) we can use only one multiply instead of 2
				96	// (3) we can reduce the register pressure
				97	// R = Sf + D(1-f) = (S-D)*f + D
				98	// R = S(1-f) + Df = (D-S)*f + S
				99
				100	const bool same_factor_opt1 =
				101	(fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) \|\|
				102	(fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) \|\|
				103	(fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) \|\|
				104	(fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
				105
				106	const bool same_factor_opt2 =
				107	(fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) \|\|
				108	(fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) \|\|
				109	(fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) \|\|
				110	(fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
				111
				112
				113	// XXX: we could also optimize these cases:
				114	// R = Sf + Df = (S+D)*f
				115	// R = S(1-f) + D(1-f) = (S+D)*(1-f)
				116	// R = SD + DS = 2SD
				117
				118
				119	// see if we need to extract 'component' from the destination (fb)
				120	integer_t fb;
				121	if (blending & (BLEND_DST\|FACTOR_DST)) {
				122	fb.setTo(scratches.obtain(), 32);
				123	extract(fb, pixel, component);
				124	if (mDithering) {
				125	// XXX: maybe what we should do instead, is simply
				126	// expand fb -or- fragment to the larger of the two
				127	if (fb.size() < temp.size()) {
				128	// for now we expand 'fb' to min(fragment, 8)
				129	int new_size = temp.size() < 8 ? temp.size() : 8;
				130	expand(fb, fb, new_size);
				131	}
				132	}
				133	}
				134
				135
				136	// convert input fragment to integer_t
				137	if (temp.l && (temp.flags & CORRUPTIBLE)) {
				138	MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
				139	temp.h -= temp.l;
				140	temp.l = 0;
				141	}
				142	integer_t fragment(temp.reg, temp.size(), temp.flags);
				143
				144	// if not done yet, convert input fragment to integer_t
				145	if (temp.l) {
				146	// here we know temp is not CORRUPTIBLE
				147	fragment.reg = scratches.obtain();
				148	MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
				149	fragment.flags \|= CORRUPTIBLE;
				150	}
				151
				152	if (!(temp.flags & CORRUPTIBLE)) {
				153	// temp is not corruptible, but since it's the destination it
				154	// will be modified, so we need to allocate a new register.
				155	temp.reg = regs.obtain();
				156	temp.flags &= ~CORRUPTIBLE;
				157	fragment.flags &= ~CORRUPTIBLE;
				158	}
				159
				160	if ((blending & BLEND_SRC) && !same_factor_opt1) {
				161	// source (fragment) is needed for the blending stage
				162	// so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
				163	fragment.flags &= ~CORRUPTIBLE;
				164	}
				165
				166
				167	if (same_factor_opt1) {
				168	// R = Sf + D(1-f) = (S-D)*f + D
				169	integer_t factor;
				170	build_blend_factor(factor, fs,
				171	component, pixel, fragment, fb, scratches);
				172	// fb is always corruptible from this point
				173	fb.flags \|= CORRUPTIBLE;
				174	build_blendFOneMinusF(temp, factor, fragment, fb);
				175	} else if (same_factor_opt2) {
				176	// R = S(1-f) + Df = (D-S)*f + S
				177	integer_t factor;
				178	// fb is always corrruptible here
				179	fb.flags \|= CORRUPTIBLE;
				180	build_blend_factor(factor, fd,
				181	component, pixel, fragment, fb, scratches);
				182	build_blendOneMinusFF(temp, factor, fragment, fb);
				183	} else {
				184	integer_t src_factor;
				185	integer_t dst_factor;
				186
				187	// if destination (fb) is not needed for the blending stage,
				188	// then it can be marked as CORRUPTIBLE
				189	if (!(blending & BLEND_DST)) {
				190	fb.flags \|= CORRUPTIBLE;
				191	}
				192
				193	// XXX: try to mark some registers as CORRUPTIBLE
				194	// in most case we could make those corruptible
				195	// when we're processing the last component
				196	// but not always, for instance
				197	// when fragment is constant and not reloaded
				198	// when fb is needed for logic-ops or masking
				199	// when a register is aliased (for instance with mAlphaSource)
				200
				201	// blend away...
				202	if (fs==GGL_ZERO) {
				203	if (fd==GGL_ZERO) { // R = 0
				204	// already taken care of
				205	} else if (fd==GGL_ONE) { // R = D
				206	// already taken care of
				207	} else { // R = D*fd
				208	// compute fd
				209	build_blend_factor(dst_factor, fd,
				210	component, pixel, fragment, fb, scratches);
				211	mul_factor(temp, fb, dst_factor);
				212	}
				213	} else if (fs==GGL_ONE) {
				214	if (fd==GGL_ZERO) { // R = S
				215	// NOP, taken care of
				216	} else if (fd==GGL_ONE) { // R = S + D
				217	component_add(temp, fb, fragment); // args order matters
				218	component_sat(temp);
				219	} else { // R = S + D*fd
				220	// compute fd
				221	build_blend_factor(dst_factor, fd,
				222	component, pixel, fragment, fb, scratches);
				223	mul_factor_add(temp, fb, dst_factor, component_t(fragment));
Mathias Agopian	665a222	2009-08-07 13:01:46 -0700	[diff] [blame]	224	component_sat(temp);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	225	}
				226	} else {
				227	// compute fs
				228	build_blend_factor(src_factor, fs,
				229	component, pixel, fragment, fb, scratches);
				230	if (fd==GGL_ZERO) { // R = S*fs
				231	mul_factor(temp, fragment, src_factor);
				232	} else if (fd==GGL_ONE) { // R = S*fs + D
				233	mul_factor_add(temp, fragment, src_factor, component_t(fb));
				234	component_sat(temp);
				235	} else { // R = Sfs + Dfd
				236	mul_factor(temp, fragment, src_factor);
				237	if (scratches.isUsed(src_factor.reg))
				238	scratches.recycle(src_factor.reg);
				239	// compute fd
				240	build_blend_factor(dst_factor, fd,
				241	component, pixel, fragment, fb, scratches);
				242	mul_factor_add(temp, fb, dst_factor, temp);
				243	if (!same_factor_opt1 && !same_factor_opt2) {
				244	component_sat(temp);
				245	}
				246	}
				247	}
				248	}
				249
				250	// now we can be corrupted (it's the dest)
				251	temp.flags \|= CORRUPTIBLE;
				252	}
				253
				254	void GGLAssembler::build_blend_factor(
				255	integer_t& factor, int f, int component,
				256	const pixel_t& dst_pixel,
				257	integer_t& fragment,
				258	integer_t& fb,
				259	Scratch& scratches)
				260	{
				261	integer_t src_alpha(fragment);
				262
				263	// src_factor/dst_factor won't be used after blending,
				264	// so it's fine to mark them as CORRUPTIBLE (if not aliased)
				265	factor.flags \|= CORRUPTIBLE;
				266
				267	switch(f) {
				268	case GGL_ONE_MINUS_SRC_ALPHA:
				269	case GGL_SRC_ALPHA:
				270	if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
				271	// we're processing alpha, so we already have
				272	// src-alpha in fragment, and we need src-alpha just this time.
				273	} else {
				274	// alpha-src will be needed for other components
				275	if (!mBlendFactorCached \|\| mBlendFactorCached==f) {
				276	src_alpha = mAlphaSource;
				277	factor = mAlphaSource;
				278	factor.flags &= ~CORRUPTIBLE;
				279	// we already computed the blend factor before, nothing to do.
				280	if (mBlendFactorCached)
				281	return;
				282	// this is the first time, make sure to compute the blend
				283	// factor properly.
				284	mBlendFactorCached = f;
				285	break;
				286	} else {
				287	// we have a cached alpha blend factor, but we want another one,
				288	// this should really not happen because by construction,
				289	// we cannot have BOTH source and destination
				290	// blend factors use ALPHA and ONE_MINUS_ALPHA (because
				291	// the blending stage uses the f/(1-f) optimization
				292
				293	// for completeness, we handle this case though. Since there
				294	// are only 2 choices, this meens we want "the other one"
				295	// (1-factor)
				296	factor = mAlphaSource;
				297	factor.flags &= ~CORRUPTIBLE;
				298	RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
				299	mBlendFactorCached = f;
				300	return;
				301	}
				302	}
				303	// fall-through...
				304	case GGL_ONE_MINUS_DST_COLOR:
				305	case GGL_DST_COLOR:
				306	case GGL_ONE_MINUS_SRC_COLOR:
				307	case GGL_SRC_COLOR:
				308	case GGL_ONE_MINUS_DST_ALPHA:
				309	case GGL_DST_ALPHA:
				310	case GGL_SRC_ALPHA_SATURATE:
				311	// help us find out what register we can use for the blend-factor
				312	// CORRUPTIBLE registers are chosen first, or a new one is allocated.
				313	if (fragment.flags & CORRUPTIBLE) {
				314	factor.setTo(fragment.reg, 32, CORRUPTIBLE);
				315	fragment.flags &= ~CORRUPTIBLE;
				316	} else if (fb.flags & CORRUPTIBLE) {
				317	factor.setTo(fb.reg, 32, CORRUPTIBLE);
				318	fb.flags &= ~CORRUPTIBLE;
				319	} else {
				320	factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
				321	}
				322	break;
				323	}
				324
				325	// XXX: doesn't work if size==1
				326
				327	switch(f) {
				328	case GGL_ONE_MINUS_DST_COLOR:
				329	case GGL_DST_COLOR:
				330	factor.s = fb.s;
				331	ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
				332	break;
				333	case GGL_ONE_MINUS_SRC_COLOR:
				334	case GGL_SRC_COLOR:
				335	factor.s = fragment.s;
				336	ADD(AL, 0, factor.reg, fragment.reg,
				337	reg_imm(fragment.reg, LSR, fragment.s-1));
				338	break;
				339	case GGL_ONE_MINUS_SRC_ALPHA:
				340	case GGL_SRC_ALPHA:
				341	factor.s = src_alpha.s;
				342	ADD(AL, 0, factor.reg, src_alpha.reg,
				343	reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
				344	break;
				345	case GGL_ONE_MINUS_DST_ALPHA:
				346	case GGL_DST_ALPHA:
				347	// XXX: should be precomputed
				348	extract(factor, dst_pixel, GGLFormat::ALPHA);
				349	ADD(AL, 0, factor.reg, factor.reg,
				350	reg_imm(factor.reg, LSR, factor.s-1));
				351	break;
				352	case GGL_SRC_ALPHA_SATURATE:
				353	// XXX: should be precomputed
				354	// XXX: f = min(As, 1-Ad)
				355	// btw, we're guaranteed that Ad's size is <= 8, because
				356	// it's extracted from the framebuffer
				357	break;
				358	}
				359
				360	switch(f) {
				361	case GGL_ONE_MINUS_DST_COLOR:
				362	case GGL_ONE_MINUS_SRC_COLOR:
				363	case GGL_ONE_MINUS_DST_ALPHA:
				364	case GGL_ONE_MINUS_SRC_ALPHA:
				365	RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
				366	}
				367
				368	// don't need more than 8-bits for the blend factor
				369	// and this will prevent overflows in the multiplies later
				370	if (factor.s > 8) {
				371	MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
				372	factor.s = 8;
				373	}
				374	}
				375
				376	int GGLAssembler::blending_codes(int fs, int fd)
				377	{
				378	int blending = 0;
				379	switch(fs) {
				380	case GGL_ONE:
				381	blending \|= BLEND_SRC;
				382	break;
				383
				384	case GGL_ONE_MINUS_DST_COLOR:
				385	case GGL_DST_COLOR:
				386	blending \|= FACTOR_DST\|BLEND_SRC;
				387	break;
				388	case GGL_ONE_MINUS_DST_ALPHA:
				389	case GGL_DST_ALPHA:
				390	// no need to extract 'component' from the destination
				391	// for the blend factor, because we need ALPHA only.
				392	blending \|= BLEND_SRC;
				393	break;
				394
				395	case GGL_ONE_MINUS_SRC_COLOR:
				396	case GGL_SRC_COLOR:
				397	blending \|= FACTOR_SRC\|BLEND_SRC;
				398	break;
				399	case GGL_ONE_MINUS_SRC_ALPHA:
				400	case GGL_SRC_ALPHA:
				401	case GGL_SRC_ALPHA_SATURATE:
				402	blending \|= FACTOR_SRC\|BLEND_SRC;
				403	break;
				404	}
				405	switch(fd) {
				406	case GGL_ONE:
				407	blending \|= BLEND_DST;
				408	break;
				409
				410	case GGL_ONE_MINUS_DST_COLOR:
				411	case GGL_DST_COLOR:
				412	blending \|= FACTOR_DST\|BLEND_DST;
				413	break;
				414	case GGL_ONE_MINUS_DST_ALPHA:
				415	case GGL_DST_ALPHA:
				416	blending \|= FACTOR_DST\|BLEND_DST;
				417	break;
				418
				419	case GGL_ONE_MINUS_SRC_COLOR:
				420	case GGL_SRC_COLOR:
				421	blending \|= FACTOR_SRC\|BLEND_DST;
				422	break;
				423	case GGL_ONE_MINUS_SRC_ALPHA:
				424	case GGL_SRC_ALPHA:
				425	// no need to extract 'component' from the source
				426	// for the blend factor, because we need ALPHA only.
				427	blending \|= BLEND_DST;
				428	break;
				429	}
				430	return blending;
				431	}
				432
				433	// ---------------------------------------------------------------------------
				434
				435	void GGLAssembler::build_blendFOneMinusF(
				436	component_t& temp,
				437	const integer_t& factor,
				438	const integer_t& fragment,
				439	const integer_t& fb)
				440	{
				441	// R = Sf + D(1-f) = (S-D)*f + D
				442	Scratch scratches(registerFile());
				443	// compute S-D
				444	integer_t diff(fragment.flags & CORRUPTIBLE ?
				445	fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
				446	const int shift = fragment.size() - fb.size();
				447	if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
				448	else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
				449	else RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
				450	mul_factor_add(temp, diff, factor, component_t(fb));
				451	}
				452
				453	void GGLAssembler::build_blendOneMinusFF(
				454	component_t& temp,
				455	const integer_t& factor,
				456	const integer_t& fragment,
				457	const integer_t& fb)
				458	{
				459	// R = Sf + D(1-f) = (S-D)*f + D
				460	Scratch scratches(registerFile());
				461	// compute D-S
				462	integer_t diff(fb.flags & CORRUPTIBLE ?
				463	fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
				464	const int shift = fragment.size() - fb.size();
				465	if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
				466	else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
				467	else SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
				468	mul_factor_add(temp, diff, factor, component_t(fragment));
				469	}
				470
				471	// ---------------------------------------------------------------------------
				472
				473	void GGLAssembler::mul_factor( component_t& d,
				474	const integer_t& v,
				475	const integer_t& f)
				476	{
				477	int vs = v.size();
				478	int fs = f.size();
				479	int ms = vs+fs;
				480
				481	// XXX: we could have special cases for 1 bit mul
				482
				483	// all this code below to use the best multiply instruction
				484	// wrt the parameters size. We take advantage of the fact
				485	// that the 16-bits multiplies allow a 16-bit shift
				486	// The trick is that we just make sure that we have at least 8-bits
				487	// per component (which is enough for a 8 bits display).
				488
				489	int xy;
				490	int vshift = 0;
				491	int fshift = 0;
				492	int smulw = 0;
				493
				494	if (vs<16) {
				495	if (fs<16) {
				496	xy = xyBB;
				497	} else if (GGL_BETWEEN(fs, 24, 31)) {
				498	ms -= 16;
				499	xy = xyTB;
				500	} else {
				501	// eg: 15 * 18 -> 15 * 15
				502	fshift = fs - 15;
				503	ms -= fshift;
				504	xy = xyBB;
				505	}
				506	} else if (GGL_BETWEEN(vs, 24, 31)) {
				507	if (fs<16) {
				508	ms -= 16;
				509	xy = xyTB;
				510	} else if (GGL_BETWEEN(fs, 24, 31)) {
				511	ms -= 32;
				512	xy = xyTT;
				513	} else {
				514	// eg: 24 * 18 -> 8 * 18
				515	fshift = fs - 15;
				516	ms -= 16 + fshift;
				517	xy = xyTB;
				518	}
				519	} else {
				520	if (fs<16) {
				521	// eg: 18 * 15 -> 15 * 15
				522	vshift = vs - 15;
				523	ms -= vshift;
				524	xy = xyBB;
				525	} else if (GGL_BETWEEN(fs, 24, 31)) {
				526	// eg: 18 * 24 -> 15 * 8
				527	vshift = vs - 15;
				528	ms -= 16 + vshift;
				529	xy = xyBT;
				530	} else {
				531	// eg: 18 * 18 -> (15 * 18)>>16
				532	fshift = fs - 15;
				533	ms -= 16 + fshift;
				534	xy = yB; //XXX SMULWB
				535	smulw = 1;
				536	}
				537	}
				538
Steve Block	01dda20	2012-01-06 14:13:42 +0000	[diff] [blame]	539	ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	540
				541	int vreg = v.reg;
				542	int freg = f.reg;
				543	if (vshift) {
				544	MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
				545	vreg = d.reg;
				546	}
				547	if (fshift) {
				548	MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
				549	freg = d.reg;
				550	}
				551	if (smulw) SMULW(AL, xy, d.reg, vreg, freg);
				552	else SMUL(AL, xy, d.reg, vreg, freg);
				553
				554
				555	d.h = ms;
				556	if (mDithering) {
				557	d.l = 0;
				558	} else {
				559	d.l = fs;
				560	d.flags \|= CLEAR_LO;
				561	}
				562	}
				563
				564	void GGLAssembler::mul_factor_add( component_t& d,
				565	const integer_t& v,
				566	const integer_t& f,
				567	const component_t& a)
				568	{
				569	// XXX: we could have special cases for 1 bit mul
				570	Scratch scratches(registerFile());
				571
				572	int vs = v.size();
				573	int fs = f.size();
				574	int as = a.h;
				575	int ms = vs+fs;
				576
Steve Block	01dda20	2012-01-06 14:13:42 +0000	[diff] [blame]	577	ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	578
				579	integer_t add(a.reg, a.h, a.flags);
				580
				581	// 'a' is a component_t but it is guaranteed to have
				582	// its high bits set to 0. However in the dithering case,
				583	// we can't get away with truncating the potentially bad bits
				584	// so extraction is needed.
				585
				586	if ((mDithering) && (a.size() < ms)) {
				587	// we need to expand a
				588	if (!(a.flags & CORRUPTIBLE)) {
				589	// ... but it's not corruptible, so we need to pick a
				590	// temporary register.
				591	// Try to uses the destination register first (it's likely
				592	// to be usable, unless it aliases an input).
				593	if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
				594	add.reg = d.reg;
				595	} else {
				596	add.reg = scratches.obtain();
				597	}
				598	}
				599	expand(add, a, ms); // extracts and expands
				600	as = ms;
				601	}
				602
				603	if (ms == as) {
				604	if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
				605	else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
				606	} else {
				607	int temp = d.reg;
				608	if (temp == add.reg) {
				609	// the mul will modify add.reg, we need an intermediary reg
				610	if (v.flags & CORRUPTIBLE) temp = v.reg;
				611	else if (f.flags & CORRUPTIBLE) temp = f.reg;
				612	else temp = scratches.obtain();
				613	}
				614
				615	if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
				616	else MUL(AL, 0, temp, v.reg, f.reg);
				617
				618	if (ms>as) {
				619	ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
				620	} else if (ms<as) {
				621	// not sure if we should expand the mul instead?
				622	ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
				623	}
				624	}
				625
				626	d.h = ms;
				627	if (mDithering) {
				628	d.l = a.l;
				629	} else {
				630	d.l = fs>a.l ? fs : a.l;
				631	d.flags \|= CLEAR_LO;
				632	}
				633	}
				634
				635	void GGLAssembler::component_add(component_t& d,
				636	const integer_t& dst, const integer_t& src)
				637	{
				638	// here we're guaranteed that fragment.size() >= fb.size()
				639	const int shift = src.size() - dst.size();
				640	if (!shift) {
				641	ADD(AL, 0, d.reg, src.reg, dst.reg);
				642	} else {
				643	ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
				644	}
				645
				646	d.h = src.size();
				647	if (mDithering) {
				648	d.l = 0;
				649	} else {
				650	d.l = shift;
				651	d.flags \|= CLEAR_LO;
				652	}
				653	}
				654
				655	void GGLAssembler::component_sat(const component_t& v)
				656	{
				657	const int one = ((1<<v.size())-1)<<v.l;
				658	CMP(AL, v.reg, imm( 1<<v.h ));
				659	if (isValidImmediate(one)) {
				660	MOV(HS, 0, v.reg, imm( one ));
				661	} else if (isValidImmediate(~one)) {
				662	MVN(HS, 0, v.reg, imm( ~one ));
				663	} else {
				664	MOV(HS, 0, v.reg, imm( 1<<v.h ));
				665	SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
				666	}
				667	}
				668
				669	// ----------------------------------------------------------------------------
				670
				671	}; // namespace android
				672