Blame - libpixelflinger/codeflinger/blending.cpp - android_system_core

blob: f10217b74a7a7b38b08298cfd595813411eedac5 [file] [log] [blame]

The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	1	/* libs/pixelflinger/codeflinger/blending.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18	#include <assert.h>
				19	#include <stdint.h>
				20	#include <stdlib.h>
				21	#include <stdio.h>
				22	#include <sys/types.h>
				23
				24	#include <cutils/log.h>
				25
				26	#include "codeflinger/GGLAssembler.h"
				27
				28
				29	namespace android {
				30
				31	void GGLAssembler::build_fog(
				32	component_t& temp, // incomming fragment / output
				33	int component,
				34	Scratch& regs)
				35	{
				36	if (mInfo[component].fog) {
				37	Scratch scratches(registerFile());
				38	comment("fog");
				39
				40	integer_t fragment(temp.reg, temp.h, temp.flags);
				41	if (!(temp.flags & CORRUPTIBLE)) {
				42	temp.reg = regs.obtain();
				43	temp.flags \|= CORRUPTIBLE;
				44	}
				45
				46	integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
				47	LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
				48	immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
				49
				50	integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
				51	CONTEXT_LOAD(factor.reg, generated_vars.f);
				52
The Android Open Source Project	35237d1	2008-12-17 18:08:08 -0800	[diff] [blame]	53	// clamp fog factor (TODO: see if there is a way to guarantee
				54	// we won't overflow, when setting the iterators)
				55	BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
				56	CMP(AL, factor.reg, imm( 0x10000 ));
				57	MOV(HS, 0, factor.reg, imm( 0x10000 ));
				58
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	59	build_blendFOneMinusF(temp, factor, fragment, fogColor);
				60	}
				61	}
				62
				63	void GGLAssembler::build_blending(
				64	component_t& temp, // incomming fragment / output
				65	const pixel_t& pixel, // framebuffer
				66	int component,
				67	Scratch& regs)
				68	{
				69	if (!mInfo[component].blend)
				70	return;
				71
				72	int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
				73	int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
				74	if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
				75	fs = GGL_ONE;
				76	const int blending = blending_codes(fs, fd);
				77	if (!temp.size()) {
				78	// here, blending will produce something which doesn't depend on
				79	// that component (eg: GL_ZERO:GL_*), so the register has not been
				80	// allocated yet. Will never be used as a source.
				81	temp = component_t(regs.obtain(), CORRUPTIBLE);
				82	}
				83
				84	// we are doing real blending...
				85	// fb: extracted dst
				86	// fragment: extracted src
				87	// temp: component_t(fragment) and result
				88
				89	// scoped register allocator
				90	Scratch scratches(registerFile());
				91	comment("blending");
				92
				93	// we can optimize these cases a bit...
				94	// (1) saturation is not needed
				95	// (2) we can use only one multiply instead of 2
				96	// (3) we can reduce the register pressure
				97	// R = Sf + D(1-f) = (S-D)*f + D
				98	// R = S(1-f) + Df = (D-S)*f + S
				99
				100	const bool same_factor_opt1 =
				101	(fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) \|\|
				102	(fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) \|\|
				103	(fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) \|\|
				104	(fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
				105
				106	const bool same_factor_opt2 =
				107	(fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) \|\|
				108	(fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) \|\|
				109	(fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) \|\|
				110	(fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
				111
				112
				113	// XXX: we could also optimize these cases:
				114	// R = Sf + Df = (S+D)*f
				115	// R = S(1-f) + D(1-f) = (S+D)*(1-f)
				116	// R = SD + DS = 2SD
				117
				118
				119	// see if we need to extract 'component' from the destination (fb)
				120	integer_t fb;
				121	if (blending & (BLEND_DST\|FACTOR_DST)) {
				122	fb.setTo(scratches.obtain(), 32);
				123	extract(fb, pixel, component);
				124	if (mDithering) {
				125	// XXX: maybe what we should do instead, is simply
				126	// expand fb -or- fragment to the larger of the two
				127	if (fb.size() < temp.size()) {
				128	// for now we expand 'fb' to min(fragment, 8)
				129	int new_size = temp.size() < 8 ? temp.size() : 8;
				130	expand(fb, fb, new_size);
				131	}
				132	}
				133	}
				134
				135
				136	// convert input fragment to integer_t
				137	if (temp.l && (temp.flags & CORRUPTIBLE)) {
				138	MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
				139	temp.h -= temp.l;
				140	temp.l = 0;
				141	}
				142	integer_t fragment(temp.reg, temp.size(), temp.flags);
				143
				144	// if not done yet, convert input fragment to integer_t
				145	if (temp.l) {
				146	// here we know temp is not CORRUPTIBLE
				147	fragment.reg = scratches.obtain();
				148	MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
				149	fragment.flags \|= CORRUPTIBLE;
				150	}
				151
				152	if (!(temp.flags & CORRUPTIBLE)) {
				153	// temp is not corruptible, but since it's the destination it
				154	// will be modified, so we need to allocate a new register.
				155	temp.reg = regs.obtain();
				156	temp.flags &= ~CORRUPTIBLE;
				157	fragment.flags &= ~CORRUPTIBLE;
				158	}
				159
				160	if ((blending & BLEND_SRC) && !same_factor_opt1) {
				161	// source (fragment) is needed for the blending stage
				162	// so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
				163	fragment.flags &= ~CORRUPTIBLE;
				164	}
				165
				166
				167	if (same_factor_opt1) {
				168	// R = Sf + D(1-f) = (S-D)*f + D
				169	integer_t factor;
				170	build_blend_factor(factor, fs,
				171	component, pixel, fragment, fb, scratches);
				172	// fb is always corruptible from this point
				173	fb.flags \|= CORRUPTIBLE;
				174	build_blendFOneMinusF(temp, factor, fragment, fb);
				175	} else if (same_factor_opt2) {
				176	// R = S(1-f) + Df = (D-S)*f + S
				177	integer_t factor;
				178	// fb is always corrruptible here
				179	fb.flags \|= CORRUPTIBLE;
				180	build_blend_factor(factor, fd,
				181	component, pixel, fragment, fb, scratches);
				182	build_blendOneMinusFF(temp, factor, fragment, fb);
				183	} else {
				184	integer_t src_factor;
				185	integer_t dst_factor;
				186
				187	// if destination (fb) is not needed for the blending stage,
				188	// then it can be marked as CORRUPTIBLE
				189	if (!(blending & BLEND_DST)) {
				190	fb.flags \|= CORRUPTIBLE;
				191	}
				192
				193	// XXX: try to mark some registers as CORRUPTIBLE
				194	// in most case we could make those corruptible
				195	// when we're processing the last component
				196	// but not always, for instance
				197	// when fragment is constant and not reloaded
				198	// when fb is needed for logic-ops or masking
				199	// when a register is aliased (for instance with mAlphaSource)
				200
				201	// blend away...
				202	if (fs==GGL_ZERO) {
				203	if (fd==GGL_ZERO) { // R = 0
				204	// already taken care of
				205	} else if (fd==GGL_ONE) { // R = D
				206	// already taken care of
				207	} else { // R = D*fd
				208	// compute fd
				209	build_blend_factor(dst_factor, fd,
				210	component, pixel, fragment, fb, scratches);
				211	mul_factor(temp, fb, dst_factor);
				212	}
				213	} else if (fs==GGL_ONE) {
				214	if (fd==GGL_ZERO) { // R = S
				215	// NOP, taken care of
				216	} else if (fd==GGL_ONE) { // R = S + D
				217	component_add(temp, fb, fragment); // args order matters
				218	component_sat(temp);
				219	} else { // R = S + D*fd
				220	// compute fd
				221	build_blend_factor(dst_factor, fd,
				222	component, pixel, fragment, fb, scratches);
				223	mul_factor_add(temp, fb, dst_factor, component_t(fragment));
				224	if (fd==GGL_ONE_MINUS_SRC_ALPHA) {
				225	// XXX: in theory this is not correct, we should
				226	// saturate here. However, this mode is often
				227	// used for displaying alpha-premultiplied graphics,
				228	// in which case, saturation is not necessary.
				229	// unfortunatelly, we have no way to know.
				230	// This is a case, where we sacrifice correctness for
				231	// performance. we should probably have some heuristics.
				232	} else {
				233	component_sat(temp);
				234	}
				235	}
				236	} else {
				237	// compute fs
				238	build_blend_factor(src_factor, fs,
				239	component, pixel, fragment, fb, scratches);
				240	if (fd==GGL_ZERO) { // R = S*fs
				241	mul_factor(temp, fragment, src_factor);
				242	} else if (fd==GGL_ONE) { // R = S*fs + D
				243	mul_factor_add(temp, fragment, src_factor, component_t(fb));
				244	component_sat(temp);
				245	} else { // R = Sfs + Dfd
				246	mul_factor(temp, fragment, src_factor);
				247	if (scratches.isUsed(src_factor.reg))
				248	scratches.recycle(src_factor.reg);
				249	// compute fd
				250	build_blend_factor(dst_factor, fd,
				251	component, pixel, fragment, fb, scratches);
				252	mul_factor_add(temp, fb, dst_factor, temp);
				253	if (!same_factor_opt1 && !same_factor_opt2) {
				254	component_sat(temp);
				255	}
				256	}
				257	}
				258	}
				259
				260	// now we can be corrupted (it's the dest)
				261	temp.flags \|= CORRUPTIBLE;
				262	}
				263
				264	void GGLAssembler::build_blend_factor(
				265	integer_t& factor, int f, int component,
				266	const pixel_t& dst_pixel,
				267	integer_t& fragment,
				268	integer_t& fb,
				269	Scratch& scratches)
				270	{
				271	integer_t src_alpha(fragment);
				272
				273	// src_factor/dst_factor won't be used after blending,
				274	// so it's fine to mark them as CORRUPTIBLE (if not aliased)
				275	factor.flags \|= CORRUPTIBLE;
				276
				277	switch(f) {
				278	case GGL_ONE_MINUS_SRC_ALPHA:
				279	case GGL_SRC_ALPHA:
				280	if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
				281	// we're processing alpha, so we already have
				282	// src-alpha in fragment, and we need src-alpha just this time.
				283	} else {
				284	// alpha-src will be needed for other components
				285	if (!mBlendFactorCached \|\| mBlendFactorCached==f) {
				286	src_alpha = mAlphaSource;
				287	factor = mAlphaSource;
				288	factor.flags &= ~CORRUPTIBLE;
				289	// we already computed the blend factor before, nothing to do.
				290	if (mBlendFactorCached)
				291	return;
				292	// this is the first time, make sure to compute the blend
				293	// factor properly.
				294	mBlendFactorCached = f;
				295	break;
				296	} else {
				297	// we have a cached alpha blend factor, but we want another one,
				298	// this should really not happen because by construction,
				299	// we cannot have BOTH source and destination
				300	// blend factors use ALPHA and ONE_MINUS_ALPHA (because
				301	// the blending stage uses the f/(1-f) optimization
				302
				303	// for completeness, we handle this case though. Since there
				304	// are only 2 choices, this meens we want "the other one"
				305	// (1-factor)
				306	factor = mAlphaSource;
				307	factor.flags &= ~CORRUPTIBLE;
				308	RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
				309	mBlendFactorCached = f;
				310	return;
				311	}
				312	}
				313	// fall-through...
				314	case GGL_ONE_MINUS_DST_COLOR:
				315	case GGL_DST_COLOR:
				316	case GGL_ONE_MINUS_SRC_COLOR:
				317	case GGL_SRC_COLOR:
				318	case GGL_ONE_MINUS_DST_ALPHA:
				319	case GGL_DST_ALPHA:
				320	case GGL_SRC_ALPHA_SATURATE:
				321	// help us find out what register we can use for the blend-factor
				322	// CORRUPTIBLE registers are chosen first, or a new one is allocated.
				323	if (fragment.flags & CORRUPTIBLE) {
				324	factor.setTo(fragment.reg, 32, CORRUPTIBLE);
				325	fragment.flags &= ~CORRUPTIBLE;
				326	} else if (fb.flags & CORRUPTIBLE) {
				327	factor.setTo(fb.reg, 32, CORRUPTIBLE);
				328	fb.flags &= ~CORRUPTIBLE;
				329	} else {
				330	factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
				331	}
				332	break;
				333	}
				334
				335	// XXX: doesn't work if size==1
				336
				337	switch(f) {
				338	case GGL_ONE_MINUS_DST_COLOR:
				339	case GGL_DST_COLOR:
				340	factor.s = fb.s;
				341	ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
				342	break;
				343	case GGL_ONE_MINUS_SRC_COLOR:
				344	case GGL_SRC_COLOR:
				345	factor.s = fragment.s;
				346	ADD(AL, 0, factor.reg, fragment.reg,
				347	reg_imm(fragment.reg, LSR, fragment.s-1));
				348	break;
				349	case GGL_ONE_MINUS_SRC_ALPHA:
				350	case GGL_SRC_ALPHA:
				351	factor.s = src_alpha.s;
				352	ADD(AL, 0, factor.reg, src_alpha.reg,
				353	reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
				354	break;
				355	case GGL_ONE_MINUS_DST_ALPHA:
				356	case GGL_DST_ALPHA:
				357	// XXX: should be precomputed
				358	extract(factor, dst_pixel, GGLFormat::ALPHA);
				359	ADD(AL, 0, factor.reg, factor.reg,
				360	reg_imm(factor.reg, LSR, factor.s-1));
				361	break;
				362	case GGL_SRC_ALPHA_SATURATE:
				363	// XXX: should be precomputed
				364	// XXX: f = min(As, 1-Ad)
				365	// btw, we're guaranteed that Ad's size is <= 8, because
				366	// it's extracted from the framebuffer
				367	break;
				368	}
				369
				370	switch(f) {
				371	case GGL_ONE_MINUS_DST_COLOR:
				372	case GGL_ONE_MINUS_SRC_COLOR:
				373	case GGL_ONE_MINUS_DST_ALPHA:
				374	case GGL_ONE_MINUS_SRC_ALPHA:
				375	RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
				376	}
				377
				378	// don't need more than 8-bits for the blend factor
				379	// and this will prevent overflows in the multiplies later
				380	if (factor.s > 8) {
				381	MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
				382	factor.s = 8;
				383	}
				384	}
				385
				386	int GGLAssembler::blending_codes(int fs, int fd)
				387	{
				388	int blending = 0;
				389	switch(fs) {
				390	case GGL_ONE:
				391	blending \|= BLEND_SRC;
				392	break;
				393
				394	case GGL_ONE_MINUS_DST_COLOR:
				395	case GGL_DST_COLOR:
				396	blending \|= FACTOR_DST\|BLEND_SRC;
				397	break;
				398	case GGL_ONE_MINUS_DST_ALPHA:
				399	case GGL_DST_ALPHA:
				400	// no need to extract 'component' from the destination
				401	// for the blend factor, because we need ALPHA only.
				402	blending \|= BLEND_SRC;
				403	break;
				404
				405	case GGL_ONE_MINUS_SRC_COLOR:
				406	case GGL_SRC_COLOR:
				407	blending \|= FACTOR_SRC\|BLEND_SRC;
				408	break;
				409	case GGL_ONE_MINUS_SRC_ALPHA:
				410	case GGL_SRC_ALPHA:
				411	case GGL_SRC_ALPHA_SATURATE:
				412	blending \|= FACTOR_SRC\|BLEND_SRC;
				413	break;
				414	}
				415	switch(fd) {
				416	case GGL_ONE:
				417	blending \|= BLEND_DST;
				418	break;
				419
				420	case GGL_ONE_MINUS_DST_COLOR:
				421	case GGL_DST_COLOR:
				422	blending \|= FACTOR_DST\|BLEND_DST;
				423	break;
				424	case GGL_ONE_MINUS_DST_ALPHA:
				425	case GGL_DST_ALPHA:
				426	blending \|= FACTOR_DST\|BLEND_DST;
				427	break;
				428
				429	case GGL_ONE_MINUS_SRC_COLOR:
				430	case GGL_SRC_COLOR:
				431	blending \|= FACTOR_SRC\|BLEND_DST;
				432	break;
				433	case GGL_ONE_MINUS_SRC_ALPHA:
				434	case GGL_SRC_ALPHA:
				435	// no need to extract 'component' from the source
				436	// for the blend factor, because we need ALPHA only.
				437	blending \|= BLEND_DST;
				438	break;
				439	}
				440	return blending;
				441	}
				442
				443	// ---------------------------------------------------------------------------
				444
				445	void GGLAssembler::build_blendFOneMinusF(
				446	component_t& temp,
				447	const integer_t& factor,
				448	const integer_t& fragment,
				449	const integer_t& fb)
				450	{
				451	// R = Sf + D(1-f) = (S-D)*f + D
				452	Scratch scratches(registerFile());
				453	// compute S-D
				454	integer_t diff(fragment.flags & CORRUPTIBLE ?
				455	fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
				456	const int shift = fragment.size() - fb.size();
				457	if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
				458	else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
				459	else RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
				460	mul_factor_add(temp, diff, factor, component_t(fb));
				461	}
				462
				463	void GGLAssembler::build_blendOneMinusFF(
				464	component_t& temp,
				465	const integer_t& factor,
				466	const integer_t& fragment,
				467	const integer_t& fb)
				468	{
				469	// R = Sf + D(1-f) = (S-D)*f + D
				470	Scratch scratches(registerFile());
				471	// compute D-S
				472	integer_t diff(fb.flags & CORRUPTIBLE ?
				473	fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
				474	const int shift = fragment.size() - fb.size();
				475	if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
				476	else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
				477	else SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
				478	mul_factor_add(temp, diff, factor, component_t(fragment));
				479	}
				480
				481	// ---------------------------------------------------------------------------
				482
				483	void GGLAssembler::mul_factor( component_t& d,
				484	const integer_t& v,
				485	const integer_t& f)
				486	{
				487	int vs = v.size();
				488	int fs = f.size();
				489	int ms = vs+fs;
				490
				491	// XXX: we could have special cases for 1 bit mul
				492
				493	// all this code below to use the best multiply instruction
				494	// wrt the parameters size. We take advantage of the fact
				495	// that the 16-bits multiplies allow a 16-bit shift
				496	// The trick is that we just make sure that we have at least 8-bits
				497	// per component (which is enough for a 8 bits display).
				498
				499	int xy;
				500	int vshift = 0;
				501	int fshift = 0;
				502	int smulw = 0;
				503
				504	if (vs<16) {
				505	if (fs<16) {
				506	xy = xyBB;
				507	} else if (GGL_BETWEEN(fs, 24, 31)) {
				508	ms -= 16;
				509	xy = xyTB;
				510	} else {
				511	// eg: 15 * 18 -> 15 * 15
				512	fshift = fs - 15;
				513	ms -= fshift;
				514	xy = xyBB;
				515	}
				516	} else if (GGL_BETWEEN(vs, 24, 31)) {
				517	if (fs<16) {
				518	ms -= 16;
				519	xy = xyTB;
				520	} else if (GGL_BETWEEN(fs, 24, 31)) {
				521	ms -= 32;
				522	xy = xyTT;
				523	} else {
				524	// eg: 24 * 18 -> 8 * 18
				525	fshift = fs - 15;
				526	ms -= 16 + fshift;
				527	xy = xyTB;
				528	}
				529	} else {
				530	if (fs<16) {
				531	// eg: 18 * 15 -> 15 * 15
				532	vshift = vs - 15;
				533	ms -= vshift;
				534	xy = xyBB;
				535	} else if (GGL_BETWEEN(fs, 24, 31)) {
				536	// eg: 18 * 24 -> 15 * 8
				537	vshift = vs - 15;
				538	ms -= 16 + vshift;
				539	xy = xyBT;
				540	} else {
				541	// eg: 18 * 18 -> (15 * 18)>>16
				542	fshift = fs - 15;
				543	ms -= 16 + fshift;
				544	xy = yB; //XXX SMULWB
				545	smulw = 1;
				546	}
				547	}
				548
				549	LOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
				550
				551	int vreg = v.reg;
				552	int freg = f.reg;
				553	if (vshift) {
				554	MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
				555	vreg = d.reg;
				556	}
				557	if (fshift) {
				558	MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
				559	freg = d.reg;
				560	}
				561	if (smulw) SMULW(AL, xy, d.reg, vreg, freg);
				562	else SMUL(AL, xy, d.reg, vreg, freg);
				563
				564
				565	d.h = ms;
				566	if (mDithering) {
				567	d.l = 0;
				568	} else {
				569	d.l = fs;
				570	d.flags \|= CLEAR_LO;
				571	}
				572	}
				573
				574	void GGLAssembler::mul_factor_add( component_t& d,
				575	const integer_t& v,
				576	const integer_t& f,
				577	const component_t& a)
				578	{
				579	// XXX: we could have special cases for 1 bit mul
				580	Scratch scratches(registerFile());
				581
				582	int vs = v.size();
				583	int fs = f.size();
				584	int as = a.h;
				585	int ms = vs+fs;
				586
				587	LOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
				588
				589	integer_t add(a.reg, a.h, a.flags);
				590
				591	// 'a' is a component_t but it is guaranteed to have
				592	// its high bits set to 0. However in the dithering case,
				593	// we can't get away with truncating the potentially bad bits
				594	// so extraction is needed.
				595
				596	if ((mDithering) && (a.size() < ms)) {
				597	// we need to expand a
				598	if (!(a.flags & CORRUPTIBLE)) {
				599	// ... but it's not corruptible, so we need to pick a
				600	// temporary register.
				601	// Try to uses the destination register first (it's likely
				602	// to be usable, unless it aliases an input).
				603	if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
				604	add.reg = d.reg;
				605	} else {
				606	add.reg = scratches.obtain();
				607	}
				608	}
				609	expand(add, a, ms); // extracts and expands
				610	as = ms;
				611	}
				612
				613	if (ms == as) {
				614	if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
				615	else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
				616	} else {
				617	int temp = d.reg;
				618	if (temp == add.reg) {
				619	// the mul will modify add.reg, we need an intermediary reg
				620	if (v.flags & CORRUPTIBLE) temp = v.reg;
				621	else if (f.flags & CORRUPTIBLE) temp = f.reg;
				622	else temp = scratches.obtain();
				623	}
				624
				625	if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
				626	else MUL(AL, 0, temp, v.reg, f.reg);
				627
				628	if (ms>as) {
				629	ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
				630	} else if (ms<as) {
				631	// not sure if we should expand the mul instead?
				632	ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
				633	}
				634	}
				635
				636	d.h = ms;
				637	if (mDithering) {
				638	d.l = a.l;
				639	} else {
				640	d.l = fs>a.l ? fs : a.l;
				641	d.flags \|= CLEAR_LO;
				642	}
				643	}
				644
				645	void GGLAssembler::component_add(component_t& d,
				646	const integer_t& dst, const integer_t& src)
				647	{
				648	// here we're guaranteed that fragment.size() >= fb.size()
				649	const int shift = src.size() - dst.size();
				650	if (!shift) {
				651	ADD(AL, 0, d.reg, src.reg, dst.reg);
				652	} else {
				653	ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
				654	}
				655
				656	d.h = src.size();
				657	if (mDithering) {
				658	d.l = 0;
				659	} else {
				660	d.l = shift;
				661	d.flags \|= CLEAR_LO;
				662	}
				663	}
				664
				665	void GGLAssembler::component_sat(const component_t& v)
				666	{
				667	const int one = ((1<<v.size())-1)<<v.l;
				668	CMP(AL, v.reg, imm( 1<<v.h ));
				669	if (isValidImmediate(one)) {
				670	MOV(HS, 0, v.reg, imm( one ));
				671	} else if (isValidImmediate(~one)) {
				672	MVN(HS, 0, v.reg, imm( ~one ));
				673	} else {
				674	MOV(HS, 0, v.reg, imm( 1<<v.h ));
				675	SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
				676	}
				677	}
				678
				679	// ----------------------------------------------------------------------------
				680
				681	}; // namespace android
				682