Blame - libpixelflinger/codeflinger/texturing.cpp - android_system_core

blob: d66981dad184ba7ef76fe8fae736f31fa6425a82 [file] [log] [blame]

The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	1	/* libs/pixelflinger/codeflinger/texturing.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	8	**
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	9	** http://www.apache.org/licenses/LICENSE-2.0
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	10	**
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	15	** limitations under the License.
				16	*/
				17
				18	#include <assert.h>
				19	#include <stdint.h>
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	20	#include <stdio.h>
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	21	#include <stdlib.h>
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	22	#include <sys/types.h>
				23
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	24	#include <android/log.h>
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	25
Mathias Agopian	9857d99	2013-04-01 15:17:55 -0700	[diff] [blame]	26	#include "GGLAssembler.h"
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	27
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	28	namespace android {
				29
				30	// ---------------------------------------------------------------------------
				31
// iterators are initialized like this:
// (intToFixedCenter(x) * dx)>>16 + x0
// ((x<<16 + 0x8000) * dx)>>16 + x0
// ((x<<16)*dx + (0x8000*dx))>>16 + x0
// ( (x*dx) + dx>>1 ) + x0
// (x*dx) + (dx>>1 + x0)
				38
				39	void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
				40	{
				41	context_t const* c = mBuilderContext.c;
				42	const needs_t& needs = mBuilderContext.needs;
				43
				44	if (mSmooth) {
				45	// NOTE: we could take this case in the mDithering + !mSmooth case,
				46	// but this would use up to 4 more registers for the color components
				47	// for only a little added quality.
				48	// Currently, this causes the system to run out of registers in
				49	// some case (see issue #719496)
				50
				51	comment("compute initial iterated color (smooth and/or dither case)");
				52
				53	parts.iterated_packed = 0;
				54	parts.packed = 0;
				55
				56	// 0x1: color component
				57	// 0x2: iterators
				58	const int optReload = mOptLevel >> 1;
				59	if (optReload >= 3) parts.reload = 0; // reload nothing
				60	else if (optReload == 2) parts.reload = 2; // reload iterators
				61	else if (optReload == 1) parts.reload = 1; // reload colors
				62	else if (optReload <= 0) parts.reload = 3; // reload both
				63
				64	if (!mSmooth) {
				65	// we're not smoothing (just dithering), we never have to
				66	// reload the iterators
				67	parts.reload &= ~2;
				68	}
				69
				70	Scratch scratches(registerFile());
				71	const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
				72	const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
				73	for (int i=0 ; i<4 ; i++) {
				74	if (!mInfo[i].iterated)
				75	continue;
				76
				77	// this component exists in the destination and is not replaced
				78	// by a texture unit.
				79	const int c = (parts.reload & 1) ? t0 : obtainReg();
				80	if (i==0) CONTEXT_LOAD(c, iterators.ydady);
				81	if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
				82	if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
				83	if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
				84	parts.argb[i].reg = c;
				85
				86	if (mInfo[i].smooth) {
				87	parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
				88	const int dvdx = parts.argb_dx[i].reg;
				89	CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
				90	MLA(AL, 0, c, x.reg, dvdx, c);
				91
				92	// adjust the color iterator to make sure it won't overflow
				93	if (!mAA) {
				94	// this is not needed when we're using anti-aliasing
				95	// because we will (have to) clamp the components
				96	// anyway.
				97	int end = scratches.obtain();
				98	MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16));
				99	MLA(AL, 1, end, dvdx, end, c);
				100	SUB(MI, 0, c, c, end);
				101	BIC(AL, 0, c, c, reg_imm(c, ASR, 31));
				102	scratches.recycle(end);
				103	}
				104	}
				105
				106	if (parts.reload & 1) {
				107	CONTEXT_STORE(c, generated_vars.argb[i].c);
				108	}
				109	}
				110	} else {
				111	// We're not smoothed, so we can
				112	// just use a packed version of the color and extract the
				113	// components as needed (or not at all if we don't blend)
				114
				115	// figure out if we need the iterated color
				116	int load = 0;
				117	for (int i=0 ; i<4 ; i++) {
				118	component_info_t& info = mInfo[i];
				119	if ((info.inDest \|\| info.needed) && !info.replaced)
				120	load \|= 1;
				121	}
				122
				123	parts.iterated_packed = 1;
				124	parts.packed = (!mTextureMachine.mask && !mBlending
				125	&& !mFog && !mDithering);
				126	parts.reload = 0;
				127	if (load \|\| parts.packed) {
				128	if (mBlending \|\| mDithering \|\| mInfo[GGLFormat::ALPHA].needed) {
				129	comment("load initial iterated color (8888 packed)");
				130	parts.iterated.setTo(obtainReg(),
				131	&(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
				132	CONTEXT_LOAD(parts.iterated.reg, packed8888);
				133	} else {
				134	comment("load initial iterated color (dest format packed)");
				135
				136	parts.iterated.setTo(obtainReg(), &mCbFormat);
				137
				138	// pre-mask the iterated color
				139	const int bits = parts.iterated.size();
				140	const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
				141	uint32_t mask = 0;
				142	if (mMasking) {
				143	for (int i=0 ; i<4 ; i++) {
				144	const int component_mask = 1<<i;
				145	const int h = parts.iterated.format.c[i].h;
				146	const int l = parts.iterated.format.c[i].l;
				147	if (h && (!(mMasking & component_mask))) {
				148	mask \|= ((1<<(h-l))-1) << l;
				149	}
				150	}
				151	}
				152
				153	if (mMasking && ((mask & size)==0)) {
				154	// none of the components are present in the mask
				155	} else {
				156	CONTEXT_LOAD(parts.iterated.reg, packed);
				157	if (mCbFormat.size == 1) {
				158	AND(AL, 0, parts.iterated.reg,
				159	parts.iterated.reg, imm(0xFF));
				160	} else if (mCbFormat.size == 2) {
				161	MOV(AL, 0, parts.iterated.reg,
				162	reg_imm(parts.iterated.reg, LSR, 16));
				163	}
				164	}
				165
				166	// pre-mask the iterated color
				167	if (mMasking) {
				168	build_and_immediate(parts.iterated.reg, parts.iterated.reg,
				169	mask, bits);
				170	}
				171	}
				172	}
				173	}
				174	}
				175
				176	void GGLAssembler::build_iterated_color(
				177	component_t& fragment,
				178	const fragment_parts_t& parts,
				179	int component,
				180	Scratch& regs)
				181	{
				182	fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);
				183
				184	if (!mInfo[component].iterated)
				185	return;
				186
				187	if (parts.iterated_packed) {
				188	// iterated colors are packed, extract the one we need
				189	extract(fragment, parts.iterated, component);
				190	} else {
				191	fragment.h = GGL_COLOR_BITS;
				192	fragment.l = GGL_COLOR_BITS - 8;
				193	fragment.flags \|= CLEAR_LO;
				194	// iterated colors are held in their own register,
				195	// (smooth and/or dithering case)
				196	if (parts.reload==3) {
				197	// this implies mSmooth
				198	Scratch scratches(registerFile());
				199	int dx = scratches.obtain();
				200	CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
				201	CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
				202	ADD(AL, 0, dx, fragment.reg, dx);
				203	CONTEXT_STORE(dx, generated_vars.argb[component].c);
				204	} else if (parts.reload & 1) {
				205	CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
				206	} else {
				207	// we don't reload, so simply rename the register and mark as
				208	// non CORRUPTIBLE so that the texture env or blending code
				209	// won't modify this (renamed) register
				210	regs.recycle(fragment.reg);
				211	fragment.reg = parts.argb[component].reg;
				212	fragment.flags &= ~CORRUPTIBLE;
				213	}
				214	if (mInfo[component].smooth && mAA) {
				215	// when using smooth shading AND anti-aliasing, we need to clamp
				216	// the iterators because there is always an extra pixel on the
				217	// edges, which most of the time will cause an overflow
				218	// (since technically its outside of the domain).
				219	BIC(AL, 0, fragment.reg, fragment.reg,
				220	reg_imm(fragment.reg, ASR, 31));
				221	component_sat(fragment);
				222	}
				223	}
				224	}
				225
				226	// ---------------------------------------------------------------------------
				227
				228	void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs)
				229	{
				230	// gather some informations about the components we need to process...
				231	const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) \| GGL_CLEAR;
				232	switch(opcode) {
				233	case GGL_COPY:
				234	mLogicOp = 0;
				235	break;
				236	case GGL_CLEAR:
				237	case GGL_SET:
				238	mLogicOp = LOGIC_OP;
				239	break;
				240	case GGL_AND:
				241	case GGL_AND_REVERSE:
				242	case GGL_AND_INVERTED:
				243	case GGL_XOR:
				244	case GGL_OR:
				245	case GGL_NOR:
				246	case GGL_EQUIV:
				247	case GGL_OR_REVERSE:
				248	case GGL_OR_INVERTED:
				249	case GGL_NAND:
				250	mLogicOp = LOGIC_OP\|LOGIC_OP_SRC\|LOGIC_OP_DST;
				251	break;
				252	case GGL_NOOP:
				253	case GGL_INVERT:
				254	mLogicOp = LOGIC_OP\|LOGIC_OP_DST;
				255	break;
				256	case GGL_COPY_INVERTED:
				257	mLogicOp = LOGIC_OP\|LOGIC_OP_SRC;
				258	break;
				259	};
				260	}
				261
				262	void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
				263	{
				264	uint8_t replaced=0;
				265	mTextureMachine.mask = 0;
				266	mTextureMachine.activeUnits = 0;
				267	for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
				268	texture_unit_t& tmu = mTextureMachine.tmu[i];
				269	if (replaced == 0xF) {
				270	// all components are replaced, skip this TMU.
				271	tmu.format_idx = 0;
				272	tmu.mask = 0;
				273	tmu.replaced = replaced;
				274	continue;
				275	}
				276	tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
				277	tmu.format = c->formats[tmu.format_idx];
				278	tmu.bits = tmu.format.size*8;
				279	tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
				280	tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
				281	tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
				282	tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
				283	tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
				284	&& tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now
				285
				286	// 5551 linear filtering is not supported
				287	if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
				288	tmu.linear = 0;
				289
				290	tmu.mask = 0;
				291	tmu.replaced = replaced;
				292
				293	if (tmu.format_idx) {
				294	mTextureMachine.activeUnits++;
				295	if (tmu.format.c[0].h) tmu.mask \|= 0x1;
				296	if (tmu.format.c[1].h) tmu.mask \|= 0x2;
				297	if (tmu.format.c[2].h) tmu.mask \|= 0x4;
				298	if (tmu.format.c[3].h) tmu.mask \|= 0x8;
				299	if (tmu.env == GGL_REPLACE) {
				300	replaced \|= tmu.mask;
				301	} else if (tmu.env == GGL_DECAL) {
				302	if (!tmu.format.c[GGLFormat::ALPHA].h) {
				303	// if we don't have alpha, decal does nothing
				304	tmu.mask = 0;
				305	} else {
				306	// decal always ignores At
				307	tmu.mask &= ~(1<<GGLFormat::ALPHA);
				308	}
				309	}
				310	}
				311	mTextureMachine.mask \|= tmu.mask;
				312	//printf("%d: mask=%08lx, replaced=%08lx\n",
				313	// i, int(tmu.mask), int(tmu.replaced));
				314	}
				315	mTextureMachine.replaced = replaced;
				316	mTextureMachine.directTexture = 0;
				317	//printf("replaced=%08lx\n", mTextureMachine.replaced);
				318	}
				319
				320
				321	void GGLAssembler::init_textures(
				322	tex_coord_t* coords,
				323	const reg_t& x, const reg_t& y)
				324	{
				325	context_t const* c = mBuilderContext.c;
				326	const needs_t& needs = mBuilderContext.needs;
				327	int Rctx = mBuilderContext.Rctx;
				328	int Rx = x.reg;
				329	int Ry = y.reg;
				330
				331	if (mTextureMachine.mask) {
				332	comment("compute texture coordinates");
				333	}
				334
				335	// init texture coordinates for each tmu
				336	const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
				337	const bool multiTexture = mTextureMachine.activeUnits > 1;
				338	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
				339	const texture_unit_t& tmu = mTextureMachine.tmu[i];
				340	if (tmu.format_idx == 0)
				341	continue;
				342	if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
				343	(tmu.twrap == GGL_NEEDS_WRAP_11))
				344	{
				345	// 1:1 texture
				346	pointer_t& txPtr = coords[i].ptr;
				347	txPtr.setTo(obtainReg(), tmu.bits);
				348	CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
				349	ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16)); // x += (s>>16)
				350	CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
				351	ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16)); // y += (t>>16)
				352	// merge base & offset
				353	CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
				354	SMLABB(AL, Rx, Ry, txPtr.reg, Rx); // x+y*stride
Ashok Bhat	bfc6dc4	2013-02-21 10:27:40 +0000	[diff] [blame]	355	CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	356	base_offset(txPtr, txPtr, Rx);
				357	} else {
				358	Scratch scratches(registerFile());
				359	reg_t& s = coords[i].s;
				360	reg_t& t = coords[i].t;
				361	// s = (x * dsdx)>>16 + ydsdy
				362	// s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
				363	// t = (x * dtdx)>>16 + ydtdy
				364	// t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
				365	s.setTo(obtainReg());
				366	t.setTo(obtainReg());
				367	const int need_w = GGL_READ_NEEDS(W, needs.n);
				368	if (need_w) {
				369	CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
				370	CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
				371	} else {
				372	int ydsdy = scratches.obtain();
				373	int ydtdy = scratches.obtain();
				374	CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx);
				375	CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
				376	CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx);
				377	CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
				378	MLA(AL, 0, s.reg, Rx, s.reg, ydsdy);
				379	MLA(AL, 0, t.reg, Rx, t.reg, ydtdy);
				380	}
				381
				382	if ((mOptLevel&1)==0) {
				383	CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
				384	CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
				385	recycleReg(s.reg);
				386	recycleReg(t.reg);
				387	}
				388	}
				389
				390	// direct texture?
				391	if (!multiTexture && !mBlending && !mDithering && !mFog &&
				392	cb_format_idx == tmu.format_idx && !tmu.linear &&
				393	mTextureMachine.replaced == tmu.mask)
				394	{
				395	mTextureMachine.directTexture = i + 1;
				396	}
				397	}
				398	}
				399
				400	void GGLAssembler::build_textures( fragment_parts_t& parts,
				401	Scratch& regs)
				402	{
				403	context_t const* c = mBuilderContext.c;
				404	const needs_t& needs = mBuilderContext.needs;
				405	int Rctx = mBuilderContext.Rctx;
				406
				407	// We don't have a way to spill registers automatically
				408	// spill depth and AA regs, when we know we may have to.
				409	// build the spill list...
				410	uint32_t spill_list = 0;
				411	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
				412	const texture_unit_t& tmu = mTextureMachine.tmu[i];
				413	if (tmu.format_idx == 0)
				414	continue;
				415	if (tmu.linear) {
				416	// we may run out of register if we have linear filtering
				417	// at 1 or 4 bytes / pixel on any texture unit.
				418	if (tmu.format.size == 1) {
				419	// if depth and AA enabled, we'll run out of 1 register
				420	if (parts.z.reg > 0 && parts.covPtr.reg > 0)
				421	spill_list \|= 1<<parts.covPtr.reg;
				422	}
				423	if (tmu.format.size == 4) {
				424	// if depth or AA enabled, we'll run out of 1 or 2 registers
				425	if (parts.z.reg > 0)
				426	spill_list \|= 1<<parts.z.reg;
				427	if (parts.covPtr.reg > 0)
				428	spill_list \|= 1<<parts.covPtr.reg;
				429	}
				430	}
				431	}
				432
				433	Spill spill(registerFile(), *this, spill_list);
				434
				435	const bool multiTexture = mTextureMachine.activeUnits > 1;
				436	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
				437	const texture_unit_t& tmu = mTextureMachine.tmu[i];
				438	if (tmu.format_idx == 0)
				439	continue;
				440
				441	pointer_t& txPtr = parts.coords[i].ptr;
				442	pixel_t& texel = parts.texel[i];
				443
				444	// repeat...
				445	if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
				446	(tmu.twrap == GGL_NEEDS_WRAP_11))
				447	{ // 1:1 textures
				448	comment("fetch texel");
				449	texel.setTo(regs.obtain(), &tmu.format);
				450	load(txPtr, texel, WRITE_BACK);
				451	} else {
				452	Scratch scratches(registerFile());
				453	reg_t& s = parts.coords[i].s;
				454	reg_t& t = parts.coords[i].t;
				455	if ((mOptLevel&1)==0) {
				456	comment("reload s/t (multitexture or linear filtering)");
				457	s.reg = scratches.obtain();
				458	t.reg = scratches.obtain();
				459	CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
				460	CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
				461	}
				462
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	463	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				464	return;
				465
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	466	comment("compute repeat/clamp");
				467	int u = scratches.obtain();
				468	int v = scratches.obtain();
				469	int width = scratches.obtain();
				470	int height = scratches.obtain();
				471	int U = 0;
				472	int V = 0;
				473
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	474	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				475	return;
				476
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	477	CONTEXT_LOAD(width, generated_vars.texture[i].width);
				478	CONTEXT_LOAD(height, generated_vars.texture[i].height);
				479
				480	int FRAC_BITS = 0;
				481	if (tmu.linear) {
				482	// linear interpolation
				483	if (tmu.format.size == 1) {
				484	// for 8-bits textures, we can afford
				485	// 7 bits of fractional precision at no
				486	// additional cost (we can't do 8 bits
				487	// because filter8 uses signed 16 bits muls)
				488	FRAC_BITS = 7;
				489	} else if (tmu.format.size == 2) {
				490	// filter16() is internally limited to 4 bits, so:
				491	// FRAC_BITS=2 generates less instructions,
				492	// FRAC_BITS=3,4,5 creates unpleasant artifacts,
				493	// FRAC_BITS=6+ looks good
				494	FRAC_BITS = 6;
				495	} else if (tmu.format.size == 4) {
				496	// filter32() is internally limited to 8 bits, so:
				497	// FRAC_BITS=4 looks good
				498	// FRAC_BITS=5+ looks better, but generates 3 extra ipp
				499	FRAC_BITS = 6;
				500	} else {
				501	// for all other cases we use 4 bits.
				502	FRAC_BITS = 4;
				503	}
				504	}
				505	wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS);
				506	wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);
				507
				508	if (tmu.linear) {
				509	comment("compute linear filtering offsets");
				510	// pixel size scale
				511	const int shift = 31 - gglClz(tmu.format.size);
				512	U = scratches.obtain();
				513	V = scratches.obtain();
				514
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	515	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				516	return;
				517
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	518	// sample the texel center
				519	SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
				520	SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));
				521
				522	// get the fractionnal part of U,V
				523	AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));
				524	AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));
				525
				526	// compute width-1 and height-1
				527	SUB(AL, 0, width, width, imm(1));
				528	SUB(AL, 0, height, height, imm(1));
				529
				530	// get the integer part of U,V and clamp/wrap
				531	// and compute offset to the next texel
				532	if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
				533	// u has already been REPEATed
				534	MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
				535	MOV(MI, 0, u, width);
				536	CMP(AL, u, width);
				537	MOV(LT, 0, width, imm(1 << shift));
				538	if (shift)
				539	MOV(GE, 0, width, reg_imm(width, LSL, shift));
				540	RSB(GE, 0, width, width, imm(0));
				541	} else {
				542	// u has not been CLAMPed yet
				543	// algorithm:
				544	// if ((u>>4) >= width)
				545	// u = width<<4
				546	// width = 0
				547	// else
				548	// width = 1<<shift
				549	// u = u>>4; // get integer part
				550	// if (u<0)
				551	// u = 0
				552	// width = 0
				553	// generated_vars.rt = width
				554
				555	CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));
				556	MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));
				557	MOV(LE, 0, width, imm(0));
				558	MOV(GT, 0, width, imm(1 << shift));
				559	MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
				560	MOV(MI, 0, u, imm(0));
				561	MOV(MI, 0, width, imm(0));
				562	}
				563	CONTEXT_STORE(width, generated_vars.rt);
				564
				565	const int stride = width;
				566	CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
				567	if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
				568	// v has already been REPEATed
				569	MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
				570	MOV(MI, 0, v, height);
				571	CMP(AL, v, height);
				572	MOV(LT, 0, height, imm(1 << shift));
				573	if (shift)
				574	MOV(GE, 0, height, reg_imm(height, LSL, shift));
				575	RSB(GE, 0, height, height, imm(0));
				576	MUL(AL, 0, height, stride, height);
				577	} else {
Martyn Capewell	96dbb4f	2009-12-07 13:59:59 +0000	[diff] [blame]	578	// v has not been CLAMPed yet
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	579	CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
				580	MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
				581	MOV(LE, 0, height, imm(0));
				582	if (shift) {
				583	MOV(GT, 0, height, reg_imm(stride, LSL, shift));
				584	} else {
				585	MOV(GT, 0, height, stride);
				586	}
				587	MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
				588	MOV(MI, 0, v, imm(0));
				589	MOV(MI, 0, height, imm(0));
				590	}
				591	CONTEXT_STORE(height, generated_vars.lb);
				592	}
				593
				594	scratches.recycle(width);
				595	scratches.recycle(height);
				596
				597	// iterate texture coordinates...
				598	comment("iterate s,t");
				599	int dsdx = scratches.obtain();
				600	int dtdx = scratches.obtain();
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	601
				602	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				603	return;
				604
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	605	CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
				606	CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
				607	ADD(AL, 0, s.reg, s.reg, dsdx);
				608	ADD(AL, 0, t.reg, t.reg, dtdx);
				609	if ((mOptLevel&1)==0) {
				610	CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
				611	CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
				612	scratches.recycle(s.reg);
				613	scratches.recycle(t.reg);
				614	}
				615	scratches.recycle(dsdx);
				616	scratches.recycle(dtdx);
				617
				618	// merge base & offset...
				619	comment("merge base & offset");
				620	texel.setTo(regs.obtain(), &tmu.format);
				621	txPtr.setTo(texel.reg, tmu.bits);
				622	int stride = scratches.obtain();
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	623
				624	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				625	return;
				626
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	627	CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
Ashok Bhat	bfc6dc4	2013-02-21 10:27:40 +0000	[diff] [blame]	628	CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	629	SMLABB(AL, u, v, stride, u); // u+v*stride
				630	base_offset(txPtr, txPtr, u);
				631
				632	// load texel
				633	if (!tmu.linear) {
				634	comment("fetch texel");
				635	load(txPtr, texel, 0);
				636	} else {
				637	// recycle registers we don't need anymore
				638	scratches.recycle(u);
				639	scratches.recycle(v);
				640	scratches.recycle(stride);
				641
				642	comment("fetch texel, bilinear");
				643	switch (tmu.format.size) {
				644	case 1: filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
				645	case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
				646	case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
				647	case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
				648	}
				649	}
				650	}
				651	}
				652	}
				653
				654	void GGLAssembler::build_iterate_texture_coordinates(
				655	const fragment_parts_t& parts)
				656	{
				657	const bool multiTexture = mTextureMachine.activeUnits > 1;
				658	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
				659	const texture_unit_t& tmu = mTextureMachine.tmu[i];
				660	if (tmu.format_idx == 0)
				661	continue;
				662
				663	if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
				664	(tmu.twrap == GGL_NEEDS_WRAP_11))
				665	{ // 1:1 textures
				666	const pointer_t& txPtr = parts.coords[i].ptr;
				667	ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3));
				668	} else {
				669	Scratch scratches(registerFile());
				670	int s = parts.coords[i].s.reg;
				671	int t = parts.coords[i].t.reg;
				672	if ((mOptLevel&1)==0) {
				673	s = scratches.obtain();
				674	t = scratches.obtain();
				675	CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
				676	CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
				677	}
				678	int dsdx = scratches.obtain();
				679	int dtdx = scratches.obtain();
				680	CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
				681	CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
				682	ADD(AL, 0, s, s, dsdx);
				683	ADD(AL, 0, t, t, dtdx);
				684	if ((mOptLevel&1)==0) {
				685	CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
				686	CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
				687	}
				688	}
				689	}
				690	}
				691
				692	void GGLAssembler::filter8(
Ashok Bhat	3078b13	2014-02-17 15:15:46 +0000	[diff] [blame]	693	const fragment_parts_t& /parts/,
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	694	pixel_t& texel, const texture_unit_t& tmu,
				695	int U, int V, pointer_t& txPtr,
				696	int FRAC_BITS)
				697	{
				698	if (tmu.format.components != GGL_ALPHA &&
				699	tmu.format.components != GGL_LUMINANCE)
				700	{
				701	// this is a packed format, and we don't support
				702	// linear filtering (it's probably RGB 332)
				703	// Should not happen with OpenGL\|ES
				704	LDRB(AL, texel.reg, txPtr.reg);
				705	return;
				706	}
				707
				708	// ------------------------
				709	// about ~22 cycles / pixel
				710	Scratch scratches(registerFile());
				711
				712	int pixel= scratches.obtain();
				713	int d = scratches.obtain();
				714	int u = scratches.obtain();
				715	int k = scratches.obtain();
				716	int rt = scratches.obtain();
				717	int lb = scratches.obtain();
				718
				719	// RB -> U * V
				720
				721	CONTEXT_LOAD(rt, generated_vars.rt);
				722	CONTEXT_LOAD(lb, generated_vars.lb);
				723
				724	int offset = pixel;
				725	ADD(AL, 0, offset, lb, rt);
				726	LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset));
				727	SMULBB(AL, u, U, V);
				728	SMULBB(AL, d, pixel, u);
				729	RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2)));
				730
				731	// LB -> (1-U) * V
				732	RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
				733	LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb));
				734	SMULBB(AL, u, U, V);
				735	SMLABB(AL, d, pixel, u, d);
				736	SUB(AL, 0, k, k, u);
				737
				738	// LT -> (1-U)*(1-V)
				739	RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
				740	LDRB(AL, pixel, txPtr.reg);
				741	SMULBB(AL, u, U, V);
				742	SMLABB(AL, d, pixel, u, d);
				743
				744	// RT -> U*(1-V)
				745	LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));
				746	SUB(AL, 0, u, k, u);
				747	SMLABB(AL, texel.reg, pixel, u, d);
				748
				749	for (int i=0 ; i<4 ; i++) {
				750	if (!texel.format.c[i].h) continue;
				751	texel.format.c[i].h = FRAC_BITS*2+8;
				752	texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits in enough
				753	}
				754	texel.format.size = 4;
				755	texel.format.bitsPerPixel = 32;
				756	texel.flags \|= CLEAR_LO;
				757	}
				758
				759	void GGLAssembler::filter16(
Ashok Bhat	3078b13	2014-02-17 15:15:46 +0000	[diff] [blame]	760	const fragment_parts_t& /parts/,
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	761	pixel_t& texel, const texture_unit_t& tmu,
				762	int U, int V, pointer_t& txPtr,
				763	int FRAC_BITS)
				764	{
				765	// compute the mask
				766	// XXX: it would be nice if the mask below could be computed
				767	// automatically.
				768	uint32_t mask = 0;
				769	int shift = 0;
				770	int prec = 0;
				771	switch (tmu.format_idx) {
				772	case GGL_PIXEL_FORMAT_RGB_565:
				773	// source: 00000ggg.ggg00000 \| rrrrr000.000bbbbb
				774	// result: gggggggg.gggrrrrr \| rrrrr0bb.bbbbbbbb
				775	mask = 0x07E0F81F;
				776	shift = 16;
				777	prec = 5;
				778	break;
				779	case GGL_PIXEL_FORMAT_RGBA_4444:
				780	// 0000,1111,0000,1111 \| 0000,1111,0000,1111
				781	mask = 0x0F0F0F0F;
				782	shift = 12;
				783	prec = 4;
				784	break;
				785	case GGL_PIXEL_FORMAT_LA_88:
				786	// 0000,0000,1111,1111 \| 0000,0000,1111,1111
				787	// AALL -> 00AA \| 00LL
				788	mask = 0x00FF00FF;
				789	shift = 8;
				790	prec = 8;
				791	break;
				792	default:
				793	// unsupported format, do something sensical...
Steve Block	01dda20	2012-01-06 14:13:42 +0000	[diff] [blame]	794	ALOGE("Unsupported 16-bits texture format (%d)", tmu.format_idx);
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	795	LDRH(AL, texel.reg, txPtr.reg);
				796	return;
				797	}
				798
				799	const int adjust = FRAC_BITS*2 - prec;
				800	const int round = 0;
				801
				802	// update the texel format
				803	texel.format.size = 4;
				804	texel.format.bitsPerPixel = 32;
				805	texel.flags \|= CLEAR_HI\|CLEAR_LO;
				806	for (int i=0 ; i<4 ; i++) {
				807	if (!texel.format.c[i].h) continue;
				808	const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift;
				809	texel.format.c[i].h = tmu.format.c[i].h + offset + prec;
				810	texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec);
				811	}
				812
				813	// ------------------------
				814	// about ~40 cycles / pixel
				815	Scratch scratches(registerFile());
				816
				817	int pixel= scratches.obtain();
				818	int d = scratches.obtain();
				819	int u = scratches.obtain();
				820	int k = scratches.obtain();
				821
				822	// RB -> U * V
				823	int offset = pixel;
				824	CONTEXT_LOAD(offset, generated_vars.rt);
				825	CONTEXT_LOAD(u, generated_vars.lb);
				826	ADD(AL, 0, offset, offset, u);
				827
				828	LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
				829	SMULBB(AL, u, U, V);
				830	ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
				831	build_and_immediate(pixel, pixel, mask, 32);
				832	if (adjust) {
				833	if (round)
				834	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				835	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				836	}
				837	MUL(AL, 0, d, pixel, u);
				838	RSB(AL, 0, k, u, imm(1<<prec));
				839
				840	// LB -> (1-U) * V
				841	CONTEXT_LOAD(offset, generated_vars.lb);
				842	RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
				843	LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
				844	SMULBB(AL, u, U, V);
				845	ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
				846	build_and_immediate(pixel, pixel, mask, 32);
				847	if (adjust) {
				848	if (round)
				849	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				850	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				851	}
				852	MLA(AL, 0, d, pixel, u, d);
				853	SUB(AL, 0, k, k, u);
				854
				855	// LT -> (1-U)*(1-V)
				856	RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
				857	LDRH(AL, pixel, txPtr.reg);
				858	SMULBB(AL, u, U, V);
				859	ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
				860	build_and_immediate(pixel, pixel, mask, 32);
				861	if (adjust) {
				862	if (round)
				863	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				864	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				865	}
				866	MLA(AL, 0, d, pixel, u, d);
				867
				868	// RT -> U*(1-V)
				869	CONTEXT_LOAD(offset, generated_vars.rt);
				870	LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
				871	SUB(AL, 0, u, k, u);
				872	ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
				873	build_and_immediate(pixel, pixel, mask, 32);
				874	MLA(AL, 0, texel.reg, pixel, u, d);
				875	}
				876
				877	void GGLAssembler::filter24(
Ashok Bhat	3078b13	2014-02-17 15:15:46 +0000	[diff] [blame]	878	const fragment_parts_t& /parts/,
				879	pixel_t& texel, const texture_unit_t& /tmu/,
				880	int /U/, int /V/, pointer_t& txPtr,
				881	int /FRAC_BITS/)
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	882	{
				883	// not supported yet (currently disabled)
				884	load(txPtr, texel, 0);
				885	}
				886
				887	void GGLAssembler::filter32(
Ashok Bhat	3078b13	2014-02-17 15:15:46 +0000	[diff] [blame]	888	const fragment_parts_t& /parts/,
				889	pixel_t& texel, const texture_unit_t& /tmu/,
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	890	int U, int V, pointer_t& txPtr,
				891	int FRAC_BITS)
				892	{
				893	const int adjust = FRAC_BITS*2 - 8;
				894	const int round = 0;
				895
				896	// ------------------------
				897	// about ~38 cycles / pixel
				898	Scratch scratches(registerFile());
				899
				900	int pixel= scratches.obtain();
				901	int dh = scratches.obtain();
				902	int u = scratches.obtain();
				903	int k = scratches.obtain();
				904
				905	int temp = scratches.obtain();
				906	int dl = scratches.obtain();
				907	int mask = scratches.obtain();
				908
				909	MOV(AL, 0, mask, imm(0xFF));
				910	ORR(AL, 0, mask, mask, imm(0xFF0000));
				911
				912	// RB -> U * V
				913	int offset = pixel;
				914	CONTEXT_LOAD(offset, generated_vars.rt);
				915	CONTEXT_LOAD(u, generated_vars.lb);
				916	ADD(AL, 0, offset, offset, u);
				917
				918	LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
				919	SMULBB(AL, u, U, V);
				920	AND(AL, 0, temp, mask, pixel);
				921	if (adjust) {
				922	if (round)
				923	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				924	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				925	}
				926	MUL(AL, 0, dh, temp, u);
				927	AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
				928	MUL(AL, 0, dl, temp, u);
				929	RSB(AL, 0, k, u, imm(0x100));
				930
				931	// LB -> (1-U) * V
				932	CONTEXT_LOAD(offset, generated_vars.lb);
				933	RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
				934	LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
				935	SMULBB(AL, u, U, V);
				936	AND(AL, 0, temp, mask, pixel);
				937	if (adjust) {
				938	if (round)
				939	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				940	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				941	}
				942	MLA(AL, 0, dh, temp, u, dh);
				943	AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
				944	MLA(AL, 0, dl, temp, u, dl);
				945	SUB(AL, 0, k, k, u);
				946
				947	// LT -> (1-U)*(1-V)
				948	RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
				949	LDR(AL, pixel, txPtr.reg);
				950	SMULBB(AL, u, U, V);
				951	AND(AL, 0, temp, mask, pixel);
				952	if (adjust) {
				953	if (round)
				954	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				955	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				956	}
				957	MLA(AL, 0, dh, temp, u, dh);
				958	AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
				959	MLA(AL, 0, dl, temp, u, dl);
				960
				961	// RT -> U*(1-V)
				962	CONTEXT_LOAD(offset, generated_vars.rt);
				963	LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
				964	SUB(AL, 0, u, k, u);
				965	AND(AL, 0, temp, mask, pixel);
				966	MLA(AL, 0, dh, temp, u, dh);
				967	AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
				968	MLA(AL, 0, dl, temp, u, dl);
				969
				970	AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8));
				971	AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
				972	ORR(AL, 0, texel.reg, dh, dl);
				973	}
				974
				975	void GGLAssembler::build_texture_environment(
				976	component_t& fragment,
				977	const fragment_parts_t& parts,
				978	int component,
				979	Scratch& regs)
				980	{
				981	const uint32_t component_mask = 1<<component;
				982	const bool multiTexture = mTextureMachine.activeUnits > 1;
				983	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
				984	texture_unit_t& tmu = mTextureMachine.tmu[i];
				985
				986	if (tmu.mask & component_mask) {
				987	// replace or modulate with this texture
				988	if ((tmu.replaced & component_mask) == 0) {
				989	// not replaced by a later tmu...
				990
				991	Scratch scratches(registerFile());
				992	pixel_t texel(parts.texel[i]);
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	993
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	994	if (multiTexture &&
				995	tmu.swrap == GGL_NEEDS_WRAP_11 &&
				996	tmu.twrap == GGL_NEEDS_WRAP_11)
				997	{
				998	texel.reg = scratches.obtain();
				999	texel.flags \|= CORRUPTIBLE;
				1000	comment("fetch texel (multitexture 1:1)");
				1001	load(parts.coords[i].ptr, texel, WRITE_BACK);
				1002	}
				1003
				1004	component_t incoming(fragment);
				1005	modify(fragment, regs);
				1006
				1007	switch (tmu.env) {
				1008	case GGL_REPLACE:
				1009	extract(fragment, texel, component);
				1010	break;
				1011	case GGL_MODULATE:
				1012	modulate(fragment, incoming, texel, component);
				1013	break;
				1014	case GGL_DECAL:
				1015	decal(fragment, incoming, texel, component);
				1016	break;
				1017	case GGL_BLEND:
				1018	blend(fragment, incoming, texel, component, i);
				1019	break;
The Android Open Source Project	35237d1	2008-12-17 18:08:08 -0800	[diff] [blame]	1020	case GGL_ADD:
				1021	add(fragment, incoming, texel, component);
				1022	break;
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	1023	}
				1024	}
				1025	}
				1026	}
				1027	}
				1028
				1029	// ---------------------------------------------------------------------------
				1030
				1031	void GGLAssembler::wrapping(
				1032	int d,
				1033	int coord, int size,
				1034	int tx_wrap, int tx_linear)
				1035	{
				1036	// notes:
				1037	// if tx_linear is set, we need 4 extra bits of precision on the result
				1038	// SMULL/UMULL is 3 cycles
				1039	Scratch scratches(registerFile());
				1040	int c = coord;
				1041	if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) {
				1042	// UMULL takes 4 cycles (interlocked), and we can get away with
				1043	// 2 cycles using SMULWB, but we're loosing 16 bits of precision
				1044	// out of 32 (this is not a problem because the iterator keeps
				1045	// its full precision)
				1046	// UMULL(AL, 0, size, d, c, size);
				1047	// note: we can't use SMULTB because it's signed.
				1048	MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear));
				1049	SMULWB(AL, d, d, size);
				1050	} else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) {
				1051	if (tx_linear) {
				1052	// 1 cycle
				1053	MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear));
				1054	} else {
				1055	// 4 cycles (common case)
				1056	MOV(AL, 0, d, reg_imm(coord, ASR, 16));
				1057	BIC(AL, 0, d, d, reg_imm(d, ASR, 31));
				1058	CMP(AL, d, size);
				1059	SUB(GE, 0, d, size, imm(1));
				1060	}
				1061	}
				1062	}
				1063
				1064	// ---------------------------------------------------------------------------
				1065
				1066	void GGLAssembler::modulate(
				1067	component_t& dest,
				1068	const component_t& incoming,
				1069	const pixel_t& incomingTexel, int component)
				1070	{
				1071	Scratch locals(registerFile());
				1072	integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
				1073	extract(texel, incomingTexel, component);
				1074
				1075	const int Nt = texel.size();
				1076	// Nt should always be less than 10 bits because it comes
				1077	// from the TMU.
				1078
				1079	int Ni = incoming.size();
				1080	// Ni could be big because it comes from previous MODULATEs
				1081
				1082	if (Nt == 1) {
				1083	// texel acts as a bit-mask
				1084	// dest = incoming & ((texel << incoming.h)-texel)
				1085	RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h));
				1086	AND(AL, 0, dest.reg, dest.reg, incoming.reg);
				1087	dest.l = incoming.l;
				1088	dest.h = incoming.h;
				1089	dest.flags \|= (incoming.flags & CLEAR_LO);
				1090	} else if (Ni == 1) {
				1091	MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h));
				1092	AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31));
				1093	dest.l = 0;
				1094	dest.h = Nt;
				1095	} else {
				1096	int inReg = incoming.reg;
				1097	int shift = incoming.l;
				1098	if ((Nt + Ni) > 32) {
				1099	// we will overflow, reduce the precision of Ni to 8 bits
				1100	// (Note Nt cannot be more than 10 bits which happens with
				1101	// 565 textures and GGL_LINEAR)
				1102	shift += Ni-8;
				1103	Ni = 8;
				1104	}
				1105
				1106	// modulate by the component with the lowest precision
				1107	if (Nt >= Ni) {
				1108	if (shift) {
				1109	// XXX: we should be able to avoid this shift
				1110	// when shift==16 && Nt<16 && Ni<16, in which
				1111	// we could use SMULBT below.
				1112	MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
				1113	inReg = dest.reg;
				1114	shift = 0;
				1115	}
				1116	// operation: (Cf*Ct)/((1<<Ni)-1)
				1117	// approximated with: Cf*(Ct + Ct>>(Ni-1))>>Ni
				1118	// this operation doesn't change texel's size
				1119	ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1));
				1120	if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg);
				1121	else MUL(AL, 0, dest.reg, texel.reg, dest.reg);
				1122	dest.l = Ni;
				1123	dest.h = Nt + Ni;
				1124	} else {
				1125	if (shift && (shift != 16)) {
				1126	// if shift==16, we can use 16-bits mul instructions later
				1127	MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
				1128	inReg = dest.reg;
				1129	shift = 0;
				1130	}
				1131	// operation: (Cf*Ct)/((1<<Nt)-1)
				1132	// approximated with: Ct*(Cf + Cf>>(Nt-1))>>Nt
				1133	// this operation doesn't change incoming's size
				1134	Scratch scratches(registerFile());
				1135	int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg;
				1136	if (t == inReg)
				1137	t = scratches.obtain();
				1138	ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1));
				1139	if (Nt<16 && Ni<16) {
				1140	if (shift==16) SMULBT(AL, dest.reg, t, inReg);
				1141	else SMULBB(AL, dest.reg, t, inReg);
				1142	} else MUL(AL, 0, dest.reg, t, inReg);
				1143	dest.l = Nt;
				1144	dest.h = Nt + Ni;
				1145	}
				1146
				1147	// low bits are not valid
				1148	dest.flags \|= CLEAR_LO;
				1149
				1150	// no need to keep more than 8 bits/component
				1151	if (dest.size() > 8)
				1152	dest.l = dest.h-8;
				1153	}
				1154	}
				1155
				1156	void GGLAssembler::decal(
				1157	component_t& dest,
				1158	const component_t& incoming,
				1159	const pixel_t& incomingTexel, int component)
				1160	{
				1161	// RGBA:
				1162	// Cv = Cf(1 - At) + CtAt = Cf + (Ct - Cf)*At
				1163	// Av = Af
				1164	Scratch locals(registerFile());
				1165	integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
				1166	integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
				1167	extract(texel, incomingTexel, component);
				1168	extract(factor, incomingTexel, GGLFormat::ALPHA);
				1169
				1170	// no need to keep more than 8-bits for decal
				1171	int Ni = incoming.size();
				1172	int shift = incoming.l;
				1173	if (Ni > 8) {
				1174	shift += Ni-8;
				1175	Ni = 8;
				1176	}
				1177	integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
				1178	if (shift) {
				1179	MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
				1180	incomingNorm.reg = dest.reg;
				1181	incomingNorm.flags \|= CORRUPTIBLE;
				1182	}
				1183	ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
				1184	build_blendOneMinusFF(dest, factor, incomingNorm, texel);
				1185	}
				1186
				1187	void GGLAssembler::blend(
				1188	component_t& dest,
				1189	const component_t& incoming,
				1190	const pixel_t& incomingTexel, int component, int tmu)
				1191	{
				1192	// RGBA:
				1193	// Cv = (1 - Ct)Cf + CtCc = Cf + (Cc - Cf)*Ct
				1194	// Av = At*Af
				1195
				1196	if (component == GGLFormat::ALPHA) {
				1197	modulate(dest, incoming, incomingTexel, component);
				1198	return;
				1199	}
				1200
				1201	Scratch locals(registerFile());
				1202	integer_t color(locals.obtain(), 8, CORRUPTIBLE);
				1203	integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
				1204	LDRB(AL, color.reg, mBuilderContext.Rctx,
				1205	immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component])));
				1206	extract(factor, incomingTexel, component);
				1207
				1208	// no need to keep more than 8-bits for blend
				1209	int Ni = incoming.size();
				1210	int shift = incoming.l;
				1211	if (Ni > 8) {
				1212	shift += Ni-8;
				1213	Ni = 8;
				1214	}
				1215	integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
				1216	if (shift) {
				1217	MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
				1218	incomingNorm.reg = dest.reg;
				1219	incomingNorm.flags \|= CORRUPTIBLE;
				1220	}
				1221	ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
				1222	build_blendOneMinusFF(dest, factor, incomingNorm, color);
				1223	}
				1224
The Android Open Source Project	35237d1	2008-12-17 18:08:08 -0800	[diff] [blame]	1225	void GGLAssembler::add(
				1226	component_t& dest,
				1227	const component_t& incoming,
				1228	const pixel_t& incomingTexel, int component)
				1229	{
				1230	// RGBA:
				1231	// Cv = Cf + Ct;
				1232	Scratch locals(registerFile());
				1233
				1234	component_t incomingTemp(incoming);
				1235
				1236	// use "dest" as a temporary for extracting the texel, unless "dest"
				1237	// overlaps "incoming".
				1238	integer_t texel(dest.reg, 32, CORRUPTIBLE);
				1239	if (dest.reg == incomingTemp.reg)
				1240	texel.reg = locals.obtain();
				1241	extract(texel, incomingTexel, component);
				1242
				1243	if (texel.s < incomingTemp.size()) {
				1244	expand(texel, texel, incomingTemp.size());
				1245	} else if (texel.s > incomingTemp.size()) {
				1246	if (incomingTemp.flags & CORRUPTIBLE) {
				1247	expand(incomingTemp, incomingTemp, texel.s);
				1248	} else {
				1249	incomingTemp.reg = locals.obtain();
				1250	expand(incomingTemp, incoming, texel.s);
				1251	}
				1252	}
				1253
				1254	if (incomingTemp.l) {
				1255	ADD(AL, 0, dest.reg, texel.reg,
				1256	reg_imm(incomingTemp.reg, LSR, incomingTemp.l));
				1257	} else {
				1258	ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg);
				1259	}
				1260	dest.l = 0;
				1261	dest.h = texel.size();
				1262	component_sat(dest);
				1263	}
				1264
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	1265	// ----------------------------------------------------------------------------
				1266
				1267	}; // namespace android
				1268