Blame - libpixelflinger/codeflinger/texturing.cpp - android_system_core

blob: f4f465796ebd6bdcd923a699d932de1f2d39dfa6 [file] [log] [blame]

The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	1	/* libs/pixelflinger/codeflinger/texturing.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	8	**
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	9	** http://www.apache.org/licenses/LICENSE-2.0
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	10	**
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	15	** limitations under the License.
				16	*/
				17
Mark Salyzyn	cfd5b08	2016-10-17 14:28:00 -0700	[diff] [blame]	18	#define LOG_TAG "pixelflinger-code"
				19
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	20	#include <assert.h>
				21	#include <stdint.h>
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	22	#include <stdio.h>
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	23	#include <stdlib.h>
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	24	#include <sys/types.h>
				25
Mark Salyzyn	66ce3e0	2016-09-28 10:07:20 -0700	[diff] [blame]	26	#include <android/log.h>
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	27
Mathias Agopian	9857d99	2013-04-01 15:17:55 -0700	[diff] [blame]	28	#include "GGLAssembler.h"
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	29
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	30	namespace android {
				31
				32	// ---------------------------------------------------------------------------
				33
// iterators are initialized like this:
// (intToFixedCenter(x) * dx)>>16 + x0
// ((x<<16 + 0x8000) * dx)>>16 + x0
// ((x<<16)*dx + (0x8000*dx))>>16 + x0
// ( (x*dx) + dx>>1 ) + x0
// (x*dx) + (dx>>1 + x0)
				40
				41	void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
				42	{
				43	context_t const* c = mBuilderContext.c;
				44	const needs_t& needs = mBuilderContext.needs;
				45
				46	if (mSmooth) {
				47	// NOTE: we could take this case in the mDithering + !mSmooth case,
				48	// but this would use up to 4 more registers for the color components
				49	// for only a little added quality.
				50	// Currently, this causes the system to run out of registers in
				51	// some case (see issue #719496)
				52
				53	comment("compute initial iterated color (smooth and/or dither case)");
				54
				55	parts.iterated_packed = 0;
				56	parts.packed = 0;
				57
				58	// 0x1: color component
				59	// 0x2: iterators
				60	const int optReload = mOptLevel >> 1;
				61	if (optReload >= 3) parts.reload = 0; // reload nothing
				62	else if (optReload == 2) parts.reload = 2; // reload iterators
				63	else if (optReload == 1) parts.reload = 1; // reload colors
				64	else if (optReload <= 0) parts.reload = 3; // reload both
				65
				66	if (!mSmooth) {
				67	// we're not smoothing (just dithering), we never have to
				68	// reload the iterators
				69	parts.reload &= ~2;
				70	}
				71
				72	Scratch scratches(registerFile());
				73	const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
				74	const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
				75	for (int i=0 ; i<4 ; i++) {
				76	if (!mInfo[i].iterated)
				77	continue;
				78
				79	// this component exists in the destination and is not replaced
				80	// by a texture unit.
				81	const int c = (parts.reload & 1) ? t0 : obtainReg();
				82	if (i==0) CONTEXT_LOAD(c, iterators.ydady);
				83	if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
				84	if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
				85	if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
				86	parts.argb[i].reg = c;
				87
				88	if (mInfo[i].smooth) {
				89	parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
				90	const int dvdx = parts.argb_dx[i].reg;
				91	CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
				92	MLA(AL, 0, c, x.reg, dvdx, c);
				93
				94	// adjust the color iterator to make sure it won't overflow
				95	if (!mAA) {
				96	// this is not needed when we're using anti-aliasing
				97	// because we will (have to) clamp the components
				98	// anyway.
				99	int end = scratches.obtain();
				100	MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16));
				101	MLA(AL, 1, end, dvdx, end, c);
				102	SUB(MI, 0, c, c, end);
				103	BIC(AL, 0, c, c, reg_imm(c, ASR, 31));
				104	scratches.recycle(end);
				105	}
				106	}
				107
				108	if (parts.reload & 1) {
				109	CONTEXT_STORE(c, generated_vars.argb[i].c);
				110	}
				111	}
				112	} else {
				113	// We're not smoothed, so we can
				114	// just use a packed version of the color and extract the
				115	// components as needed (or not at all if we don't blend)
				116
				117	// figure out if we need the iterated color
				118	int load = 0;
				119	for (int i=0 ; i<4 ; i++) {
				120	component_info_t& info = mInfo[i];
				121	if ((info.inDest \|\| info.needed) && !info.replaced)
				122	load \|= 1;
				123	}
				124
				125	parts.iterated_packed = 1;
				126	parts.packed = (!mTextureMachine.mask && !mBlending
				127	&& !mFog && !mDithering);
				128	parts.reload = 0;
				129	if (load \|\| parts.packed) {
				130	if (mBlending \|\| mDithering \|\| mInfo[GGLFormat::ALPHA].needed) {
				131	comment("load initial iterated color (8888 packed)");
				132	parts.iterated.setTo(obtainReg(),
				133	&(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
				134	CONTEXT_LOAD(parts.iterated.reg, packed8888);
				135	} else {
				136	comment("load initial iterated color (dest format packed)");
				137
				138	parts.iterated.setTo(obtainReg(), &mCbFormat);
				139
				140	// pre-mask the iterated color
				141	const int bits = parts.iterated.size();
				142	const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
				143	uint32_t mask = 0;
				144	if (mMasking) {
				145	for (int i=0 ; i<4 ; i++) {
				146	const int component_mask = 1<<i;
				147	const int h = parts.iterated.format.c[i].h;
				148	const int l = parts.iterated.format.c[i].l;
				149	if (h && (!(mMasking & component_mask))) {
				150	mask \|= ((1<<(h-l))-1) << l;
				151	}
				152	}
				153	}
				154
				155	if (mMasking && ((mask & size)==0)) {
				156	// none of the components are present in the mask
				157	} else {
				158	CONTEXT_LOAD(parts.iterated.reg, packed);
				159	if (mCbFormat.size == 1) {
				160	AND(AL, 0, parts.iterated.reg,
				161	parts.iterated.reg, imm(0xFF));
				162	} else if (mCbFormat.size == 2) {
				163	MOV(AL, 0, parts.iterated.reg,
				164	reg_imm(parts.iterated.reg, LSR, 16));
				165	}
				166	}
				167
				168	// pre-mask the iterated color
				169	if (mMasking) {
				170	build_and_immediate(parts.iterated.reg, parts.iterated.reg,
				171	mask, bits);
				172	}
				173	}
				174	}
				175	}
				176	}
				177
				178	void GGLAssembler::build_iterated_color(
				179	component_t& fragment,
				180	const fragment_parts_t& parts,
				181	int component,
				182	Scratch& regs)
				183	{
				184	fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);
				185
				186	if (!mInfo[component].iterated)
				187	return;
				188
				189	if (parts.iterated_packed) {
				190	// iterated colors are packed, extract the one we need
				191	extract(fragment, parts.iterated, component);
				192	} else {
				193	fragment.h = GGL_COLOR_BITS;
				194	fragment.l = GGL_COLOR_BITS - 8;
				195	fragment.flags \|= CLEAR_LO;
				196	// iterated colors are held in their own register,
				197	// (smooth and/or dithering case)
				198	if (parts.reload==3) {
				199	// this implies mSmooth
				200	Scratch scratches(registerFile());
				201	int dx = scratches.obtain();
				202	CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
				203	CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
				204	ADD(AL, 0, dx, fragment.reg, dx);
				205	CONTEXT_STORE(dx, generated_vars.argb[component].c);
				206	} else if (parts.reload & 1) {
				207	CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
				208	} else {
				209	// we don't reload, so simply rename the register and mark as
				210	// non CORRUPTIBLE so that the texture env or blending code
				211	// won't modify this (renamed) register
				212	regs.recycle(fragment.reg);
				213	fragment.reg = parts.argb[component].reg;
				214	fragment.flags &= ~CORRUPTIBLE;
				215	}
				216	if (mInfo[component].smooth && mAA) {
				217	// when using smooth shading AND anti-aliasing, we need to clamp
				218	// the iterators because there is always an extra pixel on the
				219	// edges, which most of the time will cause an overflow
				220	// (since technically its outside of the domain).
				221	BIC(AL, 0, fragment.reg, fragment.reg,
				222	reg_imm(fragment.reg, ASR, 31));
				223	component_sat(fragment);
				224	}
				225	}
				226	}
				227
				228	// ---------------------------------------------------------------------------
				229
				230	void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs)
				231	{
				232	// gather some informations about the components we need to process...
				233	const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) \| GGL_CLEAR;
				234	switch(opcode) {
				235	case GGL_COPY:
				236	mLogicOp = 0;
				237	break;
				238	case GGL_CLEAR:
				239	case GGL_SET:
				240	mLogicOp = LOGIC_OP;
				241	break;
				242	case GGL_AND:
				243	case GGL_AND_REVERSE:
				244	case GGL_AND_INVERTED:
				245	case GGL_XOR:
				246	case GGL_OR:
				247	case GGL_NOR:
				248	case GGL_EQUIV:
				249	case GGL_OR_REVERSE:
				250	case GGL_OR_INVERTED:
				251	case GGL_NAND:
				252	mLogicOp = LOGIC_OP\|LOGIC_OP_SRC\|LOGIC_OP_DST;
				253	break;
				254	case GGL_NOOP:
				255	case GGL_INVERT:
				256	mLogicOp = LOGIC_OP\|LOGIC_OP_DST;
				257	break;
				258	case GGL_COPY_INVERTED:
				259	mLogicOp = LOGIC_OP\|LOGIC_OP_SRC;
				260	break;
				261	};
				262	}
				263
				264	void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
				265	{
				266	uint8_t replaced=0;
				267	mTextureMachine.mask = 0;
				268	mTextureMachine.activeUnits = 0;
				269	for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
				270	texture_unit_t& tmu = mTextureMachine.tmu[i];
				271	if (replaced == 0xF) {
				272	// all components are replaced, skip this TMU.
				273	tmu.format_idx = 0;
				274	tmu.mask = 0;
				275	tmu.replaced = replaced;
				276	continue;
				277	}
				278	tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
				279	tmu.format = c->formats[tmu.format_idx];
				280	tmu.bits = tmu.format.size*8;
				281	tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
				282	tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
				283	tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
				284	tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
				285	tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
				286	&& tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now
				287
				288	// 5551 linear filtering is not supported
				289	if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
				290	tmu.linear = 0;
				291
				292	tmu.mask = 0;
				293	tmu.replaced = replaced;
				294
				295	if (tmu.format_idx) {
				296	mTextureMachine.activeUnits++;
				297	if (tmu.format.c[0].h) tmu.mask \|= 0x1;
				298	if (tmu.format.c[1].h) tmu.mask \|= 0x2;
				299	if (tmu.format.c[2].h) tmu.mask \|= 0x4;
				300	if (tmu.format.c[3].h) tmu.mask \|= 0x8;
				301	if (tmu.env == GGL_REPLACE) {
				302	replaced \|= tmu.mask;
				303	} else if (tmu.env == GGL_DECAL) {
				304	if (!tmu.format.c[GGLFormat::ALPHA].h) {
				305	// if we don't have alpha, decal does nothing
				306	tmu.mask = 0;
				307	} else {
				308	// decal always ignores At
				309	tmu.mask &= ~(1<<GGLFormat::ALPHA);
				310	}
				311	}
				312	}
				313	mTextureMachine.mask \|= tmu.mask;
				314	//printf("%d: mask=%08lx, replaced=%08lx\n",
				315	// i, int(tmu.mask), int(tmu.replaced));
				316	}
				317	mTextureMachine.replaced = replaced;
				318	mTextureMachine.directTexture = 0;
				319	//printf("replaced=%08lx\n", mTextureMachine.replaced);
				320	}
				321
				322
				323	void GGLAssembler::init_textures(
				324	tex_coord_t* coords,
				325	const reg_t& x, const reg_t& y)
				326	{
				327	context_t const* c = mBuilderContext.c;
				328	const needs_t& needs = mBuilderContext.needs;
				329	int Rctx = mBuilderContext.Rctx;
				330	int Rx = x.reg;
				331	int Ry = y.reg;
				332
				333	if (mTextureMachine.mask) {
				334	comment("compute texture coordinates");
				335	}
				336
				337	// init texture coordinates for each tmu
				338	const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
				339	const bool multiTexture = mTextureMachine.activeUnits > 1;
				340	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
				341	const texture_unit_t& tmu = mTextureMachine.tmu[i];
				342	if (tmu.format_idx == 0)
				343	continue;
				344	if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
				345	(tmu.twrap == GGL_NEEDS_WRAP_11))
				346	{
				347	// 1:1 texture
				348	pointer_t& txPtr = coords[i].ptr;
				349	txPtr.setTo(obtainReg(), tmu.bits);
				350	CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
				351	ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16)); // x += (s>>16)
				352	CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
				353	ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16)); // y += (t>>16)
				354	// merge base & offset
				355	CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
				356	SMLABB(AL, Rx, Ry, txPtr.reg, Rx); // x+y*stride
Ashok Bhat	bfc6dc4	2013-02-21 10:27:40 +0000	[diff] [blame]	357	CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	358	base_offset(txPtr, txPtr, Rx);
				359	} else {
				360	Scratch scratches(registerFile());
				361	reg_t& s = coords[i].s;
				362	reg_t& t = coords[i].t;
				363	// s = (x * dsdx)>>16 + ydsdy
				364	// s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
				365	// t = (x * dtdx)>>16 + ydtdy
				366	// t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
				367	s.setTo(obtainReg());
				368	t.setTo(obtainReg());
				369	const int need_w = GGL_READ_NEEDS(W, needs.n);
				370	if (need_w) {
				371	CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
				372	CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
				373	} else {
				374	int ydsdy = scratches.obtain();
				375	int ydtdy = scratches.obtain();
				376	CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx);
				377	CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
				378	CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx);
				379	CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
				380	MLA(AL, 0, s.reg, Rx, s.reg, ydsdy);
				381	MLA(AL, 0, t.reg, Rx, t.reg, ydtdy);
				382	}
				383
				384	if ((mOptLevel&1)==0) {
				385	CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
				386	CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
				387	recycleReg(s.reg);
				388	recycleReg(t.reg);
				389	}
				390	}
				391
				392	// direct texture?
				393	if (!multiTexture && !mBlending && !mDithering && !mFog &&
				394	cb_format_idx == tmu.format_idx && !tmu.linear &&
				395	mTextureMachine.replaced == tmu.mask)
				396	{
				397	mTextureMachine.directTexture = i + 1;
				398	}
				399	}
				400	}
				401
				402	void GGLAssembler::build_textures( fragment_parts_t& parts,
				403	Scratch& regs)
				404	{
				405	context_t const* c = mBuilderContext.c;
				406	const needs_t& needs = mBuilderContext.needs;
				407	int Rctx = mBuilderContext.Rctx;
				408
				409	// We don't have a way to spill registers automatically
				410	// spill depth and AA regs, when we know we may have to.
				411	// build the spill list...
				412	uint32_t spill_list = 0;
				413	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
				414	const texture_unit_t& tmu = mTextureMachine.tmu[i];
				415	if (tmu.format_idx == 0)
				416	continue;
				417	if (tmu.linear) {
				418	// we may run out of register if we have linear filtering
				419	// at 1 or 4 bytes / pixel on any texture unit.
				420	if (tmu.format.size == 1) {
				421	// if depth and AA enabled, we'll run out of 1 register
				422	if (parts.z.reg > 0 && parts.covPtr.reg > 0)
				423	spill_list \|= 1<<parts.covPtr.reg;
				424	}
				425	if (tmu.format.size == 4) {
				426	// if depth or AA enabled, we'll run out of 1 or 2 registers
				427	if (parts.z.reg > 0)
				428	spill_list \|= 1<<parts.z.reg;
				429	if (parts.covPtr.reg > 0)
				430	spill_list \|= 1<<parts.covPtr.reg;
				431	}
				432	}
				433	}
				434
				435	Spill spill(registerFile(), *this, spill_list);
				436
				437	const bool multiTexture = mTextureMachine.activeUnits > 1;
				438	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
				439	const texture_unit_t& tmu = mTextureMachine.tmu[i];
				440	if (tmu.format_idx == 0)
				441	continue;
				442
				443	pointer_t& txPtr = parts.coords[i].ptr;
				444	pixel_t& texel = parts.texel[i];
				445
				446	// repeat...
				447	if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
				448	(tmu.twrap == GGL_NEEDS_WRAP_11))
				449	{ // 1:1 textures
				450	comment("fetch texel");
				451	texel.setTo(regs.obtain(), &tmu.format);
				452	load(txPtr, texel, WRITE_BACK);
				453	} else {
				454	Scratch scratches(registerFile());
				455	reg_t& s = parts.coords[i].s;
				456	reg_t& t = parts.coords[i].t;
				457	if ((mOptLevel&1)==0) {
				458	comment("reload s/t (multitexture or linear filtering)");
				459	s.reg = scratches.obtain();
				460	t.reg = scratches.obtain();
				461	CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
				462	CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
				463	}
				464
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	465	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				466	return;
				467
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	468	comment("compute repeat/clamp");
				469	int u = scratches.obtain();
				470	int v = scratches.obtain();
				471	int width = scratches.obtain();
				472	int height = scratches.obtain();
				473	int U = 0;
				474	int V = 0;
				475
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	476	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				477	return;
				478
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	479	CONTEXT_LOAD(width, generated_vars.texture[i].width);
				480	CONTEXT_LOAD(height, generated_vars.texture[i].height);
				481
				482	int FRAC_BITS = 0;
				483	if (tmu.linear) {
				484	// linear interpolation
				485	if (tmu.format.size == 1) {
				486	// for 8-bits textures, we can afford
				487	// 7 bits of fractional precision at no
				488	// additional cost (we can't do 8 bits
				489	// because filter8 uses signed 16 bits muls)
				490	FRAC_BITS = 7;
				491	} else if (tmu.format.size == 2) {
				492	// filter16() is internally limited to 4 bits, so:
				493	// FRAC_BITS=2 generates less instructions,
				494	// FRAC_BITS=3,4,5 creates unpleasant artifacts,
				495	// FRAC_BITS=6+ looks good
				496	FRAC_BITS = 6;
				497	} else if (tmu.format.size == 4) {
				498	// filter32() is internally limited to 8 bits, so:
				499	// FRAC_BITS=4 looks good
				500	// FRAC_BITS=5+ looks better, but generates 3 extra ipp
				501	FRAC_BITS = 6;
				502	} else {
				503	// for all other cases we use 4 bits.
				504	FRAC_BITS = 4;
				505	}
				506	}
				507	wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS);
				508	wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);
				509
				510	if (tmu.linear) {
				511	comment("compute linear filtering offsets");
				512	// pixel size scale
				513	const int shift = 31 - gglClz(tmu.format.size);
				514	U = scratches.obtain();
				515	V = scratches.obtain();
				516
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	517	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				518	return;
				519
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	520	// sample the texel center
				521	SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
				522	SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));
				523
				524	// get the fractionnal part of U,V
				525	AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));
				526	AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));
				527
				528	// compute width-1 and height-1
				529	SUB(AL, 0, width, width, imm(1));
				530	SUB(AL, 0, height, height, imm(1));
				531
				532	// get the integer part of U,V and clamp/wrap
				533	// and compute offset to the next texel
				534	if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
				535	// u has already been REPEATed
				536	MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
				537	MOV(MI, 0, u, width);
				538	CMP(AL, u, width);
				539	MOV(LT, 0, width, imm(1 << shift));
				540	if (shift)
				541	MOV(GE, 0, width, reg_imm(width, LSL, shift));
				542	RSB(GE, 0, width, width, imm(0));
				543	} else {
				544	// u has not been CLAMPed yet
				545	// algorithm:
				546	// if ((u>>4) >= width)
				547	// u = width<<4
				548	// width = 0
				549	// else
				550	// width = 1<<shift
				551	// u = u>>4; // get integer part
				552	// if (u<0)
				553	// u = 0
				554	// width = 0
				555	// generated_vars.rt = width
				556
				557	CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));
				558	MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));
				559	MOV(LE, 0, width, imm(0));
				560	MOV(GT, 0, width, imm(1 << shift));
				561	MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
				562	MOV(MI, 0, u, imm(0));
				563	MOV(MI, 0, width, imm(0));
				564	}
				565	CONTEXT_STORE(width, generated_vars.rt);
				566
				567	const int stride = width;
				568	CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
				569	if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
				570	// v has already been REPEATed
				571	MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
				572	MOV(MI, 0, v, height);
				573	CMP(AL, v, height);
				574	MOV(LT, 0, height, imm(1 << shift));
				575	if (shift)
				576	MOV(GE, 0, height, reg_imm(height, LSL, shift));
				577	RSB(GE, 0, height, height, imm(0));
				578	MUL(AL, 0, height, stride, height);
				579	} else {
Martyn Capewell	96dbb4f	2009-12-07 13:59:59 +0000	[diff] [blame]	580	// v has not been CLAMPed yet
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	581	CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
				582	MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
				583	MOV(LE, 0, height, imm(0));
				584	if (shift) {
				585	MOV(GT, 0, height, reg_imm(stride, LSL, shift));
				586	} else {
				587	MOV(GT, 0, height, stride);
				588	}
				589	MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
				590	MOV(MI, 0, v, imm(0));
				591	MOV(MI, 0, height, imm(0));
				592	}
				593	CONTEXT_STORE(height, generated_vars.lb);
				594	}
				595
				596	scratches.recycle(width);
				597	scratches.recycle(height);
				598
				599	// iterate texture coordinates...
				600	comment("iterate s,t");
				601	int dsdx = scratches.obtain();
				602	int dtdx = scratches.obtain();
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	603
				604	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				605	return;
				606
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	607	CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
				608	CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
				609	ADD(AL, 0, s.reg, s.reg, dsdx);
				610	ADD(AL, 0, t.reg, t.reg, dtdx);
				611	if ((mOptLevel&1)==0) {
				612	CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
				613	CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
				614	scratches.recycle(s.reg);
				615	scratches.recycle(t.reg);
				616	}
				617	scratches.recycle(dsdx);
				618	scratches.recycle(dtdx);
				619
				620	// merge base & offset...
				621	comment("merge base & offset");
				622	texel.setTo(regs.obtain(), &tmu.format);
				623	txPtr.setTo(texel.reg, tmu.bits);
				624	int stride = scratches.obtain();
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	625
				626	if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
				627	return;
				628
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	629	CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
Ashok Bhat	bfc6dc4	2013-02-21 10:27:40 +0000	[diff] [blame]	630	CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	631	SMLABB(AL, u, v, stride, u); // u+v*stride
				632	base_offset(txPtr, txPtr, u);
				633
				634	// load texel
				635	if (!tmu.linear) {
				636	comment("fetch texel");
				637	load(txPtr, texel, 0);
				638	} else {
				639	// recycle registers we don't need anymore
				640	scratches.recycle(u);
				641	scratches.recycle(v);
				642	scratches.recycle(stride);
				643
				644	comment("fetch texel, bilinear");
				645	switch (tmu.format.size) {
				646	case 1: filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
				647	case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
				648	case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
				649	case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
				650	}
				651	}
				652	}
				653	}
				654	}
				655
				656	void GGLAssembler::build_iterate_texture_coordinates(
				657	const fragment_parts_t& parts)
				658	{
				659	const bool multiTexture = mTextureMachine.activeUnits > 1;
				660	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
				661	const texture_unit_t& tmu = mTextureMachine.tmu[i];
				662	if (tmu.format_idx == 0)
				663	continue;
				664
				665	if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
				666	(tmu.twrap == GGL_NEEDS_WRAP_11))
				667	{ // 1:1 textures
				668	const pointer_t& txPtr = parts.coords[i].ptr;
				669	ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3));
				670	} else {
				671	Scratch scratches(registerFile());
				672	int s = parts.coords[i].s.reg;
				673	int t = parts.coords[i].t.reg;
				674	if ((mOptLevel&1)==0) {
				675	s = scratches.obtain();
				676	t = scratches.obtain();
				677	CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
				678	CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
				679	}
				680	int dsdx = scratches.obtain();
				681	int dtdx = scratches.obtain();
				682	CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
				683	CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
				684	ADD(AL, 0, s, s, dsdx);
				685	ADD(AL, 0, t, t, dtdx);
				686	if ((mOptLevel&1)==0) {
				687	CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
				688	CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
				689	}
				690	}
				691	}
				692	}
				693
				694	void GGLAssembler::filter8(
Ashok Bhat	3078b13	2014-02-17 15:15:46 +0000	[diff] [blame]	695	const fragment_parts_t& /parts/,
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	696	pixel_t& texel, const texture_unit_t& tmu,
				697	int U, int V, pointer_t& txPtr,
				698	int FRAC_BITS)
				699	{
				700	if (tmu.format.components != GGL_ALPHA &&
				701	tmu.format.components != GGL_LUMINANCE)
				702	{
				703	// this is a packed format, and we don't support
				704	// linear filtering (it's probably RGB 332)
				705	// Should not happen with OpenGL\|ES
				706	LDRB(AL, texel.reg, txPtr.reg);
				707	return;
				708	}
				709
				710	// ------------------------
				711	// about ~22 cycles / pixel
				712	Scratch scratches(registerFile());
				713
				714	int pixel= scratches.obtain();
				715	int d = scratches.obtain();
				716	int u = scratches.obtain();
				717	int k = scratches.obtain();
				718	int rt = scratches.obtain();
				719	int lb = scratches.obtain();
				720
				721	// RB -> U * V
				722
				723	CONTEXT_LOAD(rt, generated_vars.rt);
				724	CONTEXT_LOAD(lb, generated_vars.lb);
				725
				726	int offset = pixel;
				727	ADD(AL, 0, offset, lb, rt);
				728	LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset));
				729	SMULBB(AL, u, U, V);
				730	SMULBB(AL, d, pixel, u);
				731	RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2)));
				732
				733	// LB -> (1-U) * V
				734	RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
				735	LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb));
				736	SMULBB(AL, u, U, V);
				737	SMLABB(AL, d, pixel, u, d);
				738	SUB(AL, 0, k, k, u);
				739
				740	// LT -> (1-U)*(1-V)
				741	RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
				742	LDRB(AL, pixel, txPtr.reg);
				743	SMULBB(AL, u, U, V);
				744	SMLABB(AL, d, pixel, u, d);
				745
				746	// RT -> U*(1-V)
				747	LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));
				748	SUB(AL, 0, u, k, u);
				749	SMLABB(AL, texel.reg, pixel, u, d);
				750
				751	for (int i=0 ; i<4 ; i++) {
				752	if (!texel.format.c[i].h) continue;
				753	texel.format.c[i].h = FRAC_BITS*2+8;
				754	texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits in enough
				755	}
				756	texel.format.size = 4;
				757	texel.format.bitsPerPixel = 32;
				758	texel.flags \|= CLEAR_LO;
				759	}
				760
				761	void GGLAssembler::filter16(
Ashok Bhat	3078b13	2014-02-17 15:15:46 +0000	[diff] [blame]	762	const fragment_parts_t& /parts/,
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	763	pixel_t& texel, const texture_unit_t& tmu,
				764	int U, int V, pointer_t& txPtr,
				765	int FRAC_BITS)
				766	{
				767	// compute the mask
				768	// XXX: it would be nice if the mask below could be computed
				769	// automatically.
				770	uint32_t mask = 0;
				771	int shift = 0;
				772	int prec = 0;
				773	switch (tmu.format_idx) {
				774	case GGL_PIXEL_FORMAT_RGB_565:
				775	// source: 00000ggg.ggg00000 \| rrrrr000.000bbbbb
				776	// result: gggggggg.gggrrrrr \| rrrrr0bb.bbbbbbbb
				777	mask = 0x07E0F81F;
				778	shift = 16;
				779	prec = 5;
				780	break;
				781	case GGL_PIXEL_FORMAT_RGBA_4444:
				782	// 0000,1111,0000,1111 \| 0000,1111,0000,1111
				783	mask = 0x0F0F0F0F;
				784	shift = 12;
				785	prec = 4;
				786	break;
				787	case GGL_PIXEL_FORMAT_LA_88:
				788	// 0000,0000,1111,1111 \| 0000,0000,1111,1111
				789	// AALL -> 00AA \| 00LL
				790	mask = 0x00FF00FF;
				791	shift = 8;
				792	prec = 8;
				793	break;
				794	default:
				795	// unsupported format, do something sensical...
Steve Block	01dda20	2012-01-06 14:13:42 +0000	[diff] [blame]	796	ALOGE("Unsupported 16-bits texture format (%d)", tmu.format_idx);
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	797	LDRH(AL, texel.reg, txPtr.reg);
				798	return;
				799	}
				800
				801	const int adjust = FRAC_BITS*2 - prec;
				802	const int round = 0;
				803
				804	// update the texel format
				805	texel.format.size = 4;
				806	texel.format.bitsPerPixel = 32;
				807	texel.flags \|= CLEAR_HI\|CLEAR_LO;
				808	for (int i=0 ; i<4 ; i++) {
				809	if (!texel.format.c[i].h) continue;
				810	const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift;
				811	texel.format.c[i].h = tmu.format.c[i].h + offset + prec;
				812	texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec);
				813	}
				814
				815	// ------------------------
				816	// about ~40 cycles / pixel
				817	Scratch scratches(registerFile());
				818
				819	int pixel= scratches.obtain();
				820	int d = scratches.obtain();
				821	int u = scratches.obtain();
				822	int k = scratches.obtain();
				823
				824	// RB -> U * V
				825	int offset = pixel;
				826	CONTEXT_LOAD(offset, generated_vars.rt);
				827	CONTEXT_LOAD(u, generated_vars.lb);
				828	ADD(AL, 0, offset, offset, u);
				829
				830	LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
				831	SMULBB(AL, u, U, V);
				832	ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
				833	build_and_immediate(pixel, pixel, mask, 32);
				834	if (adjust) {
				835	if (round)
				836	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				837	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				838	}
				839	MUL(AL, 0, d, pixel, u);
				840	RSB(AL, 0, k, u, imm(1<<prec));
				841
				842	// LB -> (1-U) * V
				843	CONTEXT_LOAD(offset, generated_vars.lb);
				844	RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
				845	LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
				846	SMULBB(AL, u, U, V);
				847	ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
				848	build_and_immediate(pixel, pixel, mask, 32);
				849	if (adjust) {
				850	if (round)
				851	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				852	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				853	}
				854	MLA(AL, 0, d, pixel, u, d);
				855	SUB(AL, 0, k, k, u);
				856
				857	// LT -> (1-U)*(1-V)
				858	RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
				859	LDRH(AL, pixel, txPtr.reg);
				860	SMULBB(AL, u, U, V);
				861	ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
				862	build_and_immediate(pixel, pixel, mask, 32);
				863	if (adjust) {
				864	if (round)
				865	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				866	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				867	}
				868	MLA(AL, 0, d, pixel, u, d);
				869
				870	// RT -> U*(1-V)
				871	CONTEXT_LOAD(offset, generated_vars.rt);
				872	LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
				873	SUB(AL, 0, u, k, u);
				874	ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
				875	build_and_immediate(pixel, pixel, mask, 32);
				876	MLA(AL, 0, texel.reg, pixel, u, d);
				877	}
				878
				879	void GGLAssembler::filter24(
Ashok Bhat	3078b13	2014-02-17 15:15:46 +0000	[diff] [blame]	880	const fragment_parts_t& /parts/,
				881	pixel_t& texel, const texture_unit_t& /tmu/,
				882	int /U/, int /V/, pointer_t& txPtr,
				883	int /FRAC_BITS/)
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	884	{
				885	// not supported yet (currently disabled)
				886	load(txPtr, texel, 0);
				887	}
				888
				889	void GGLAssembler::filter32(
Ashok Bhat	3078b13	2014-02-17 15:15:46 +0000	[diff] [blame]	890	const fragment_parts_t& /parts/,
				891	pixel_t& texel, const texture_unit_t& /tmu/,
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	892	int U, int V, pointer_t& txPtr,
				893	int FRAC_BITS)
				894	{
				895	const int adjust = FRAC_BITS*2 - 8;
				896	const int round = 0;
				897
				898	// ------------------------
				899	// about ~38 cycles / pixel
				900	Scratch scratches(registerFile());
				901
				902	int pixel= scratches.obtain();
				903	int dh = scratches.obtain();
				904	int u = scratches.obtain();
				905	int k = scratches.obtain();
				906
				907	int temp = scratches.obtain();
				908	int dl = scratches.obtain();
				909	int mask = scratches.obtain();
				910
				911	MOV(AL, 0, mask, imm(0xFF));
				912	ORR(AL, 0, mask, mask, imm(0xFF0000));
				913
				914	// RB -> U * V
				915	int offset = pixel;
				916	CONTEXT_LOAD(offset, generated_vars.rt);
				917	CONTEXT_LOAD(u, generated_vars.lb);
				918	ADD(AL, 0, offset, offset, u);
				919
				920	LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
				921	SMULBB(AL, u, U, V);
				922	AND(AL, 0, temp, mask, pixel);
				923	if (adjust) {
				924	if (round)
				925	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				926	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				927	}
				928	MUL(AL, 0, dh, temp, u);
				929	AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
				930	MUL(AL, 0, dl, temp, u);
				931	RSB(AL, 0, k, u, imm(0x100));
				932
				933	// LB -> (1-U) * V
				934	CONTEXT_LOAD(offset, generated_vars.lb);
				935	RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
				936	LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
				937	SMULBB(AL, u, U, V);
				938	AND(AL, 0, temp, mask, pixel);
				939	if (adjust) {
				940	if (round)
				941	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				942	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				943	}
				944	MLA(AL, 0, dh, temp, u, dh);
				945	AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
				946	MLA(AL, 0, dl, temp, u, dl);
				947	SUB(AL, 0, k, k, u);
				948
				949	// LT -> (1-U)*(1-V)
				950	RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
				951	LDR(AL, pixel, txPtr.reg);
				952	SMULBB(AL, u, U, V);
				953	AND(AL, 0, temp, mask, pixel);
				954	if (adjust) {
				955	if (round)
				956	ADD(AL, 0, u, u, imm(1<<(adjust-1)));
				957	MOV(AL, 0, u, reg_imm(u, LSR, adjust));
				958	}
				959	MLA(AL, 0, dh, temp, u, dh);
				960	AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
				961	MLA(AL, 0, dl, temp, u, dl);
				962
				963	// RT -> U*(1-V)
				964	CONTEXT_LOAD(offset, generated_vars.rt);
				965	LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
				966	SUB(AL, 0, u, k, u);
				967	AND(AL, 0, temp, mask, pixel);
				968	MLA(AL, 0, dh, temp, u, dh);
				969	AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
				970	MLA(AL, 0, dl, temp, u, dl);
				971
				972	AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8));
				973	AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
				974	ORR(AL, 0, texel.reg, dh, dl);
				975	}
				976
				977	void GGLAssembler::build_texture_environment(
				978	component_t& fragment,
				979	const fragment_parts_t& parts,
				980	int component,
				981	Scratch& regs)
				982	{
				983	const uint32_t component_mask = 1<<component;
				984	const bool multiTexture = mTextureMachine.activeUnits > 1;
				985	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
				986	texture_unit_t& tmu = mTextureMachine.tmu[i];
				987
				988	if (tmu.mask & component_mask) {
				989	// replace or modulate with this texture
				990	if ((tmu.replaced & component_mask) == 0) {
				991	// not replaced by a later tmu...
				992
				993	Scratch scratches(registerFile());
				994	pixel_t texel(parts.texel[i]);
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	995
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	996	if (multiTexture &&
				997	tmu.swrap == GGL_NEEDS_WRAP_11 &&
				998	tmu.twrap == GGL_NEEDS_WRAP_11)
				999	{
				1000	texel.reg = scratches.obtain();
				1001	texel.flags \|= CORRUPTIBLE;
				1002	comment("fetch texel (multitexture 1:1)");
				1003	load(parts.coords[i].ptr, texel, WRITE_BACK);
				1004	}
				1005
				1006	component_t incoming(fragment);
				1007	modify(fragment, regs);
				1008
				1009	switch (tmu.env) {
				1010	case GGL_REPLACE:
				1011	extract(fragment, texel, component);
				1012	break;
				1013	case GGL_MODULATE:
				1014	modulate(fragment, incoming, texel, component);
				1015	break;
				1016	case GGL_DECAL:
				1017	decal(fragment, incoming, texel, component);
				1018	break;
				1019	case GGL_BLEND:
				1020	blend(fragment, incoming, texel, component, i);
				1021	break;
The Android Open Source Project	35237d1	2008-12-17 18:08:08 -0800	[diff] [blame]	1022	case GGL_ADD:
				1023	add(fragment, incoming, texel, component);
				1024	break;
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	1025	}
				1026	}
				1027	}
				1028	}
				1029	}
				1030
				1031	// ---------------------------------------------------------------------------
				1032
				1033	void GGLAssembler::wrapping(
				1034	int d,
				1035	int coord, int size,
				1036	int tx_wrap, int tx_linear)
				1037	{
				1038	// notes:
				1039	// if tx_linear is set, we need 4 extra bits of precision on the result
				1040	// SMULL/UMULL is 3 cycles
				1041	Scratch scratches(registerFile());
				1042	int c = coord;
				1043	if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) {
				1044	// UMULL takes 4 cycles (interlocked), and we can get away with
				1045	// 2 cycles using SMULWB, but we're loosing 16 bits of precision
				1046	// out of 32 (this is not a problem because the iterator keeps
				1047	// its full precision)
				1048	// UMULL(AL, 0, size, d, c, size);
				1049	// note: we can't use SMULTB because it's signed.
				1050	MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear));
				1051	SMULWB(AL, d, d, size);
				1052	} else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) {
				1053	if (tx_linear) {
				1054	// 1 cycle
				1055	MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear));
				1056	} else {
				1057	// 4 cycles (common case)
				1058	MOV(AL, 0, d, reg_imm(coord, ASR, 16));
				1059	BIC(AL, 0, d, d, reg_imm(d, ASR, 31));
				1060	CMP(AL, d, size);
				1061	SUB(GE, 0, d, size, imm(1));
				1062	}
				1063	}
				1064	}
				1065
				1066	// ---------------------------------------------------------------------------
				1067
				1068	void GGLAssembler::modulate(
				1069	component_t& dest,
				1070	const component_t& incoming,
				1071	const pixel_t& incomingTexel, int component)
				1072	{
				1073	Scratch locals(registerFile());
				1074	integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
				1075	extract(texel, incomingTexel, component);
				1076
				1077	const int Nt = texel.size();
				1078	// Nt should always be less than 10 bits because it comes
				1079	// from the TMU.
				1080
				1081	int Ni = incoming.size();
				1082	// Ni could be big because it comes from previous MODULATEs
				1083
				1084	if (Nt == 1) {
				1085	// texel acts as a bit-mask
				1086	// dest = incoming & ((texel << incoming.h)-texel)
				1087	RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h));
				1088	AND(AL, 0, dest.reg, dest.reg, incoming.reg);
				1089	dest.l = incoming.l;
				1090	dest.h = incoming.h;
				1091	dest.flags \|= (incoming.flags & CLEAR_LO);
				1092	} else if (Ni == 1) {
				1093	MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h));
				1094	AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31));
				1095	dest.l = 0;
				1096	dest.h = Nt;
				1097	} else {
				1098	int inReg = incoming.reg;
				1099	int shift = incoming.l;
				1100	if ((Nt + Ni) > 32) {
				1101	// we will overflow, reduce the precision of Ni to 8 bits
				1102	// (Note Nt cannot be more than 10 bits which happens with
				1103	// 565 textures and GGL_LINEAR)
				1104	shift += Ni-8;
				1105	Ni = 8;
				1106	}
				1107
				1108	// modulate by the component with the lowest precision
				1109	if (Nt >= Ni) {
				1110	if (shift) {
				1111	// XXX: we should be able to avoid this shift
				1112	// when shift==16 && Nt<16 && Ni<16, in which
				1113	// we could use SMULBT below.
				1114	MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
				1115	inReg = dest.reg;
				1116	shift = 0;
				1117	}
				1118	// operation: (Cf*Ct)/((1<<Ni)-1)
				1119	// approximated with: Cf*(Ct + Ct>>(Ni-1))>>Ni
				1120	// this operation doesn't change texel's size
				1121	ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1));
				1122	if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg);
				1123	else MUL(AL, 0, dest.reg, texel.reg, dest.reg);
				1124	dest.l = Ni;
				1125	dest.h = Nt + Ni;
				1126	} else {
				1127	if (shift && (shift != 16)) {
				1128	// if shift==16, we can use 16-bits mul instructions later
				1129	MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
				1130	inReg = dest.reg;
				1131	shift = 0;
				1132	}
				1133	// operation: (Cf*Ct)/((1<<Nt)-1)
				1134	// approximated with: Ct*(Cf + Cf>>(Nt-1))>>Nt
				1135	// this operation doesn't change incoming's size
				1136	Scratch scratches(registerFile());
				1137	int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg;
				1138	if (t == inReg)
				1139	t = scratches.obtain();
				1140	ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1));
				1141	if (Nt<16 && Ni<16) {
				1142	if (shift==16) SMULBT(AL, dest.reg, t, inReg);
				1143	else SMULBB(AL, dest.reg, t, inReg);
				1144	} else MUL(AL, 0, dest.reg, t, inReg);
				1145	dest.l = Nt;
				1146	dest.h = Nt + Ni;
				1147	}
				1148
				1149	// low bits are not valid
				1150	dest.flags \|= CLEAR_LO;
				1151
				1152	// no need to keep more than 8 bits/component
				1153	if (dest.size() > 8)
				1154	dest.l = dest.h-8;
				1155	}
				1156	}
				1157
				1158	void GGLAssembler::decal(
				1159	component_t& dest,
				1160	const component_t& incoming,
				1161	const pixel_t& incomingTexel, int component)
				1162	{
				1163	// RGBA:
				1164	// Cv = Cf(1 - At) + CtAt = Cf + (Ct - Cf)*At
				1165	// Av = Af
				1166	Scratch locals(registerFile());
				1167	integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
				1168	integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
				1169	extract(texel, incomingTexel, component);
				1170	extract(factor, incomingTexel, GGLFormat::ALPHA);
				1171
				1172	// no need to keep more than 8-bits for decal
				1173	int Ni = incoming.size();
				1174	int shift = incoming.l;
				1175	if (Ni > 8) {
				1176	shift += Ni-8;
				1177	Ni = 8;
				1178	}
				1179	integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
				1180	if (shift) {
				1181	MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
				1182	incomingNorm.reg = dest.reg;
				1183	incomingNorm.flags \|= CORRUPTIBLE;
				1184	}
				1185	ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
				1186	build_blendOneMinusFF(dest, factor, incomingNorm, texel);
				1187	}
				1188
				1189	void GGLAssembler::blend(
				1190	component_t& dest,
				1191	const component_t& incoming,
				1192	const pixel_t& incomingTexel, int component, int tmu)
				1193	{
				1194	// RGBA:
				1195	// Cv = (1 - Ct)Cf + CtCc = Cf + (Cc - Cf)*Ct
				1196	// Av = At*Af
				1197
				1198	if (component == GGLFormat::ALPHA) {
				1199	modulate(dest, incoming, incomingTexel, component);
				1200	return;
				1201	}
				1202
				1203	Scratch locals(registerFile());
				1204	integer_t color(locals.obtain(), 8, CORRUPTIBLE);
				1205	integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
				1206	LDRB(AL, color.reg, mBuilderContext.Rctx,
				1207	immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component])));
				1208	extract(factor, incomingTexel, component);
				1209
				1210	// no need to keep more than 8-bits for blend
				1211	int Ni = incoming.size();
				1212	int shift = incoming.l;
				1213	if (Ni > 8) {
				1214	shift += Ni-8;
				1215	Ni = 8;
				1216	}
				1217	integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
				1218	if (shift) {
				1219	MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
				1220	incomingNorm.reg = dest.reg;
				1221	incomingNorm.flags \|= CORRUPTIBLE;
				1222	}
				1223	ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
				1224	build_blendOneMinusFF(dest, factor, incomingNorm, color);
				1225	}
				1226
The Android Open Source Project	35237d1	2008-12-17 18:08:08 -0800	[diff] [blame]	1227	void GGLAssembler::add(
				1228	component_t& dest,
				1229	const component_t& incoming,
				1230	const pixel_t& incomingTexel, int component)
				1231	{
				1232	// RGBA:
				1233	// Cv = Cf + Ct;
				1234	Scratch locals(registerFile());
				1235
				1236	component_t incomingTemp(incoming);
				1237
				1238	// use "dest" as a temporary for extracting the texel, unless "dest"
				1239	// overlaps "incoming".
				1240	integer_t texel(dest.reg, 32, CORRUPTIBLE);
				1241	if (dest.reg == incomingTemp.reg)
				1242	texel.reg = locals.obtain();
				1243	extract(texel, incomingTexel, component);
				1244
				1245	if (texel.s < incomingTemp.size()) {
				1246	expand(texel, texel, incomingTemp.size());
				1247	} else if (texel.s > incomingTemp.size()) {
				1248	if (incomingTemp.flags & CORRUPTIBLE) {
				1249	expand(incomingTemp, incomingTemp, texel.s);
				1250	} else {
				1251	incomingTemp.reg = locals.obtain();
				1252	expand(incomingTemp, incoming, texel.s);
				1253	}
				1254	}
				1255
				1256	if (incomingTemp.l) {
				1257	ADD(AL, 0, dest.reg, texel.reg,
				1258	reg_imm(incomingTemp.reg, LSR, incomingTemp.l));
				1259	} else {
				1260	ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg);
				1261	}
				1262	dest.l = 0;
				1263	dest.h = texel.size();
				1264	component_sat(dest);
				1265	}
				1266
The Android Open Source Project	4f6e8d7	2008-10-21 07:00:00 -0700	[diff] [blame]	1267	// ----------------------------------------------------------------------------
				1268
				1269	}; // namespace android
				1270