Blame - opengl/libagl/primitives.cpp - android_frameworks_native

blob: f164c02eea34924fb4c5dee81ac6eb1a18cd90cb [file] [log] [blame]

The Android Open Source Project	edbf3b6	2009-03-03 19:31:44 -0800	[diff] [blame]	1	/* libs/opengles/primitives.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18	#include <stdio.h>
				19	#include <stdlib.h>
				20	#include <math.h>
				21
				22	#include "context.h"
				23	#include "primitives.h"
				24	#include "light.h"
				25	#include "matrix.h"
				26	#include "vertex.h"
				27	#include "fp.h"
				28	#include "TextureObjectManager.h"
				29
				30	extern "C" void iterators0032(const void* that,
				31	int32_t* it, int32_t c0, int32_t c1, int32_t c2);
				32
				33	namespace android {
				34
				35	// ----------------------------------------------------------------------------
				36
				37	static void primitive_point(ogles_context_t* c, vertex_t* v);
				38	static void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
				39	static void primitive_clip_triangle(ogles_context_t* c,
				40	vertex_t* v0, vertex_t* v1, vertex_t* v2);
				41
				42	static void primitive_nop_point(ogles_context_t* c, vertex_t* v);
				43	static void primitive_nop_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
				44	static void primitive_nop_triangle(ogles_context_t* c,
				45	vertex_t* v0, vertex_t* v1, vertex_t* v2);
				46
				47	static inline bool cull_triangle(ogles_context_t* c,
				48	vertex_t* v0, vertex_t* v1, vertex_t* v2);
				49
				50	static void lerp_triangle(ogles_context_t* c,
				51	vertex_t* v0, vertex_t* v1, vertex_t* v2);
				52
				53	static void lerp_texcoords(ogles_context_t* c,
				54	vertex_t* v0, vertex_t* v1, vertex_t* v2);
				55
				56	static void lerp_texcoords_w(ogles_context_t* c,
				57	vertex_t* v0, vertex_t* v1, vertex_t* v2);
				58
				59	static void triangle(ogles_context_t* c,
				60	vertex_t* v0, vertex_t* v1, vertex_t* v2);
				61
				62	static void clip_triangle(ogles_context_t* c,
				63	vertex_t* v0, vertex_t* v1, vertex_t* v2);
				64
				65	static unsigned int clip_line(ogles_context_t* c,
				66	vertex_t* s, vertex_t* p);
				67
				68	// ----------------------------------------------------------------------------
				69	#if 0
				70	#pragma mark -
				71	#endif
				72
				73	static void lightTriangleDarkSmooth(ogles_context_t* c,
				74	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				75	{
				76	if (!(v0->flags & vertex_t::LIT)) {
				77	v0->flags \|= vertex_t::LIT;
				78	const GLvoid* cp = c->arrays.color.element(
				79	v0->index & vertex_cache_t::INDEX_MASK);
				80	c->arrays.color.fetch(c, v0->color.v, cp);
				81	}
				82	if (!(v1->flags & vertex_t::LIT)) {
				83	v1->flags \|= vertex_t::LIT;
				84	const GLvoid* cp = c->arrays.color.element(
				85	v1->index & vertex_cache_t::INDEX_MASK);
				86	c->arrays.color.fetch(c, v1->color.v, cp);
				87	}
				88	if(!(v2->flags & vertex_t::LIT)) {
				89	v2->flags \|= vertex_t::LIT;
				90	const GLvoid* cp = c->arrays.color.element(
				91	v2->index & vertex_cache_t::INDEX_MASK);
				92	c->arrays.color.fetch(c, v2->color.v, cp);
				93	}
				94	}
				95
				96	static void lightTriangleDarkFlat(ogles_context_t* c,
				97	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				98	{
				99	if (!(v2->flags & vertex_t::LIT)) {
				100	v2->flags \|= vertex_t::LIT;
				101	const GLvoid* cp = c->arrays.color.element(
				102	v2->index & vertex_cache_t::INDEX_MASK);
				103	c->arrays.color.fetch(c, v2->color.v, cp);
				104	}
				105	// configure the rasterizer here, before we clip
				106	c->rasterizer.procs.color4xv(c, v2->color.v);
				107	}
				108
				109	static void lightTriangleSmooth(ogles_context_t* c,
				110	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				111	{
				112	if (!(v0->flags & vertex_t::LIT))
				113	c->lighting.lightVertex(c, v0);
				114	if (!(v1->flags & vertex_t::LIT))
				115	c->lighting.lightVertex(c, v1);
				116	if(!(v2->flags & vertex_t::LIT))
				117	c->lighting.lightVertex(c, v2);
				118	}
				119
				120	static void lightTriangleFlat(ogles_context_t* c,
				121	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				122	{
				123	if (!(v2->flags & vertex_t::LIT))
				124	c->lighting.lightVertex(c, v2);
				125	// configure the rasterizer here, before we clip
				126	c->rasterizer.procs.color4xv(c, v2->color.v);
				127	}
				128
				129	// The fog versions...
				130
				131	static inline
				132	void lightVertexDarkSmoothFog(ogles_context_t* c, vertex_t* v)
				133	{
				134	if (!(v->flags & vertex_t::LIT)) {
				135	v->flags \|= vertex_t::LIT;
				136	v->fog = c->fog.fog(c, v->eye.z);
				137	const GLvoid* cp = c->arrays.color.element(
				138	v->index & vertex_cache_t::INDEX_MASK);
				139	c->arrays.color.fetch(c, v->color.v, cp);
				140	}
				141	}
				142	static inline
				143	void lightVertexDarkFlatFog(ogles_context_t* c, vertex_t* v)
				144	{
				145	if (!(v->flags & vertex_t::LIT)) {
				146	v->flags \|= vertex_t::LIT;
				147	v->fog = c->fog.fog(c, v->eye.z);
				148	}
				149	}
				150	static inline
				151	void lightVertexSmoothFog(ogles_context_t* c, vertex_t* v)
				152	{
				153	if (!(v->flags & vertex_t::LIT)) {
				154	v->fog = c->fog.fog(c, v->eye.z);
				155	c->lighting.lightVertex(c, v);
				156	}
				157	}
				158
				159	static void lightTriangleDarkSmoothFog(ogles_context_t* c,
				160	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				161	{
				162	lightVertexDarkSmoothFog(c, v0);
				163	lightVertexDarkSmoothFog(c, v1);
				164	lightVertexDarkSmoothFog(c, v2);
				165	}
				166
				167	static void lightTriangleDarkFlatFog(ogles_context_t* c,
				168	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				169	{
				170	lightVertexDarkFlatFog(c, v0);
				171	lightVertexDarkFlatFog(c, v1);
				172	lightVertexDarkSmoothFog(c, v2);
				173	// configure the rasterizer here, before we clip
				174	c->rasterizer.procs.color4xv(c, v2->color.v);
				175	}
				176
				177	static void lightTriangleSmoothFog(ogles_context_t* c,
				178	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				179	{
				180	lightVertexSmoothFog(c, v0);
				181	lightVertexSmoothFog(c, v1);
				182	lightVertexSmoothFog(c, v2);
				183	}
				184
				185	static void lightTriangleFlatFog(ogles_context_t* c,
				186	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				187	{
				188	lightVertexDarkFlatFog(c, v0);
				189	lightVertexDarkFlatFog(c, v1);
				190	lightVertexSmoothFog(c, v2);
				191	// configure the rasterizer here, before we clip
				192	c->rasterizer.procs.color4xv(c, v2->color.v);
				193	}
				194
				195
				196
				197	typedef void (light_primitive_t)(ogles_context_t,
				198	vertex_t, vertex_t, vertex_t*);
				199
				200	// fog 0x4, light 0x2, smooth 0x1
				201	static const light_primitive_t lightPrimitive[8] = {
				202	lightTriangleDarkFlat, // no fog \| dark \| flat
				203	lightTriangleDarkSmooth, // no fog \| dark \| smooth
				204	lightTriangleFlat, // no fog \| light \| flat
				205	lightTriangleSmooth, // no fog \| light \| smooth
				206	lightTriangleDarkFlatFog, // fog \| dark \| flat
				207	lightTriangleDarkSmoothFog, // fog \| dark \| smooth
				208	lightTriangleFlatFog, // fog \| light \| flat
				209	lightTriangleSmoothFog // fog \| light \| smooth
				210	};
				211
				212	void ogles_validate_primitives(ogles_context_t* c)
				213	{
				214	const uint32_t enables = c->rasterizer.state.enables;
				215
				216	// set up the lighting/shading/smoothing/fogging function
				217	int index = enables & GGL_ENABLE_SMOOTH ? 0x1 : 0;
				218	index \|= c->lighting.enable ? 0x2 : 0;
				219	index \|= enables & GGL_ENABLE_FOG ? 0x4 : 0;
				220	c->lighting.lightTriangle = lightPrimitive[index];
				221
				222	// set up the primitive renderers
				223	if (ggl_likely(c->arrays.vertex.enable)) {
				224	c->prims.renderPoint = primitive_point;
				225	c->prims.renderLine = primitive_line;
				226	c->prims.renderTriangle = primitive_clip_triangle;
				227	} else {
				228	c->prims.renderPoint = primitive_nop_point;
				229	c->prims.renderLine = primitive_nop_line;
				230	c->prims.renderTriangle = primitive_nop_triangle;
				231	}
				232	}
				233
				234	// ----------------------------------------------------------------------------
				235
				236	void compute_iterators_t::initTriangle(
				237	vertex_t const* v0, vertex_t const* v1, vertex_t const* v2)
				238	{
				239	m_dx01 = v1->window.x - v0->window.x;
				240	m_dy10 = v0->window.y - v1->window.y;
				241	m_dx20 = v0->window.x - v2->window.x;
				242	m_dy02 = v2->window.y - v0->window.y;
				243	m_area = m_dx01m_dy02 + (-m_dy10)m_dx20;
				244	}
				245
				246	void compute_iterators_t::initLine(
				247	vertex_t const* v0, vertex_t const* v1)
				248	{
				249	m_dx01 = m_dy02 = v1->window.x - v0->window.x;
				250	m_dy10 = m_dx20 = v0->window.y - v1->window.y;
				251	m_area = m_dx01m_dy02 + (-m_dy10)m_dx20;
				252	}
				253
				254	void compute_iterators_t::initLerp(vertex_t const* v0, uint32_t enables)
				255	{
				256	m_x0 = v0->window.x;
				257	m_y0 = v0->window.y;
				258	const GGLcoord area = (m_area + TRI_HALF) >> TRI_FRACTION_BITS;
				259	const GGLcoord minArea = 2; // cannot be inverted
				260	// triangles with an area smaller than 1.0 are not smooth-shaded
				261
				262	int q=0, s=0, d=0;
				263	if (abs(area) >= minArea) {
				264	// Here we do some voodoo magic, to compute a suitable scale
				265	// factor for deltas/area:
				266
				267	// First compute the 1/area with full 32-bits precision,
				268	// gglRecipQNormalized returns a number [-0.5, 0.5[ and an exponent.
				269	d = gglRecipQNormalized(area, &q);
				270
				271	// Then compute the minimum left-shift to not overflow the muls
				272	// below.
				273	s = 32 - gglClz(abs(m_dy02)\|abs(m_dy10)\|abs(m_dx01)\|abs(m_dx20));
				274
				275	// We'll keep 16-bits of precision for deltas/area. So we need
				276	// to shift everything left an extra 15 bits.
				277	s += 15;
				278
				279	// make sure all final shifts are not > 32, because gglMulx
				280	// can't handle it.
				281	if (s < q) s = q;
				282	if (s > 32) {
				283	d >>= 32-s;
				284	s = 32;
				285	}
				286	}
				287
				288	m_dx01 = gglMulx(m_dx01, d, s);
				289	m_dy10 = gglMulx(m_dy10, d, s);
				290	m_dx20 = gglMulx(m_dx20, d, s);
				291	m_dy02 = gglMulx(m_dy02, d, s);
				292	m_area_scale = 32 + q - s;
				293	m_scale = 0;
				294
				295	if (enables & GGL_ENABLE_TMUS) {
				296	const int A = gglClz(abs(m_dy02)\|abs(m_dy10)\|abs(m_dx01)\|abs(m_dx20));
				297	const int B = gglClz(abs(m_x0)\|abs(m_y0));
				298	m_scale = max(0, 32 - (A + 16)) +
				299	max(0, 32 - (B + TRI_FRACTION_BITS)) + 1;
				300	}
				301	}
				302
				303	int compute_iterators_t::iteratorsScale(GGLfixed* it,
				304	int32_t c0, int32_t c1, int32_t c2) const
				305	{
				306	int32_t dc01 = c1 - c0;
				307	int32_t dc02 = c2 - c0;
				308	const int A = gglClz(abs(c0));
				309	const int B = gglClz(abs(dc01)\|abs(dc02));
				310	const int scale = min(A, B - m_scale) - 2;
				311	if (scale >= 0) {
				312	c0 <<= scale;
				313	dc01 <<= scale;
				314	dc02 <<= scale;
				315	} else {
				316	c0 >>= -scale;
				317	dc01 >>= -scale;
				318	dc02 >>= -scale;
				319	}
				320	const int s = m_area_scale;
				321	int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
				322	int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
				323	int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
				324	gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
				325	it[0] = c;
				326	it[1] = dcdx;
				327	it[2] = dcdy;
				328	return scale;
				329	}
				330
				331	void compute_iterators_t::iterators1616(GGLfixed* it,
				332	GGLfixed c0, GGLfixed c1, GGLfixed c2) const
				333	{
				334	const GGLfixed dc01 = c1 - c0;
				335	const GGLfixed dc02 = c2 - c0;
				336	// 16.16 x 16.16 == 32.32 --> 16.16
				337	const int s = m_area_scale;
				338	int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
				339	int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
				340	int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
				341	gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
				342	it[0] = c;
				343	it[1] = dcdx;
				344	it[2] = dcdy;
				345	}
				346
				347	void compute_iterators_t::iterators0032(int64_t* it,
				348	int32_t c0, int32_t c1, int32_t c2) const
				349	{
				350	const int s = m_area_scale - 16;
				351	int32_t dc01 = (c1 - c0)>>s;
				352	int32_t dc02 = (c2 - c0)>>s;
				353	// 16.16 x 16.16 == 32.32
				354	int64_t dcdx = gglMulii(dc01, m_dy02) + gglMulii(dc02, m_dy10);
				355	int64_t dcdy = gglMulii(dc02, m_dx01) + gglMulii(dc01, m_dx20);
				356	it[ 0] = (c0<<16) - ((dcdxm_x0 + dcdym_y0)>>4);
				357	it[ 1] = dcdx;
				358	it[ 2] = dcdy;
				359	}
				360
				361	#if defined(__arm__) && !defined(__thumb__)
				362	inline void compute_iterators_t::iterators0032(int32_t* it,
				363	int32_t c0, int32_t c1, int32_t c2) const
				364	{
				365	::iterators0032(this, it, c0, c1, c2);
				366	}
				367	#else
				368	void compute_iterators_t::iterators0032(int32_t* it,
				369	int32_t c0, int32_t c1, int32_t c2) const
				370	{
				371	int64_t it64[3];
				372	iterators0032(it, c0, c1, c2);
				373	it[0] = it64[0];
				374	it[1] = it64[1];
				375	it[2] = it64[2];
				376	}
				377	#endif
				378
				379	// ----------------------------------------------------------------------------
				380
				381	static inline int32_t clampZ(GLfixed z) CONST;
				382	int32_t clampZ(GLfixed z) {
				383	z = (z & ~(z>>31));
				384	if (z >= 0x10000)
				385	z = 0xFFFF;
				386	return z;
				387	}
				388
				389	static __attribute__((noinline))
				390	void fetch_texcoord_impl(ogles_context_t* c,
				391	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				392	{
				393	vertex_t* const vtx[3] = { v0, v1, v2 };
				394	array_t const * const texcoordArray = c->arrays.texture;
				395
				396	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
				397	if (!(c->rasterizer.state.texture[i].enable))
				398	continue;
				399
				400	for (int j=0 ; j<3 ; j++) {
				401	vertex_t* const v = vtx[j];
				402	if (v->flags & vertex_t::TT)
				403	continue;
				404
				405	// NOTE: here we could compute automatic texgen
				406	// such as sphere/cube maps, instead of fetching them
				407	// from the textcoord array.
				408
				409	vec4_t& coords = v->texture[i];
				410	const GLubyte* tp = texcoordArray[i].element(
				411	v->index & vertex_cache_t::INDEX_MASK);
				412	texcoordArray[i].fetch(c, coords.v, tp);
				413
				414	// transform texture coordinates...
				415	coords.Q = 0x10000;
				416	const transform_t& tr = c->transforms.texture[i].transform;
				417	if (ggl_unlikely(tr.ops)) {
				418	c->arrays.tex_transform[i](&tr, &coords, &coords);
				419	}
				420
				421	// divide by Q
				422	const GGLfixed q = coords.Q;
				423	if (ggl_unlikely(q != 0x10000)) {
				424	const int32_t qinv = gglRecip28(q);
				425	coords.S = gglMulx(coords.S, qinv, 28);
				426	coords.T = gglMulx(coords.T, qinv, 28);
				427	}
				428	}
				429	}
				430	v0->flags \|= vertex_t::TT;
				431	v1->flags \|= vertex_t::TT;
				432	v2->flags \|= vertex_t::TT;
				433	}
				434
				435	inline void fetch_texcoord(ogles_context_t* c,
				436	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				437	{
				438	const uint32_t enables = c->rasterizer.state.enables;
				439	if (!(enables & GGL_ENABLE_TMUS))
				440	return;
				441
				442	// Fetch & transform texture coordinates...
				443	if (ggl_likely(v0->flags & v1->flags & v2->flags & vertex_t::TT)) {
				444	// already done for all three vertices, bail...
				445	return;
				446	}
				447	fetch_texcoord_impl(c, v0, v1, v2);
				448	}
				449
				450	// ----------------------------------------------------------------------------
				451	#if 0
				452	#pragma mark -
				453	#pragma mark Point
				454	#endif
				455
				456	void primitive_nop_point(ogles_context_t, vertex_t) {
				457	}
				458
				459	void primitive_point(ogles_context_t* c, vertex_t* v)
				460	{
				461	// lighting & clamping...
				462	const uint32_t enables = c->rasterizer.state.enables;
				463
				464	if (ggl_unlikely(!(v->flags & vertex_t::LIT))) {
				465	if (c->lighting.enable) {
				466	c->lighting.lightVertex(c, v);
				467	} else {
				468	v->flags \|= vertex_t::LIT;
				469	const GLvoid* cp = c->arrays.color.element(
				470	v->index & vertex_cache_t::INDEX_MASK);
				471	c->arrays.color.fetch(c, v->color.v, cp);
				472	}
				473	if (enables & GGL_ENABLE_FOG) {
				474	v->fog = c->fog.fog(c, v->eye.z);
				475	}
				476	}
				477
				478	// XXX: we don't need to do that each-time
				479	// if color array and lighting not enabled
				480	c->rasterizer.procs.color4xv(c, v->color.v);
				481
				482	// XXX: look into ES point-sprite extension
				483	if (enables & GGL_ENABLE_TMUS) {
				484	fetch_texcoord(c, v,v,v);
				485	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
				486	if (!c->rasterizer.state.texture[i].enable)
				487	continue;
				488	int32_t itt[8];
				489	itt[1] = itt[2] = itt[4] = itt[5] = 0;
				490	itt[6] = itt[7] = 16; // XXX: check that
				491	if (c->rasterizer.state.texture[i].s_wrap == GGL_CLAMP) {
				492	int width = c->textures.tmu[i].texture->surface.width;
				493	itt[0] = v->texture[i].S * width;
				494	itt[6] = 0;
				495	}
				496	if (c->rasterizer.state.texture[i].t_wrap == GGL_CLAMP) {
				497	int height = c->textures.tmu[i].texture->surface.height;
				498	itt[3] = v->texture[i].T * height;
				499	itt[7] = 0;
				500	}
				501	c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
				502	}
				503	}
				504
				505	if (enables & GGL_ENABLE_DEPTH_TEST) {
				506	int32_t itz[3];
				507	itz[0] = clampZ(v->window.z) * 0x00010001;
				508	itz[1] = itz[2] = 0;
				509	c->rasterizer.procs.zGrad3xv(c, itz);
				510	}
				511
				512	if (enables & GGL_ENABLE_FOG) {
				513	GLfixed itf[3];
				514	itf[0] = v->fog;
				515	itf[1] = itf[2] = 0;
				516	c->rasterizer.procs.fogGrad3xv(c, itf);
				517	}
				518
				519	// Render our point...
				520	c->rasterizer.procs.pointx(c, v->window.v, c->point.size);
				521	}
				522
				523	// ----------------------------------------------------------------------------
				524	#if 0
				525	#pragma mark -
				526	#pragma mark Line
				527	#endif
				528
				529	void primitive_nop_line(ogles_context_t, vertex_t, vertex_t*) {
				530	}
				531
				532	void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1)
				533	{
				534	// get texture coordinates
				535	fetch_texcoord(c, v0, v1, v1);
				536
				537	// light/shade the vertices first (they're copied below)
				538	c->lighting.lightTriangle(c, v0, v1, v1);
				539
				540	// clip the line if needed
				541	if (ggl_unlikely((v0->flags \| v1->flags) & vertex_t::CLIP_ALL)) {
				542	unsigned int count = clip_line(c, v0, v1);
				543	if (ggl_unlikely(count == 0))
				544	return;
				545	}
				546
				547	// compute iterators...
				548	const uint32_t enables = c->rasterizer.state.enables;
				549	const uint32_t mask = GGL_ENABLE_TMUS \|
				550	GGL_ENABLE_SMOOTH \|
				551	GGL_ENABLE_W \|
				552	GGL_ENABLE_FOG \|
				553	GGL_ENABLE_DEPTH_TEST;
				554
				555	if (ggl_unlikely(enables & mask)) {
				556	c->lerp.initLine(v0, v1);
				557	lerp_triangle(c, v0, v1, v0);
				558	}
				559
				560	// render our line
				561	c->rasterizer.procs.linex(c, v0->window.v, v1->window.v, c->line.width);
				562	}
				563
				564	// ----------------------------------------------------------------------------
				565	#if 0
				566	#pragma mark -
				567	#pragma mark Triangle
				568	#endif
				569
				570	void primitive_nop_triangle(ogles_context_t* c,
				571	vertex_t* v0, vertex_t* v1, vertex_t* v2) {
				572	}
				573
				574	void primitive_clip_triangle(ogles_context_t* c,
				575	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				576	{
				577	uint32_t cc = (v0->flags \| v1->flags \| v2->flags) & vertex_t::CLIP_ALL;
				578	if (ggl_likely(!cc)) {
				579	// code below must be as optimized as possible, this is the
				580	// common code path.
				581
				582	// This triangle is not clipped, test if it's culled
				583	// unclipped triangle...
				584	c->lerp.initTriangle(v0, v1, v2);
				585	if (cull_triangle(c, v0, v1, v2))
				586	return; // culled!
				587
				588	// Fetch all texture coordinates if needed
				589	fetch_texcoord(c, v0, v1, v2);
				590
				591	// light (or shade) our triangle!
				592	c->lighting.lightTriangle(c, v0, v1, v2);
				593
				594	triangle(c, v0, v1, v2);
				595	return;
				596	}
				597
				598	// The assumption here is that we're not going to clip very often,
				599	// and even more rarely will we clip a triangle that ends up
				600	// being culled out. So it's okay to light the vertices here, even though
				601	// in a few cases we won't render the triangle (if culled).
				602
				603	// Fetch texture coordinates...
				604	fetch_texcoord(c, v0, v1, v2);
				605
				606	// light (or shade) our triangle!
				607	c->lighting.lightTriangle(c, v0, v1, v2);
				608
				609	clip_triangle(c, v0, v1, v2);
				610	}
				611
				612	// -----------------------------------------------------------------------
				613
				614	void triangle(ogles_context_t* c,
				615	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				616	{
				617	// compute iterators...
				618	const uint32_t enables = c->rasterizer.state.enables;
				619	const uint32_t mask = GGL_ENABLE_TMUS \|
				620	GGL_ENABLE_SMOOTH \|
				621	GGL_ENABLE_W \|
				622	GGL_ENABLE_FOG \|
				623	GGL_ENABLE_DEPTH_TEST;
				624
				625	if (ggl_likely(enables & mask))
				626	lerp_triangle(c, v0, v1, v2);
				627
				628	c->rasterizer.procs.trianglex(c, v0->window.v, v1->window.v, v2->window.v);
				629	}
				630
				631	void lerp_triangle(ogles_context_t* c,
				632	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				633	{
				634	const uint32_t enables = c->rasterizer.state.enables;
				635	c->lerp.initLerp(v0, enables);
				636
				637	// set up texture iterators
				638	if (enables & GGL_ENABLE_TMUS) {
				639	if (enables & GGL_ENABLE_W) {
				640	lerp_texcoords_w(c, v0, v1, v2);
				641	} else {
				642	lerp_texcoords(c, v0, v1, v2);
				643	}
				644	}
				645
				646	// set up the color iterators
				647	const compute_iterators_t& lerp = c->lerp;
				648	if (enables & GGL_ENABLE_SMOOTH) {
				649	GLfixed itc[12];
				650	for (int i=0 ; i<4 ; i++) {
				651	const GGLcolor c0 = v0->color.v[i] * 255;
				652	const GGLcolor c1 = v1->color.v[i] * 255;
				653	const GGLcolor c2 = v2->color.v[i] * 255;
				654	lerp.iterators1616(&itc[i*3], c0, c1, c2);
				655	}
				656	c->rasterizer.procs.colorGrad12xv(c, itc);
				657	}
				658
				659	if (enables & GGL_ENABLE_DEPTH_TEST) {
				660	int32_t itz[3];
				661	const int32_t v0z = clampZ(v0->window.z);
				662	const int32_t v1z = clampZ(v1->window.z);
				663	const int32_t v2z = clampZ(v2->window.z);
				664	if (ggl_unlikely(c->polygonOffset.enable)) {
				665	const int32_t units = (c->polygonOffset.units << 16);
				666	const GLfixed factor = c->polygonOffset.factor;
				667	if (factor) {
				668	int64_t itz64[3];
				669	lerp.iterators0032(itz64, v0z, v1z, v2z);
				670	int64_t maxDepthSlope = max(itz64[1], itz64[2]);
				671	itz[0] = uint32_t(itz64[0])
				672	+ uint32_t((maxDepthSlope*factor)>>16) + units;
				673	itz[1] = uint32_t(itz64[1]);
				674	itz[2] = uint32_t(itz64[2]);
				675	} else {
				676	lerp.iterators0032(itz, v0z, v1z, v2z);
				677	itz[0] += units;
				678	}
				679	} else {
				680	lerp.iterators0032(itz, v0z, v1z, v2z);
				681	}
				682	c->rasterizer.procs.zGrad3xv(c, itz);
				683	}
				684
				685	if (ggl_unlikely(enables & GGL_ENABLE_FOG)) {
				686	GLfixed itf[3];
				687	lerp.iterators1616(itf, v0->fog, v1->fog, v2->fog);
				688	c->rasterizer.procs.fogGrad3xv(c, itf);
				689	}
				690	}
				691
				692
				693	static inline
				694	int compute_lod(ogles_context_t* c, int i,
				695	int32_t s0, int32_t t0, int32_t s1, int32_t t1, int32_t s2, int32_t t2)
				696	{
				697	// Compute mipmap level / primitive
				698	// rho = sqrt( texelArea / area )
				699	// lod = log2( rho )
				700	// lod = log2( texelArea / area ) / 2
				701	// lod = (log2( texelArea ) - log2( area )) / 2
				702	const compute_iterators_t& lerp = c->lerp;
				703	const GGLcoord area = abs(lerp.area());
				704	const int w = c->textures.tmu[i].texture->surface.width;
				705	const int h = c->textures.tmu[i].texture->surface.height;
				706	const int shift = 16 + (16 - TRI_FRACTION_BITS);
				707	int32_t texelArea = abs( gglMulx(s1-s0, t2-t0, shift) -
				708	gglMulx(s2-s0, t1-t0, shift) )wh;
				709	int log2TArea = (32-TRI_FRACTION_BITS -1) - gglClz(texelArea);
				710	int log2Area = (32-TRI_FRACTION_BITS*2-1) - gglClz(area);
				711	int lod = (log2TArea - log2Area + 1) >> 1;
				712	return lod;
				713	}
				714
				715	void lerp_texcoords(ogles_context_t* c,
				716	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				717	{
				718	const compute_iterators_t& lerp = c->lerp;
				719	int32_t itt[8] __attribute__((aligned(16)));
				720	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
				721	const texture_t& tmu = c->rasterizer.state.texture[i];
				722	if (!tmu.enable)
				723	continue;
				724
				725	// compute the jacobians using block floating-point
				726	int32_t s0 = v0->texture[i].S;
				727	int32_t t0 = v0->texture[i].T;
				728	int32_t s1 = v1->texture[i].S;
				729	int32_t t1 = v1->texture[i].T;
				730	int32_t s2 = v2->texture[i].S;
				731	int32_t t2 = v2->texture[i].T;
				732
				733	const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
				734	if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
				735	int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
				736	c->rasterizer.procs.bindTextureLod(c, i,
				737	&c->textures.tmu[i].texture->mip(lod));
				738	}
				739
				740	// premultiply (s,t) when clampling
				741	if (tmu.s_wrap == GGL_CLAMP) {
				742	const int width = tmu.surface.width;
				743	s0 *= width;
				744	s1 *= width;
				745	s2 *= width;
				746	}
				747	if (tmu.t_wrap == GGL_CLAMP) {
				748	const int height = tmu.surface.height;
				749	t0 *= height;
				750	t1 *= height;
				751	t2 *= height;
				752	}
				753	itt[6] = -lerp.iteratorsScale(itt+0, s0, s1, s2);
				754	itt[7] = -lerp.iteratorsScale(itt+3, t0, t1, t2);
				755	c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
				756	}
				757	}
				758
				759	void lerp_texcoords_w(ogles_context_t* c,
				760	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				761	{
				762	const compute_iterators_t& lerp = c->lerp;
				763	int32_t itt[8] __attribute__((aligned(16)));
				764	int32_t itw[3];
				765
				766	// compute W's scale to 2.30
				767	int32_t w0 = v0->window.w;
				768	int32_t w1 = v1->window.w;
				769	int32_t w2 = v2->window.w;
				770	int wscale = 32 - gglClz(w0\|w1\|w2);
				771
				772	// compute the jacobian using block floating-point
				773	int sc = lerp.iteratorsScale(itw, w0, w1, w2);
				774	sc += wscale - 16;
				775	c->rasterizer.procs.wGrad3xv(c, itw);
				776
				777	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
				778	const texture_t& tmu = c->rasterizer.state.texture[i];
				779	if (!tmu.enable)
				780	continue;
				781
				782	// compute the jacobians using block floating-point
				783	int32_t s0 = v0->texture[i].S;
				784	int32_t t0 = v0->texture[i].T;
				785	int32_t s1 = v1->texture[i].S;
				786	int32_t t1 = v1->texture[i].T;
				787	int32_t s2 = v2->texture[i].S;
				788	int32_t t2 = v2->texture[i].T;
				789
				790	const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
				791	if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
				792	int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
				793	c->rasterizer.procs.bindTextureLod(c, i,
				794	&c->textures.tmu[i].texture->mip(lod));
				795	}
				796
				797	// premultiply (s,t) when clampling
				798	if (tmu.s_wrap == GGL_CLAMP) {
				799	const int width = tmu.surface.width;
				800	s0 *= width;
				801	s1 *= width;
				802	s2 *= width;
				803	}
				804	if (tmu.t_wrap == GGL_CLAMP) {
				805	const int height = tmu.surface.height;
				806	t0 *= height;
				807	t1 *= height;
				808	t2 *= height;
				809	}
				810
				811	s0 = gglMulx(s0, w0, wscale);
				812	t0 = gglMulx(t0, w0, wscale);
				813	s1 = gglMulx(s1, w1, wscale);
				814	t1 = gglMulx(t1, w1, wscale);
				815	s2 = gglMulx(s2, w2, wscale);
				816	t2 = gglMulx(t2, w2, wscale);
				817
				818	itt[6] = sc - lerp.iteratorsScale(itt+0, s0, s1, s2);
				819	itt[7] = sc - lerp.iteratorsScale(itt+3, t0, t1, t2);
				820	c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
				821	}
				822	}
				823
				824
				825	static inline
				826	bool cull_triangle(ogles_context_t* c, vertex_t* v0, vertex_t* v1, vertex_t* v2)
				827	{
				828	if (ggl_likely(c->cull.enable)) {
				829	const GLenum winding = (c->lerp.area() > 0) ? GL_CW : GL_CCW;
				830	const GLenum face = (winding == c->cull.frontFace) ? GL_FRONT : GL_BACK;
				831	if (face == c->cull.cullFace)
				832	return true; // culled!
				833	}
				834	return false;
				835	}
				836
				837	static inline
				838	GLfixed frustumPlaneDist(int plane, const vec4_t& s)
				839	{
				840	const GLfixed d = s.v[ plane >> 1 ];
				841	return ((plane & 1) ? (s.w - d) : (s.w + d));
				842	}
				843
				844	static inline
				845	int32_t clipDivide(GLfixed a, GLfixed b) {
				846	// returns a 4.28 fixed-point
				847	return gglMulDivi(1LU<<28, a, b);
				848	}
				849
				850	void clip_triangle(ogles_context_t* c,
				851	vertex_t* v0, vertex_t* v1, vertex_t* v2)
				852	{
				853	uint32_t all_cc = (v0->flags \| v1->flags \| v2->flags) & vertex_t::CLIP_ALL;
				854
				855	vertex_t p0, p1, *p2;
				856	const int MAX_CLIPPING_PLANES = 6 + OGLES_MAX_CLIP_PLANES;
				857	const int MAX_VERTICES = 3;
				858
				859	// Temporary buffer to hold the new vertices. Each plane can add up to
				860	// two new vertices (because the polygon is convex).
				861	// We need one extra element, to handle an overflow case when
				862	// the polygon degenerates into something non convex.
				863	vertex_t buffer[MAX_CLIPPING_PLANES * 2 + 1]; // ~3KB
				864	vertex_t* buf = buffer;
				865
				866	// original list of vertices (polygon to clip, in fact this
				867	// function works with an arbitrary polygon).
				868	vertex_t* in[3] = { v0, v1, v2 };
				869
				870	// output lists (we need 2, which we use back and forth)
				871	// (maximum outpout list's size is MAX_CLIPPING_PLANES + MAX_VERTICES)
				872	// 2 more elements for overflow when non convex polygons.
				873	vertex_t* out[2][MAX_CLIPPING_PLANES + MAX_VERTICES + 2];
				874	unsigned int outi = 0;
				875
				876	// current input list
				877	vertex_t** ivl = in;
				878
				879	// 3 input vertices, 0 in the output list, first plane
				880	unsigned int ic = 3;
				881
				882	// User clip-planes first, the clipping is always done in eye-coordinate
				883	// this is basically the same algorithm than for the view-volume
				884	// clipping, except for the computation of the distance (vertex, plane)
				885	// and the fact that we need to compute the eye-coordinates of each
				886	// new vertex we create.
				887
				888	if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
				889	{
				890	unsigned int plane = 0;
				891	uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
				892	do {
				893	if (cc & 1) {
				894	// pointers to our output list (head and current)
				895	vertex_t** const ovl = &out[outi][0];
				896	vertex_t** output = ovl;
				897	unsigned int oc = 0;
				898	unsigned int sentinel = 0;
				899	// previous vertex, compute distance to the plane
				900	vertex_t* s = ivl[ic-1];
				901	const vec4_t& equation = c->clipPlanes.plane[plane].equation;
				902	GLfixed sd = dot4(equation.v, s->eye.v);
				903	// clip each vertex against this plane...
				904	for (unsigned int i=0 ; i<ic ; i++) {
				905	vertex_t* p = ivl[i];
				906	const GLfixed pd = dot4(equation.v, p->eye.v);
				907	if (sd >= 0) {
				908	if (pd >= 0) {
				909	// both inside
				910	*output++ = p;
				911	oc++;
				912	} else {
				913	// s inside, p outside (exiting)
				914	const GLfixed t = clipDivide(sd, sd-pd);
				915	c->arrays.clipEye(c, buf, t, p, s);
				916	*output++ = buf++;
				917	oc++;
				918	if (++sentinel >= 3)
				919	return; // non-convex polygon!
				920	}
				921	} else {
				922	if (pd >= 0) {
				923	// s outside (entering)
				924	if (pd) {
				925	const GLfixed t = clipDivide(pd, pd-sd);
				926	c->arrays.clipEye(c, buf, t, s, p);
				927	*output++ = buf++;
				928	oc++;
				929	if (++sentinel >= 3)
				930	return; // non-convex polygon!
				931	}
				932	*output++ = p;
				933	oc++;
				934	} else {
				935	// both outside
				936	}
				937	}
				938	s = p;
				939	sd = pd;
				940	}
				941	// output list become the new input list
				942	if (oc<3)
				943	return; // less than 3 vertices left? we're done!
				944	ivl = ovl;
				945	ic = oc;
				946	outi = 1-outi;
				947	}
				948	cc >>= 1;
				949	plane++;
				950	} while (cc);
				951	}
				952
				953	// frustum clip-planes
				954	if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
				955	{
				956	unsigned int plane = 0;
				957	uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
				958	do {
				959	if (cc & 1) {
				960	// pointers to our output list (head and current)
				961	vertex_t** const ovl = &out[outi][0];
				962	vertex_t** output = ovl;
				963	unsigned int oc = 0;
				964	unsigned int sentinel = 0;
				965	// previous vertex, compute distance to the plane
				966	vertex_t* s = ivl[ic-1];
				967	GLfixed sd = frustumPlaneDist(plane, s->clip);
				968	// clip each vertex against this plane...
				969	for (unsigned int i=0 ; i<ic ; i++) {
				970	vertex_t* p = ivl[i];
				971	const GLfixed pd = frustumPlaneDist(plane, p->clip);
				972	if (sd >= 0) {
				973	if (pd >= 0) {
				974	// both inside
				975	*output++ = p;
				976	oc++;
				977	} else {
				978	// s inside, p outside (exiting)
				979	const GLfixed t = clipDivide(sd, sd-pd);
				980	c->arrays.clipVertex(c, buf, t, p, s);
				981	*output++ = buf++;
				982	oc++;
				983	if (++sentinel >= 3)
				984	return; // non-convex polygon!
				985	}
				986	} else {
				987	if (pd >= 0) {
				988	// s outside (entering)
				989	if (pd) {
				990	const GLfixed t = clipDivide(pd, pd-sd);
				991	c->arrays.clipVertex(c, buf, t, s, p);
				992	*output++ = buf++;
				993	oc++;
				994	if (++sentinel >= 3)
				995	return; // non-convex polygon!
				996	}
				997	*output++ = p;
				998	oc++;
				999	} else {
				1000	// both outside
				1001	}
				1002	}
				1003	s = p;
				1004	sd = pd;
				1005	}
				1006	// output list become the new input list
				1007	if (oc<3)
				1008	return; // less than 3 vertices left? we're done!
				1009	ivl = ovl;
				1010	ic = oc;
				1011	outi = 1-outi;
				1012	}
				1013	cc >>= 1;
				1014	plane++;
				1015	} while (cc);
				1016	}
				1017
				1018	// finally we can render our triangles...
				1019	p0 = ivl[0];
				1020	p1 = ivl[1];
				1021	for (unsigned int i=2 ; i<ic ; i++) {
				1022	p2 = ivl[i];
				1023	c->lerp.initTriangle(p0, p1, p2);
				1024	if (cull_triangle(c, p0, p1, p2)) {
				1025	p1 = p2;
				1026	continue; // culled!
				1027	}
				1028	triangle(c, p0, p1, p2);
				1029	p1 = p2;
				1030	}
				1031	}
				1032
				1033	unsigned int clip_line(ogles_context_t* c, vertex_t* s, vertex_t* p)
				1034	{
				1035	const uint32_t all_cc = (s->flags \| p->flags) & vertex_t::CLIP_ALL;
				1036
				1037	if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
				1038	{
				1039	unsigned int plane = 0;
				1040	uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
				1041	do {
				1042	if (cc & 1) {
				1043	const vec4_t& equation = c->clipPlanes.plane[plane].equation;
				1044	const GLfixed sd = dot4(equation.v, s->eye.v);
				1045	const GLfixed pd = dot4(equation.v, p->eye.v);
				1046	if (sd >= 0) {
				1047	if (pd >= 0) {
				1048	// both inside
				1049	} else {
				1050	// s inside, p outside (exiting)
				1051	const GLfixed t = clipDivide(sd, sd-pd);
				1052	c->arrays.clipEye(c, p, t, p, s);
				1053	}
				1054	} else {
				1055	if (pd >= 0) {
				1056	// s outside (entering)
				1057	if (pd) {
				1058	const GLfixed t = clipDivide(pd, pd-sd);
				1059	c->arrays.clipEye(c, s, t, s, p);
				1060	}
				1061	} else {
				1062	// both outside
				1063	return 0;
				1064	}
				1065	}
				1066	}
				1067	cc >>= 1;
				1068	plane++;
				1069	} while (cc);
				1070	}
				1071
				1072	// frustum clip-planes
				1073	if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
				1074	{
				1075	unsigned int plane = 0;
				1076	uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
				1077	do {
				1078	if (cc & 1) {
				1079	const GLfixed sd = frustumPlaneDist(plane, s->clip);
				1080	const GLfixed pd = frustumPlaneDist(plane, p->clip);
				1081	if (sd >= 0) {
				1082	if (pd >= 0) {
				1083	// both inside
				1084	} else {
				1085	// s inside, p outside (exiting)
				1086	const GLfixed t = clipDivide(sd, sd-pd);
				1087	c->arrays.clipVertex(c, p, t, p, s);
				1088	}
				1089	} else {
				1090	if (pd >= 0) {
				1091	// s outside (entering)
				1092	if (pd) {
				1093	const GLfixed t = clipDivide(pd, pd-sd);
				1094	c->arrays.clipVertex(c, s, t, s, p);
				1095	}
				1096	} else {
				1097	// both outside
				1098	return 0;
				1099	}
				1100	}
				1101	}
				1102	cc >>= 1;
				1103	plane++;
				1104	} while (cc);
				1105	}
				1106
				1107	return 2;
				1108	}
				1109
				1110
				1111	}; // namespace android