blob: 90c275e4b285226c282960a90a3075cdb12351a1 [file] [log] [blame]
The Android Open Source Project4f6e8d72008-10-21 07:00:00 -07001/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#define LOG_TAG "GGLAssembler"
19
20#include <assert.h>
21#include <stdint.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <sys/types.h>
25#include <cutils/log.h>
26
27#include "codeflinger/GGLAssembler.h"
28
29namespace android {
30
31// ----------------------------------------------------------------------------
32
33GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
34 : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
35{
36}
37
38GGLAssembler::~GGLAssembler()
39{
40}
41
42void GGLAssembler::prolog()
43{
44 ARMAssemblerProxy::prolog();
45}
46
47void GGLAssembler::epilog(uint32_t touched)
48{
49 ARMAssemblerProxy::epilog(touched);
50}
51
52void GGLAssembler::reset(int opt_level)
53{
54 ARMAssemblerProxy::reset();
55 RegisterAllocator::reset();
56 mOptLevel = opt_level;
57}
58
59// ---------------------------------------------------------------------------
60
61int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
62{
63 int err = 0;
64 int opt_level = mOptLevel;
65 while (opt_level >= 0) {
66 reset(opt_level);
67 err = scanline_core(needs, c);
68 if (err == 0)
69 break;
70 opt_level--;
71 }
72
73 // XXX: in theory, pcForLabel is not valid before generate()
74 uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
75 uint32_t* fragment_end_pc = pcForLabel("epilog");
76 const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
77
78 // build a name for our pipeline
79 char name[64];
80 sprintf(name,
81 "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
82 needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
83
84 if (err) {
85 LOGE("Error while generating ""%s""\n", name);
86 disassemble(name);
87 return -1;
88 }
89
90 return generate(name);
91}
92
// Emits the whole scanline routine for |needs| at the current optimization
// level: decodes the pipeline state from |needs|, emits the prolog, the
// per-fragment loop (label "fragment_loop"), the loop bookkeeping and the
// epilog (label "epilog"). When the alpha test or the depth test is not
// GGL_ALWAYS it also emits the discard paths ("discard_before_textures" /
// "discard_after_textures") after the main epilog.
//
// Returns registerFile().status(); a non-zero value means generation failed
// (the function bails out early at several points as soon as it is set).
93int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
94{
// NOTE: |duration| is not read anywhere in this function.
95 int64_t duration = ggl_system_time();
96
97 mBlendFactorCached = 0;
98 mBlending = 0;
99 mMasking = 0;
100 mAA = GGL_READ_NEEDS(P_AA, needs.p);
101 mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
102 mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
103 mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
104 mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
105 mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
106 mBuilderContext.needs = needs;
107 mBuilderContext.c = c;
108 mBuilderContext.Rctx = reserveReg(R0); // context always in R0
109 mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];
110
111 // ------------------------------------------------------------------------
112
113 decodeLogicOpNeeds(needs);
114
115 decodeTMUNeeds(needs, c);
116
117 mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
118 mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
119 mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
120 mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
121
// When the color buffer has no alpha channel, every blend factor that
// refers to destination alpha is replaced by GGL_ONE.
122 if (!mCbFormat.c[GGLFormat::ALPHA].h) {
123 if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
124 (mBlendSrc == GGL_DST_ALPHA)) {
125 mBlendSrc = GGL_ONE;
126 }
127 if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
128 (mBlendSrcA == GGL_DST_ALPHA)) {
129 mBlendSrcA = GGL_ONE;
130 }
131 if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
132 (mBlendDst == GGL_DST_ALPHA)) {
133 mBlendDst = GGL_ONE;
134 }
135 if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
136 (mBlendDstA == GGL_DST_ALPHA)) {
137 mBlendDstA = GGL_ONE;
138 }
139 }
140
141 // if we need the framebuffer, read it now
142 const int blending = blending_codes(mBlendSrc, mBlendDst) |
143 blending_codes(mBlendSrcA, mBlendDstA);
144
145 // XXX: handle special cases, destination not modified...
// Both branches below are empty placeholders: nothing is emitted for them.
146 if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
147 (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
148 // Destination unmodified (beware of logic ops)
149 } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
150 (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
151 // Destination is zero (beware of logic ops)
152 }
153
// Fill in the per-component info (mInfo[]) and accumulate the mBlending
// and mMasking bit masks, one bit per component.
154 const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
155 for (int i=0 ; i<4 ; i++) {
156 const int mask = 1<<i;
157 component_info_t& info = mInfo[i];
158 int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
159 int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
160 if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
161 fs = GGL_ONE;
162 info.masked = !!(masking & mask);
163 info.inDest = !info.masked && mCbFormat.c[i].h &&
164 ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
165 if (mCbFormat.components >= GGL_LUMINANCE &&
166 (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
167 info.inDest = false;
168 }
169 info.needed = (i==GGLFormat::ALPHA) &&
170 (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
171 info.replaced = !!(mTextureMachine.replaced & mask);
172 info.iterated = (!info.replaced && (info.inDest || info.needed));
173 info.smooth = mSmooth && info.iterated;
174 info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
175 info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
176
177 mBlending |= (info.blend ? mask : 0);
178 mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
179 }
180
181
182 fragment_parts_t parts;
183
184 // ------------------------------------------------------------------------
185 prolog();
186 // ------------------------------------------------------------------------
187
188 build_scanline_prolog(parts, needs);
189
190 if (registerFile().status())
191 return registerFile().status();
192
193 // ------------------------------------------------------------------------
194 label("fragment_loop");
195 // ------------------------------------------------------------------------
196 {
197 Scratch regs(registerFile());
198
199 if (mDithering) {
200 // update the dither index.
201 MOV(AL, 0, parts.count.reg,
202 reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
203 ADD(AL, 0, parts.count.reg, parts.count.reg,
204 imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
205 MOV(AL, 0, parts.count.reg,
206 reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
207 }
208
209 // XXX: could we do an early alpha-test here in some cases?
210 // It would probably be used only with smooth-alpha and no texture
211 // (or no alpha component in the texture).
212
213 // Early z-test
214 if (mAlphaTest==GGL_ALWAYS) {
215 build_depth_test(parts, Z_TEST|Z_WRITE);
216 } else {
217 // we cannot do the z-write here, because
218 // it might be killed by the alpha-test later
219 build_depth_test(parts, Z_TEST);
220 }
221
222 { // texture coordinates
223 Scratch scratches(registerFile());
224
225 // texel generation
226 build_textures(parts, regs);
227 }
228
229 if ((blending & (FACTOR_DST|BLEND_DST)) || mMasking ||
230 (mLogicOp & LOGIC_OP_DST)) {
231 // blending / logic_op / masking need the framebuffer
232 mDstPixel.setTo(regs.obtain(), &mCbFormat);
233
234 // load the framebuffer pixel
235 comment("fetch color-buffer");
236 load(parts.cbPtr, mDstPixel);
237 }
238
239 if (registerFile().status())
240 return registerFile().status();
241
242 pixel_t pixel;
243 int directTex = mTextureMachine.directTexture;
244 if (directTex | parts.packed) {
245 // note: we can't have both here
246 // iterated color or direct texture
247 pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
248 pixel.flags &= ~CORRUPTIBLE;
249 } else {
250 if (mDithering) {
// Load the dither value: index = count & (GGL_DITHER_SIZE-1), read as a
// byte from the context at offset ditherMatrix + index.
251 const int ctxtReg = mBuilderContext.Rctx;
252 const int mask = GGL_DITHER_SIZE-1;
253 parts.dither = reg_t(regs.obtain());
254 AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
255 ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
256 LDRB(AL, parts.dither.reg, parts.dither.reg,
257 immed12_pre(GGL_OFFSETOF(ditherMatrix)));
258 }
259
260 // allocate a register for the resulting pixel
261 pixel.setTo(regs.obtain(), &mCbFormat, FIRST);
262
// Alpha is built first so the deferred z-write below happens only after
// the alpha component (and its test) has been emitted.
263 build_component(pixel, parts, GGLFormat::ALPHA, regs);
264
265 if (mAlphaTest!=GGL_ALWAYS) {
266 // only handle the z-write part here. We know z-test
267 // was successful, as well as alpha-test.
268 build_depth_test(parts, Z_WRITE);
269 }
270
271 build_component(pixel, parts, GGLFormat::RED, regs);
272 build_component(pixel, parts, GGLFormat::GREEN, regs);
273 build_component(pixel, parts, GGLFormat::BLUE, regs);
274
275 pixel.flags |= CORRUPTIBLE;
276 }
277
278 if (registerFile().status())
279 return registerFile().status();
280
281 if (pixel.reg == -1) {
282 // be defensive here. if we're here it's probably
283 // that this whole fragment is a no-op.
284 pixel = mDstPixel;
285 }
286
287 // logic operation
288 build_logic_op(pixel, regs);
289
290 // masking
291 build_masking(pixel, regs);
292
293 comment("store");
294 store(parts.cbPtr, pixel, WRITE_BACK);
295 }
296
297 if (registerFile().status())
298 return registerFile().status();
299
300 // update the iterated color...
301 if (parts.reload != 3) {
302 build_smooth_shade(parts);
303 }
304
305 // update iterated z
306 build_iterate_z(parts);
307
308 // update iterated fog
309 build_iterate_f(parts);
310
// The pixel counter lives in the upper 16 bits of parts.count; decrement it
// and loop while the result is still non-negative (PL).
311 SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
312 B(PL, "fragment_loop");
313 label("epilog");
314 epilog(registerFile().touched());
315
// Discard paths: a rejected fragment still advances the iterators and the
// color-buffer pointer before re-entering the loop.
316 if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
317 if (mDepthTest!=GGL_ALWAYS) {
318 label("discard_before_textures");
319 build_iterate_texture_coordinates(parts);
320 }
321 label("discard_after_textures");
322 build_smooth_shade(parts);
323 build_iterate_z(parts);
324 build_iterate_f(parts);
325 ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
326 SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
327 B(PL, "fragment_loop");
328 epilog(registerFile().touched());
329 }
330
331 return registerFile().status();
332}
333
334// ---------------------------------------------------------------------------
335
// Emits the per-scanline setup code and fills |parts| with the registers
// that stay live for the fragment loop: the pixel counter (parts.count), the
// color-buffer pointer (parts.cbPtr), and - depending on the pipeline state -
// the Z value (parts.z), texture coordinates, iterated color and the coverage
// pointer (parts.covPtr). The initial fog coordinate and the depth-buffer
// base are written back to the context instead of being kept in registers.
336void GGLAssembler::build_scanline_prolog(
337 fragment_parts_t& parts, const needs_t& needs)
338{
339 Scratch scratches(registerFile());
340 int Rctx = mBuilderContext.Rctx;
341
342 // compute count
343 comment("compute ct (# of pixels to process)");
344 parts.count.setTo(obtainReg());
345 int Rx = scratches.obtain();
346 int Ry = scratches.obtain();
347 CONTEXT_LOAD(Rx, iterators.xl);
348 CONTEXT_LOAD(parts.count.reg, iterators.xr);
349 CONTEXT_LOAD(Ry, iterators.y);
350
351 // parts.count = iterators.xr - Rx - 1 (i.e. count-1)
352 SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
353 SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));
354
355 if (mDithering) {
356 // parts.count.reg = 0xNNNNXXDD
357 // NNNN = count-1
358 // DD = dither offset
359 // XX = 0xxxxxxx (x = garbage)
360 Scratch scratches(registerFile());
361 int tx = scratches.obtain();
362 int ty = scratches.obtain();
363 AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
364 AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
365 ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
366 ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
367 } else {
368 // parts.count.reg = 0xNNNN0000
369 // NNNN = count-1
370 MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
371 }
372
373 // compute dst ptr
374 comment("compute color-buffer pointer");
375 const int cb_bits = mCbFormat.size*8;
376 int Rs = scratches.obtain();
377 parts.cbPtr.setTo(obtainReg(), cb_bits);
378 CONTEXT_LOAD(Rs, state.buffers.color.stride);
379 CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
380 SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs
381 base_offset(parts.cbPtr, parts.cbPtr, Rs);
382 scratches.recycle(Rs);
383
384 // init fog
385 const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
386 if (need_fog) {
387 comment("compute initial fog coordinate");
388 Scratch scratches(registerFile());
389 int dfdx = scratches.obtain();
390 int ydfdy = scratches.obtain();
391 int f = ydfdy;
392 CONTEXT_LOAD(dfdx, generated_vars.dfdx);
393 CONTEXT_LOAD(ydfdy, iterators.ydfdy);
// f = Rx*dfdx + ydfdy, stored to the context rather than kept in a register
394 MLA(AL, 0, f, Rx, dfdx, ydfdy);
395 CONTEXT_STORE(f, generated_vars.f);
396 }
397
398 // init Z coordinate
399 if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
400 parts.z = reg_t(obtainReg());
401 comment("compute initial Z coordinate");
402 Scratch scratches(registerFile());
403 int dzdx = scratches.obtain();
404 int ydzdy = parts.z.reg;
405 CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point
406 CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point
407 MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);
408
409 // we're going to index zbase of parts.count
410 // zbase = base + (xl + stride*y + (count-1))*2
// (the upper half of parts.count holds count-1 at this point)
411 int Rs = dzdx;
412 int zbase = scratches.obtain();
413 CONTEXT_LOAD(Rs, state.buffers.depth.stride);
414 CONTEXT_LOAD(zbase, state.buffers.depth.data);
415 SMLABB(AL, Rs, Ry, Rs, Rx);
416 ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
417 ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
418 CONTEXT_STORE(zbase, generated_vars.zbase);
419 }
420
421 // init texture coordinates
422 init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
423 scratches.recycle(Ry);
424
425 // iterated color
426 init_iterated_color(parts, reg_t(Rx));
427
428 // init coverage factor application (anti-aliasing)
429 if (mAA) {
// covPtr = coverage buffer + Rx*2 (16-bit entries)
430 parts.covPtr.setTo(obtainReg(), 16);
431 CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
432 ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
433 }
434}
435
436// ---------------------------------------------------------------------------
437
438void GGLAssembler::build_component( pixel_t& pixel,
439 const fragment_parts_t& parts,
440 int component,
441 Scratch& regs)
442{
443 static char const * comments[] = {"alpha", "red", "green", "blue"};
444 comment(comments[component]);
445
446 // local register file
447 Scratch scratches(registerFile());
448 const int dst_component_size = pixel.component_size(component);
449
450 component_t temp(-1);
451 build_incoming_component( temp, dst_component_size,
452 parts, component, scratches, regs);
453
454 if (mInfo[component].inDest) {
455
456 // blending...
457 build_blending( temp, mDstPixel, component, scratches );
458
459 // downshift component and rebuild pixel...
460 downshift(pixel, component, temp, parts.dither);
461 }
462}
463
// Produces in |temp| the incoming (source) value of |component| for the
// blending stage.
//
// When the component has to be extracted, it is built from the iterated
// color and the texture environment, optionally expanded to |dst_size| bits,
// then (alpha only) run through coverage application and the alpha test, and
// finally fogged. For alpha, a copy reduced to at most 8 bits is also kept in
// mAlphaSource when the blend factors need the source alpha.
//
// When no extraction is needed but the component is written to the
// destination, |temp| simply aliases the matching component of
// parts.iterated or of a texel. Otherwise |temp| is left untouched.
//
// |scratches| holds short-lived registers; |global_regs| is used instead when
// the alpha source must outlive this component.
464void GGLAssembler::build_incoming_component(
465 component_t& temp,
466 int dst_size,
467 const fragment_parts_t& parts,
468 int component,
469 Scratch& scratches,
470 Scratch& global_regs)
471{
472 const uint32_t component_mask = 1<<component;
473
474 // Figure out what we need for the blending stage...
475 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
476 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
477 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
478 fs = GGL_ONE;
479 }
480
481 // Figure out what we need to extract and for what reason
482 const int blending = blending_codes(fs, fd);
483
484 // Are we actually going to blend?
485 const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
486
487 // expand the source if the destination has more bits
// NOTE(review): the bound is GGL_TEXTURE_UNIT_COUNT-1, so the last texture
// unit is not examined here while the loop further down visits all units -
// confirm this is intentional.
488 int need_expander = false;
489 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
490 texture_unit_t& tmu = mTextureMachine.tmu[i];
491 if ((tmu.format_idx) &&
492 (parts.texel[i].component_size(component) < dst_size)) {
493 need_expander = true;
494 }
495 }
496
497 // do we need to extract this component?
498 const bool multiTexture = mTextureMachine.activeUnits > 1;
499 const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
500 (isAlphaSourceNeeded());
501 int need_extract = mInfo[component].needed;
502 if (mInfo[component].inDest)
503 {
504 need_extract |= ((need_blending ?
505 (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
506 need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
507 need_extract |= mInfo[component].smooth;
508 need_extract |= mInfo[component].fog;
509 need_extract |= mDithering;
510 need_extract |= multiTexture;
511 }
512
513 if (need_extract) {
// registers for a needed alpha source come from the caller's long-lived set
514 Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
515 component_t fragment;
516
517 // iterated color
518 build_iterated_color(fragment, parts, component, regs);
519
520 // texture environment (decal, modulate, replace)
521 build_texture_environment(fragment, parts, component, regs);
522
523 // expand the source if the destination has more bits
524 if (need_expander && (fragment.size() < dst_size)) {
525 // we're here only if we fetched a texel
526 // (so we know for sure fragment is CORRUPTIBLE)
527 expand(fragment, fragment, dst_size);
528 }
529
530 // We have a few specific things to do for the alpha-channel
531 if ((component==GGLFormat::ALPHA) &&
532 (mInfo[component].needed || fragment.size()<dst_size))
533 {
534 // convert to integer_t first and make sure
535 // we don't corrupt a needed register
536 if (fragment.l) {
537 component_t incoming(fragment);
538 modify(fragment, regs);
539 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
540 fragment.h -= fragment.l;
541 fragment.l = 0;
542 }
543
544 // coverage factor application
545 build_coverage_application(fragment, parts, regs);
546
547 // alpha-test
548 build_alpha_test(fragment, parts);
549
550 if (blend_needs_alpha_source) {
551 // We keep only 8 bits for the blending stage
552 const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
553 if (fragment.flags & CORRUPTIBLE) {
// reuse the fragment's own register as the alpha source and mark the
// fragment as no longer corruptible
554 fragment.flags &= ~CORRUPTIBLE;
555 mAlphaSource.setTo(fragment.reg,
556 fragment.size(), fragment.flags);
557 if (shift) {
558 MOV(AL, 0, mAlphaSource.reg,
559 reg_imm(mAlphaSource.reg, LSR, shift));
560 }
561 } else {
562 // XXX: it would better to do this in build_blend_factor()
563 // so we can avoid the extra MOV below.
564 mAlphaSource.setTo(regs.obtain(),
565 fragment.size(), CORRUPTIBLE);
566 if (shift) {
567 MOV(AL, 0, mAlphaSource.reg,
568 reg_imm(fragment.reg, LSR, shift));
569 } else {
570 MOV(AL, 0, mAlphaSource.reg, fragment.reg);
571 }
572 }
573 mAlphaSource.s -= shift;
574 }
575 }
576
577 // fog...
578 build_fog( fragment, component, regs );
579
580 temp = fragment;
581 } else {
582 if (mInfo[component].inDest) {
583 // extraction not needed and replace
584 // we just select the right component
585 if ((mTextureMachine.replaced & component_mask) == 0) {
586 // component wasn't replaced, so use it!
587 temp = component_t(parts.iterated, component);
588 }
// the last matching texture unit wins
589 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
590 const texture_unit_t& tmu = mTextureMachine.tmu[i];
591 if ((tmu.mask & component_mask) &&
592 ((tmu.replaced & component_mask) == 0)) {
593 temp = component_t(parts.texel[i], component);
594 }
595 }
596 }
597 }
598}
599
600bool GGLAssembler::isAlphaSourceNeeded() const
601{
602 // XXX: also needed for alpha-test
603 const int bs = mBlendSrc;
604 const int bd = mBlendDst;
605 return bs==GGL_SRC_ALPHA_SATURATE ||
606 bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
607 bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
608}
609
610// ---------------------------------------------------------------------------
611
612void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
613{
614 if (mSmooth && !parts.iterated_packed) {
615 // update the iterated color in a pipelined way...
616 comment("update iterated color");
617 Scratch scratches(registerFile());
618
619 const int reload = parts.reload;
620 for (int i=0 ; i<4 ; i++) {
621 if (!mInfo[i].iterated)
622 continue;
623
624 int c = parts.argb[i].reg;
625 int dx = parts.argb_dx[i].reg;
626
627 if (reload & 1) {
628 c = scratches.obtain();
629 CONTEXT_LOAD(c, generated_vars.argb[i].c);
630 }
631 if (reload & 2) {
632 dx = scratches.obtain();
633 CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
634 }
635
636 if (mSmooth) {
637 ADD(AL, 0, c, c, dx);
638 }
639
640 if (reload & 1) {
641 CONTEXT_STORE(c, generated_vars.argb[i].c);
642 scratches.recycle(c);
643 }
644 if (reload & 2) {
645 scratches.recycle(dx);
646 }
647 }
648 }
649}
650
651// ---------------------------------------------------------------------------
652
653void GGLAssembler::build_coverage_application(component_t& fragment,
654 const fragment_parts_t& parts, Scratch& regs)
655{
656 // here fragment.l is guarenteed to be 0
657 if (mAA) {
658 // coverages are 1.15 fixed-point numbers
659 comment("coverage application");
660
661 component_t incoming(fragment);
662 modify(fragment, regs);
663
664 Scratch scratches(registerFile());
665 int cf = scratches.obtain();
666 LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
667 if (fragment.h > 31) {
668 fragment.h--;
669 SMULWB(AL, fragment.reg, incoming.reg, cf);
670 } else {
671 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
672 SMULWB(AL, fragment.reg, fragment.reg, cf);
673 }
674 }
675}
676
677// ---------------------------------------------------------------------------
678
679void GGLAssembler::build_alpha_test(component_t& fragment,
680 const fragment_parts_t& parts)
681{
682 if (mAlphaTest != GGL_ALWAYS) {
683 comment("Alpha Test");
684 Scratch scratches(registerFile());
685 int ref = scratches.obtain();
686 const int shift = GGL_COLOR_BITS-fragment.size();
687 CONTEXT_LOAD(ref, state.alpha_test.ref);
688 if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
689 else CMP(AL, fragment.reg, ref);
690 int cc = NV;
691 switch (mAlphaTest) {
692 case GGL_NEVER: cc = NV; break;
693 case GGL_LESS: cc = LT; break;
694 case GGL_EQUAL: cc = EQ; break;
695 case GGL_LEQUAL: cc = LS; break;
696 case GGL_GREATER: cc = HI; break;
697 case GGL_NOTEQUAL: cc = NE; break;
698 case GGL_GEQUAL: cc = HS; break;
699 }
700 B(cc^1, "discard_after_textures");
701 }
702}
703
704// ---------------------------------------------------------------------------
705
706void GGLAssembler::build_depth_test(
707 const fragment_parts_t& parts, uint32_t mask)
708{
709 mask &= Z_TEST|Z_WRITE;
710 const needs_t& needs = mBuilderContext.needs;
711 const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
712 Scratch scratches(registerFile());
713
714 if (mDepthTest != GGL_ALWAYS || zmask) {
715 int cc=AL, ic=AL;
716 switch (mDepthTest) {
717 case GGL_LESS: ic = HI; break;
718 case GGL_EQUAL: ic = EQ; break;
719 case GGL_LEQUAL: ic = HS; break;
720 case GGL_GREATER: ic = LT; break;
721 case GGL_NOTEQUAL: ic = NE; break;
722 case GGL_GEQUAL: ic = LS; break;
723 case GGL_NEVER:
724 // this never happens, because it's taken care of when
725 // computing the needs. but we keep it for completness.
726 comment("Depth Test (NEVER)");
727 B(AL, "discard_before_textures");
728 return;
729 case GGL_ALWAYS:
730 // we're here because zmask is enabled
731 mask &= ~Z_TEST; // test always passes.
732 break;
733 }
734
735 // inverse the condition
736 cc = ic^1;
737
738 if ((mask & Z_WRITE) && !zmask) {
739 mask &= ~Z_WRITE;
740 }
741
742 if (!mask)
743 return;
744
745 comment("Depth Test");
746
747 int zbase = scratches.obtain();
748 int depth = scratches.obtain();
749 int z = parts.z.reg;
750
751 CONTEXT_LOAD(zbase, generated_vars.zbase); // stall
752 SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
753 // above does zbase = zbase + ((count >> 16) << 1)
754
755 if (mask & Z_TEST) {
756 LDRH(AL, depth, zbase); // stall
757 CMP(AL, depth, reg_imm(z, LSR, 16));
758 B(cc, "discard_before_textures");
759 }
760 if (mask & Z_WRITE) {
761 if (mask == Z_WRITE) {
762 // only z-write asked, cc is meaningless
763 ic = AL;
764 }
765 MOV(AL, 0, depth, reg_imm(z, LSR, 16));
766 STRH(ic, depth, zbase);
767 }
768 }
769}
770
771void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
772{
773 const needs_t& needs = mBuilderContext.needs;
774 if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
775 Scratch scratches(registerFile());
776 int dzdx = scratches.obtain();
777 CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall
778 ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
779 }
780}
781
782void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
783{
784 const needs_t& needs = mBuilderContext.needs;
785 if (GGL_READ_NEEDS(P_FOG, needs.p)) {
786 Scratch scratches(registerFile());
787 int dfdx = scratches.obtain();
788 int f = scratches.obtain();
789 CONTEXT_LOAD(f, generated_vars.f);
790 CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall
791 ADD(AL, 0, f, f, dfdx);
792 CONTEXT_STORE(f, generated_vars.f);
793 }
794}
795
796// ---------------------------------------------------------------------------
797
798void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
799{
800 const needs_t& needs = mBuilderContext.needs;
801 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
802 if (opcode == GGL_COPY)
803 return;
804
805 comment("logic operation");
806
807 pixel_t s(pixel);
808 if (!(pixel.flags & CORRUPTIBLE)) {
809 pixel.reg = regs.obtain();
810 pixel.flags |= CORRUPTIBLE;
811 }
812
813 pixel_t d(mDstPixel);
814 switch(opcode) {
815 case GGL_CLEAR: MOV(AL, 0, pixel.reg, imm(0)); break;
816 case GGL_AND: AND(AL, 0, pixel.reg, s.reg, d.reg); break;
817 case GGL_AND_REVERSE: BIC(AL, 0, pixel.reg, s.reg, d.reg); break;
818 case GGL_COPY: break;
819 case GGL_AND_INVERTED: BIC(AL, 0, pixel.reg, d.reg, s.reg); break;
820 case GGL_NOOP: MOV(AL, 0, pixel.reg, d.reg); break;
821 case GGL_XOR: EOR(AL, 0, pixel.reg, s.reg, d.reg); break;
822 case GGL_OR: ORR(AL, 0, pixel.reg, s.reg, d.reg); break;
823 case GGL_NOR: ORR(AL, 0, pixel.reg, s.reg, d.reg);
824 MVN(AL, 0, pixel.reg, pixel.reg); break;
825 case GGL_EQUIV: EOR(AL, 0, pixel.reg, s.reg, d.reg);
826 MVN(AL, 0, pixel.reg, pixel.reg); break;
827 case GGL_INVERT: MVN(AL, 0, pixel.reg, d.reg); break;
828 case GGL_OR_REVERSE: // s | ~d == ~(~s & d)
829 BIC(AL, 0, pixel.reg, d.reg, s.reg);
830 MVN(AL, 0, pixel.reg, pixel.reg); break;
831 case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg); break;
832 case GGL_OR_INVERTED: // ~s | d == ~(s & ~d)
833 BIC(AL, 0, pixel.reg, s.reg, d.reg);
834 MVN(AL, 0, pixel.reg, pixel.reg); break;
835 case GGL_NAND: AND(AL, 0, pixel.reg, s.reg, d.reg);
836 MVN(AL, 0, pixel.reg, pixel.reg); break;
837 case GGL_SET: MVN(AL, 0, pixel.reg, imm(0)); break;
838 };
839}
840
841// ---------------------------------------------------------------------------
842
// Returns the even bit position (0, 2, ..., 30) of the lowest 2-bit group of
// |val| that contains a set bit, or 32 when |val| is zero.
//
// The probe uses an unsigned constant so that testing the top group does not
// shift into the sign bit, and the scan is bounded so a zero input cannot
// run the shift count past the width of the type.
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (i < 32 && !(val & (uint32_t(3) << i)))
        i += 2;
    return i;
}
850
// Rotates |val| right two bits at a time until its two lowest bits are not
// both zero and its six highest bits are all zero. On return |val| holds the
// rotated value and |rot| the total rotation applied. If no such position is
// found within a full turn, |val| ends up unchanged and |rot| is 0.
static void normalize(uint32_t& val, uint32_t& rot)
{
    uint32_t value = val;
    uint32_t steps = 0;     // number of 2-bit rotations performed

    while ((!(value & 3) || (value & 0xFC000000)) && steps < 16) {
        value = (value >> 2) | ((value & 3) << 30);
        steps++;
    }

    val = value;
    rot = (steps == 16) ? 0 : steps * 2;
}
866
867void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
868{
869 uint32_t rot;
870 uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
871 mask &= size;
872
873 if (mask == size) {
874 if (d != s)
875 MOV( AL, 0, d, s);
876 return;
877 }
878
879 int negative_logic = !isValidImmediate(mask);
880 if (negative_logic) {
881 mask = ~mask & size;
882 }
883 normalize(mask, rot);
884
885 if (mask) {
886 while (mask) {
887 uint32_t bitpos = find_bottom(mask);
888 int shift = rot + bitpos;
889 uint32_t m = mask & (0xff << bitpos);
890 mask &= ~m;
891 m >>= bitpos;
892 int32_t newMask = (m<<shift) | (m>>(32-shift));
893 if (!negative_logic) {
894 AND( AL, 0, d, s, imm(newMask) );
895 } else {
896 BIC( AL, 0, d, s, imm(newMask) );
897 }
898 s = d;
899 }
900 } else {
901 MOV( AL, 0, d, imm(0));
902 }
903}
904
905void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
906{
907 if (!mMasking)
908 return;
909
910 comment("color mask");
911
912 pixel_t fb(mDstPixel);
913 pixel_t s(pixel);
914 if (!(pixel.flags & CORRUPTIBLE)) {
915 pixel.reg = regs.obtain();
916 pixel.flags |= CORRUPTIBLE;
917 }
918
919 int mask = 0;
920 for (int i=0 ; i<4 ; i++) {
921 const int component_mask = 1<<i;
922 const int h = fb.format.c[i].h;
923 const int l = fb.format.c[i].l;
924 if (h && (!(mMasking & component_mask))) {
925 mask |= ((1<<(h-l))-1) << l;
926 }
927 }
928
929 // There is no need to clear the masked components of the source
930 // (unless we applied a logic op), because they're already zeroed
931 // by contruction (masked components are not computed)
932
933 if (mLogicOp) {
934 const needs_t& needs = mBuilderContext.needs;
935 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
936 if (opcode != GGL_CLEAR) {
937 // clear masked component of source
938 build_and_immediate(pixel.reg, s.reg, mask, fb.size());
939 s = pixel;
940 }
941 }
942
943 // clear non masked components of destination
944 build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
945
946 // or back the channels that were masked
947 if (s.reg == fb.reg) {
948 // this is in fact a MOV
949 if (s.reg == pixel.reg) {
950 // ugh. this in in fact a nop
951 } else {
952 MOV(AL, 0, pixel.reg, fb.reg);
953 }
954 } else {
955 ORR(AL, 0, pixel.reg, s.reg, fb.reg);
956 }
957}
958
959// ---------------------------------------------------------------------------
960
961void GGLAssembler::base_offset(
962 const pointer_t& d, const pointer_t& b, const reg_t& o)
963{
964 switch (b.size) {
965 case 32:
966 ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
967 break;
968 case 24:
969 if (d.reg == b.reg) {
970 ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
971 ADD(AL, 0, d.reg, d.reg, o.reg);
972 } else {
973 ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
974 ADD(AL, 0, d.reg, d.reg, b.reg);
975 }
976 break;
977 case 16:
978 ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
979 break;
980 case 8:
981 ADD(AL, 0, d.reg, b.reg, o.reg);
982 break;
983 }
984}
985
986// ----------------------------------------------------------------------------
987// cheezy register allocator...
988// ----------------------------------------------------------------------------
989
990void RegisterAllocator::reset()
991{
992 mRegs.reset();
993}
994
995int RegisterAllocator::reserveReg(int reg)
996{
997 return mRegs.reserve(reg);
998}
999
1000int RegisterAllocator::obtainReg()
1001{
1002 return mRegs.obtain();
1003}
1004
1005void RegisterAllocator::recycleReg(int reg)
1006{
1007 mRegs.recycle(reg);
1008}
1009
1010RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
1011{
1012 return mRegs;
1013}
1014
1015// ----------------------------------------------------------------------------
1016
1017RegisterAllocator::RegisterFile::RegisterFile()
1018 : mRegs(0), mTouched(0), mStatus(0)
1019{
1020 reserve(ARMAssemblerInterface::SP);
1021 reserve(ARMAssemblerInterface::PC);
1022}
1023
1024RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
1025 : mRegs(rhs.mRegs), mTouched(rhs.mTouched)
1026{
1027}
1028
1029RegisterAllocator::RegisterFile::~RegisterFile()
1030{
1031}
1032
1033bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
1034{
1035 return (mRegs == rhs.mRegs);
1036}
1037
1038void RegisterAllocator::RegisterFile::reset()
1039{
1040 mRegs = mTouched = mStatus = 0;
1041 reserve(ARMAssemblerInterface::SP);
1042 reserve(ARMAssemblerInterface::PC);
1043}
1044
1045int RegisterAllocator::RegisterFile::reserve(int reg)
1046{
1047 LOG_ALWAYS_FATAL_IF(isUsed(reg),
1048 "reserving register %d, but already in use",
1049 reg);
1050 mRegs |= (1<<reg);
1051 mTouched |= mRegs;
1052 return reg;
1053}
1054
1055void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
1056{
1057 mRegs |= regMask;
1058 mTouched |= regMask;
1059}
1060
1061int RegisterAllocator::RegisterFile::isUsed(int reg) const
1062{
1063 LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg);
1064 return mRegs & (1<<reg);
1065}
1066
1067int RegisterAllocator::RegisterFile::obtain()
1068{
1069 const char priorityList[14] = { 0, 1, 2, 3,
1070 12, 14, 4, 5,
1071 6, 7, 8, 9,
1072 10, 11 };
1073 const int nbreg = sizeof(priorityList);
1074 int i, r;
1075 for (i=0 ; i<nbreg ; i++) {
1076 r = priorityList[i];
1077 if (!isUsed(r)) {
1078 break;
1079 }
1080 }
1081 // this is not an error anymore because, we'll try again with
1082 // a lower optimization level.
1083 //LOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1084 if (i >= nbreg) {
1085 mStatus |= OUT_OF_REGISTERS;
1086 // we return SP so we can more easily debug things
1087 // the code will never be run anyway.
1088 return ARMAssemblerInterface::SP;
1089 }
1090 reserve(r);
1091 return r;
1092}
1093
1094bool RegisterAllocator::RegisterFile::hasFreeRegs() const
1095{
1096 return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true;
1097}
1098
1099int RegisterAllocator::RegisterFile::countFreeRegs() const
1100{
1101 int f = ~mRegs & 0xFFFF;
1102 // now count number of 1
1103 f = (f & 0x5555) + ((f>>1) & 0x5555);
1104 f = (f & 0x3333) + ((f>>2) & 0x3333);
1105 f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1106 f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1107 return f;
1108}
1109
1110void RegisterAllocator::RegisterFile::recycle(int reg)
1111{
1112 LOG_FATAL_IF(!isUsed(reg),
1113 "recycling unallocated register %d",
1114 reg);
1115 mRegs &= ~(1<<reg);
1116}
1117
1118void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
1119{
1120 LOG_FATAL_IF((mRegs & regMask)!=regMask,
1121 "recycling unallocated registers "
1122 "(recycle=%08x, allocated=%08x, unallocated=%08x)",
1123 regMask, mRegs, mRegs&regMask);
1124 mRegs &= ~regMask;
1125}
1126
1127uint32_t RegisterAllocator::RegisterFile::touched() const
1128{
1129 return mTouched;
1130}
1131
1132// ----------------------------------------------------------------------------
1133
1134}; // namespace android
1135