| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 1 | /* libs/pixelflinger/codeflinger/texturing.cpp | 
 | 2 | ** | 
 | 3 | ** Copyright 2006, The Android Open Source Project | 
 | 4 | ** | 
| Mark Salyzyn | 66ce3e0 | 2016-09-28 10:07:20 -0700 | [diff] [blame] | 5 | ** Licensed under the Apache License, Version 2.0 (the "License"); | 
 | 6 | ** you may not use this file except in compliance with the License. | 
 | 7 | ** You may obtain a copy of the License at | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 8 | ** | 
| Mark Salyzyn | 66ce3e0 | 2016-09-28 10:07:20 -0700 | [diff] [blame] | 9 | **     http://www.apache.org/licenses/LICENSE-2.0 | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 10 | ** | 
| Mark Salyzyn | 66ce3e0 | 2016-09-28 10:07:20 -0700 | [diff] [blame] | 11 | ** Unless required by applicable law or agreed to in writing, software | 
 | 12 | ** distributed under the License is distributed on an "AS IS" BASIS, | 
 | 13 | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | 14 | ** See the License for the specific language governing permissions and | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 15 | ** limitations under the License. | 
 | 16 | */ | 
 | 17 |  | 
| Mark Salyzyn | cfd5b08 | 2016-10-17 14:28:00 -0700 | [diff] [blame] | 18 | #define LOG_TAG "pixelflinger-code" | 
 | 19 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 20 | #include <assert.h> | 
 | 21 | #include <stdint.h> | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 22 | #include <stdio.h> | 
| Mark Salyzyn | 66ce3e0 | 2016-09-28 10:07:20 -0700 | [diff] [blame] | 23 | #include <stdlib.h> | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 24 | #include <sys/types.h> | 
 | 25 |  | 
| Mark Salyzyn | 30f991f | 2017-01-10 13:19:54 -0800 | [diff] [blame] | 26 | #include <log/log.h> | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 27 |  | 
| Mathias Agopian | 9857d99 | 2013-04-01 15:17:55 -0700 | [diff] [blame] | 28 | #include "GGLAssembler.h" | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 29 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 30 | namespace android { | 
 | 31 |  | 
 | 32 | // --------------------------------------------------------------------------- | 
 | 33 |  | 
 | 34 | // iterators are initialized like this: | 
 | 35 | // (intToFixedCenter(x) * dx)>>16 + x0 | 
 | 36 | // ((x<<16 + 0x8000) * dx)>>16 + x0 | 
 | 37 | // ((x<<16)*dx + (0x8000*dx))>>16 + x0 | 
 | 38 | // ( (x*dx) + dx>>1 ) + x0 | 
 | 39 | // (x*dx) + (dx>>1 + x0) | 
 | 40 |  | 
 | 41 | void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x) | 
 | 42 | { | 
 | 43 |     context_t const* c = mBuilderContext.c; | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 44 |  | 
 | 45 |     if (mSmooth) { | 
 | 46 |         // NOTE: we could take this case in the mDithering + !mSmooth case, | 
 | 47 |         // but this would use up to 4 more registers for the color components | 
 | 48 |         // for only a little added quality. | 
 | 49 |         // Currently, this causes the system to run out of registers in | 
 | 50 |         // some case (see issue #719496) | 
 | 51 |  | 
 | 52 |         comment("compute initial iterated color (smooth and/or dither case)"); | 
 | 53 |  | 
 | 54 |         parts.iterated_packed = 0; | 
 | 55 |         parts.packed = 0; | 
 | 56 |  | 
 | 57 |         // 0x1: color component | 
 | 58 |         // 0x2: iterators | 
 | 59 |         const int optReload = mOptLevel >> 1; | 
 | 60 |         if (optReload >= 3)         parts.reload = 0; // reload nothing | 
 | 61 |         else if (optReload == 2)    parts.reload = 2; // reload iterators | 
 | 62 |         else if (optReload == 1)    parts.reload = 1; // reload colors | 
 | 63 |         else if (optReload <= 0)    parts.reload = 3; // reload both | 
 | 64 |  | 
 | 65 |         if (!mSmooth) { | 
 | 66 |             // we're not smoothing (just dithering), we never have to  | 
 | 67 |             // reload the iterators | 
 | 68 |             parts.reload &= ~2; | 
 | 69 |         } | 
 | 70 |  | 
 | 71 |         Scratch scratches(registerFile()); | 
 | 72 |         const int t0 = (parts.reload & 1) ? scratches.obtain() : 0; | 
 | 73 |         const int t1 = (parts.reload & 2) ? scratches.obtain() : 0; | 
 | 74 |         for (int i=0 ; i<4 ; i++) { | 
 | 75 |             if (!mInfo[i].iterated) | 
 | 76 |                 continue;             | 
 | 77 |              | 
 | 78 |             // this component exists in the destination and is not replaced | 
 | 79 |             // by a texture unit. | 
 | 80 |             const int c = (parts.reload & 1) ? t0 : obtainReg();               | 
 | 81 |             if (i==0) CONTEXT_LOAD(c, iterators.ydady); | 
 | 82 |             if (i==1) CONTEXT_LOAD(c, iterators.ydrdy); | 
 | 83 |             if (i==2) CONTEXT_LOAD(c, iterators.ydgdy); | 
 | 84 |             if (i==3) CONTEXT_LOAD(c, iterators.ydbdy); | 
 | 85 |             parts.argb[i].reg = c; | 
 | 86 |  | 
 | 87 |             if (mInfo[i].smooth) { | 
 | 88 |                 parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg(); | 
 | 89 |                 const int dvdx = parts.argb_dx[i].reg; | 
 | 90 |                 CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx); | 
 | 91 |                 MLA(AL, 0, c, x.reg, dvdx, c); | 
 | 92 |                  | 
 | 93 |                 // adjust the color iterator to make sure it won't overflow | 
 | 94 |                 if (!mAA) { | 
 | 95 |                     // this is not needed when we're using anti-aliasing | 
 | 96 |                     // because we will (have to) clamp the components | 
 | 97 |                     // anyway. | 
 | 98 |                     int end = scratches.obtain(); | 
 | 99 |                     MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16)); | 
 | 100 |                     MLA(AL, 1, end, dvdx, end, c); | 
 | 101 |                     SUB(MI, 0, c, c, end); | 
 | 102 |                     BIC(AL, 0, c, c, reg_imm(c, ASR, 31));  | 
 | 103 |                     scratches.recycle(end); | 
 | 104 |                 } | 
 | 105 |             } | 
 | 106 |              | 
 | 107 |             if (parts.reload & 1) { | 
 | 108 |                 CONTEXT_STORE(c, generated_vars.argb[i].c); | 
 | 109 |             } | 
 | 110 |         } | 
 | 111 |     } else { | 
 | 112 |         // We're not smoothed, so we can  | 
 | 113 |         // just use a packed version of the color and extract the | 
 | 114 |         // components as needed (or not at all if we don't blend) | 
 | 115 |  | 
 | 116 |         // figure out if we need the iterated color | 
 | 117 |         int load = 0; | 
 | 118 |         for (int i=0 ; i<4 ; i++) { | 
 | 119 |             component_info_t& info = mInfo[i]; | 
 | 120 |             if ((info.inDest || info.needed) && !info.replaced) | 
 | 121 |                 load |= 1; | 
 | 122 |         } | 
 | 123 |          | 
 | 124 |         parts.iterated_packed = 1; | 
 | 125 |         parts.packed = (!mTextureMachine.mask && !mBlending | 
 | 126 |                 && !mFog && !mDithering); | 
 | 127 |         parts.reload = 0; | 
 | 128 |         if (load || parts.packed) { | 
 | 129 |             if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) { | 
 | 130 |                 comment("load initial iterated color (8888 packed)"); | 
 | 131 |                 parts.iterated.setTo(obtainReg(), | 
 | 132 |                         &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888])); | 
 | 133 |                 CONTEXT_LOAD(parts.iterated.reg, packed8888); | 
 | 134 |             } else { | 
 | 135 |                 comment("load initial iterated color (dest format packed)"); | 
 | 136 |  | 
 | 137 |                 parts.iterated.setTo(obtainReg(), &mCbFormat); | 
 | 138 |  | 
 | 139 |                 // pre-mask the iterated color | 
 | 140 |                 const int bits = parts.iterated.size(); | 
 | 141 |                 const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1; | 
 | 142 |                 uint32_t mask = 0; | 
 | 143 |                 if (mMasking) { | 
 | 144 |                     for (int i=0 ; i<4 ; i++) { | 
 | 145 |                         const int component_mask = 1<<i; | 
 | 146 |                         const int h = parts.iterated.format.c[i].h; | 
 | 147 |                         const int l = parts.iterated.format.c[i].l; | 
 | 148 |                         if (h && (!(mMasking & component_mask))) { | 
 | 149 |                             mask |= ((1<<(h-l))-1) << l; | 
 | 150 |                         } | 
 | 151 |                     } | 
 | 152 |                 } | 
 | 153 |  | 
 | 154 |                 if (mMasking && ((mask & size)==0)) { | 
 | 155 |                     // none of the components are present in the mask | 
 | 156 |                 } else { | 
 | 157 |                     CONTEXT_LOAD(parts.iterated.reg, packed); | 
 | 158 |                     if (mCbFormat.size == 1) { | 
 | 159 |                         AND(AL, 0, parts.iterated.reg, | 
 | 160 |                                 parts.iterated.reg, imm(0xFF)); | 
 | 161 |                     } else if (mCbFormat.size == 2) { | 
 | 162 |                         MOV(AL, 0, parts.iterated.reg, | 
 | 163 |                                 reg_imm(parts.iterated.reg, LSR, 16)); | 
 | 164 |                     } | 
 | 165 |                 } | 
 | 166 |  | 
 | 167 |                 // pre-mask the iterated color | 
 | 168 |                 if (mMasking) { | 
 | 169 |                     build_and_immediate(parts.iterated.reg, parts.iterated.reg, | 
 | 170 |                             mask, bits); | 
 | 171 |                 } | 
 | 172 |             } | 
 | 173 |         } | 
 | 174 |     } | 
 | 175 | } | 
 | 176 |  | 
 | 177 | void GGLAssembler::build_iterated_color( | 
 | 178 |         component_t& fragment, | 
 | 179 |         const fragment_parts_t& parts, | 
 | 180 |         int component, | 
 | 181 |         Scratch& regs) | 
 | 182 | { | 
 | 183 |     fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);  | 
 | 184 |  | 
 | 185 |     if (!mInfo[component].iterated) | 
 | 186 |         return; | 
 | 187 |  | 
 | 188 |     if (parts.iterated_packed) { | 
 | 189 |         // iterated colors are packed, extract the one we need | 
 | 190 |         extract(fragment, parts.iterated, component); | 
 | 191 |     } else { | 
 | 192 |         fragment.h = GGL_COLOR_BITS; | 
 | 193 |         fragment.l = GGL_COLOR_BITS - 8; | 
 | 194 |         fragment.flags |= CLEAR_LO; | 
 | 195 |         // iterated colors are held in their own register, | 
 | 196 |         // (smooth and/or dithering case) | 
 | 197 |         if (parts.reload==3) { | 
 | 198 |             // this implies mSmooth | 
 | 199 |             Scratch scratches(registerFile()); | 
 | 200 |             int dx = scratches.obtain(); | 
 | 201 |             CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); | 
 | 202 |             CONTEXT_LOAD(dx, generated_vars.argb[component].dx); | 
 | 203 |             ADD(AL, 0, dx, fragment.reg, dx); | 
 | 204 |             CONTEXT_STORE(dx, generated_vars.argb[component].c); | 
 | 205 |         } else if (parts.reload & 1) { | 
 | 206 |             CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); | 
 | 207 |         } else { | 
 | 208 |             // we don't reload, so simply rename the register and mark as | 
 | 209 |             // non CORRUPTIBLE so that the texture env or blending code | 
 | 210 |             // won't modify this (renamed) register | 
 | 211 |             regs.recycle(fragment.reg); | 
 | 212 |             fragment.reg = parts.argb[component].reg; | 
 | 213 |             fragment.flags &= ~CORRUPTIBLE; | 
 | 214 |         } | 
 | 215 |         if (mInfo[component].smooth && mAA) { | 
 | 216 |             // when using smooth shading AND anti-aliasing, we need to clamp | 
 | 217 |             // the iterators because there is always an extra pixel on the | 
 | 218 |             // edges, which most of the time will cause an overflow | 
 | 219 |             // (since technically its outside of the domain). | 
 | 220 |             BIC(AL, 0, fragment.reg, fragment.reg, | 
 | 221 |                     reg_imm(fragment.reg, ASR, 31)); | 
 | 222 |             component_sat(fragment); | 
 | 223 |         } | 
 | 224 |     } | 
 | 225 | } | 
 | 226 |  | 
 | 227 | // --------------------------------------------------------------------------- | 
 | 228 |  | 
 | 229 | void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs) | 
 | 230 | { | 
 | 231 |     // gather some informations about the components we need to process... | 
 | 232 |     const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; | 
 | 233 |     switch(opcode) { | 
 | 234 |     case GGL_COPY: | 
 | 235 |         mLogicOp = 0; | 
 | 236 |         break; | 
 | 237 |     case GGL_CLEAR: | 
 | 238 |     case GGL_SET: | 
 | 239 |         mLogicOp = LOGIC_OP; | 
 | 240 |         break; | 
 | 241 |     case GGL_AND: | 
 | 242 |     case GGL_AND_REVERSE: | 
 | 243 |     case GGL_AND_INVERTED: | 
 | 244 |     case GGL_XOR: | 
 | 245 |     case GGL_OR: | 
 | 246 |     case GGL_NOR: | 
 | 247 |     case GGL_EQUIV: | 
 | 248 |     case GGL_OR_REVERSE: | 
 | 249 |     case GGL_OR_INVERTED: | 
 | 250 |     case GGL_NAND: | 
 | 251 |         mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST; | 
 | 252 |         break; | 
 | 253 |     case GGL_NOOP: | 
 | 254 |     case GGL_INVERT: | 
 | 255 |         mLogicOp = LOGIC_OP|LOGIC_OP_DST; | 
 | 256 |         break;         | 
 | 257 |     case GGL_COPY_INVERTED: | 
 | 258 |         mLogicOp = LOGIC_OP|LOGIC_OP_SRC; | 
 | 259 |         break; | 
 | 260 |     };         | 
 | 261 | } | 
 | 262 |  | 
 | 263 | void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c) | 
 | 264 | { | 
 | 265 |     uint8_t replaced=0; | 
 | 266 |     mTextureMachine.mask = 0; | 
 | 267 |     mTextureMachine.activeUnits = 0; | 
 | 268 |     for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) { | 
 | 269 |         texture_unit_t& tmu = mTextureMachine.tmu[i]; | 
 | 270 |         if (replaced == 0xF) { | 
 | 271 |             // all components are replaced, skip this TMU. | 
 | 272 |             tmu.format_idx = 0; | 
 | 273 |             tmu.mask = 0; | 
 | 274 |             tmu.replaced = replaced; | 
 | 275 |             continue; | 
 | 276 |         } | 
 | 277 |         tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]); | 
 | 278 |         tmu.format = c->formats[tmu.format_idx]; | 
 | 279 |         tmu.bits = tmu.format.size*8; | 
 | 280 |         tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]); | 
 | 281 |         tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]); | 
 | 282 |         tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i])); | 
 | 283 |         tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]); | 
 | 284 |         tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i]) | 
 | 285 |                 && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now | 
 | 286 |  | 
 | 287 |         // 5551 linear filtering is not supported | 
 | 288 |         if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551) | 
 | 289 |             tmu.linear = 0; | 
 | 290 |          | 
 | 291 |         tmu.mask = 0; | 
 | 292 |         tmu.replaced = replaced; | 
 | 293 |  | 
 | 294 |         if (tmu.format_idx) { | 
 | 295 |             mTextureMachine.activeUnits++; | 
 | 296 |             if (tmu.format.c[0].h)    tmu.mask |= 0x1; | 
 | 297 |             if (tmu.format.c[1].h)    tmu.mask |= 0x2; | 
 | 298 |             if (tmu.format.c[2].h)    tmu.mask |= 0x4; | 
 | 299 |             if (tmu.format.c[3].h)    tmu.mask |= 0x8; | 
 | 300 |             if (tmu.env == GGL_REPLACE) { | 
 | 301 |                 replaced |= tmu.mask; | 
 | 302 |             } else if (tmu.env == GGL_DECAL) { | 
 | 303 |                 if (!tmu.format.c[GGLFormat::ALPHA].h) { | 
 | 304 |                     // if we don't have alpha, decal does nothing | 
 | 305 |                     tmu.mask = 0; | 
 | 306 |                 } else { | 
 | 307 |                     // decal always ignores At | 
 | 308 |                     tmu.mask &= ~(1<<GGLFormat::ALPHA); | 
 | 309 |                 } | 
 | 310 |             } | 
 | 311 |         } | 
 | 312 |         mTextureMachine.mask |= tmu.mask; | 
 | 313 |         //printf("%d: mask=%08lx, replaced=%08lx\n", | 
 | 314 |         //    i, int(tmu.mask), int(tmu.replaced)); | 
 | 315 |     } | 
 | 316 |     mTextureMachine.replaced = replaced; | 
 | 317 |     mTextureMachine.directTexture = 0; | 
 | 318 |     //printf("replaced=%08lx\n", mTextureMachine.replaced); | 
 | 319 | } | 
 | 320 |  | 
 | 321 |  | 
 | 322 | void GGLAssembler::init_textures( | 
 | 323 |         tex_coord_t* coords, | 
 | 324 |         const reg_t& x, const reg_t& y) | 
 | 325 | { | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 326 |     const needs_t& needs = mBuilderContext.needs; | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 327 |     int Rx = x.reg; | 
 | 328 |     int Ry = y.reg; | 
 | 329 |  | 
 | 330 |     if (mTextureMachine.mask) { | 
 | 331 |         comment("compute texture coordinates"); | 
 | 332 |     } | 
 | 333 |  | 
 | 334 |     // init texture coordinates for each tmu | 
 | 335 |     const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n); | 
 | 336 |     const bool multiTexture = mTextureMachine.activeUnits > 1; | 
 | 337 |     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { | 
 | 338 |         const texture_unit_t& tmu = mTextureMachine.tmu[i]; | 
 | 339 |         if (tmu.format_idx == 0) | 
 | 340 |             continue; | 
 | 341 |         if ((tmu.swrap == GGL_NEEDS_WRAP_11) && | 
 | 342 |             (tmu.twrap == GGL_NEEDS_WRAP_11))  | 
 | 343 |         { | 
 | 344 |             // 1:1 texture | 
 | 345 |             pointer_t& txPtr = coords[i].ptr; | 
 | 346 |             txPtr.setTo(obtainReg(), tmu.bits); | 
 | 347 |             CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy); | 
 | 348 |             ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16));    // x += (s>>16) | 
 | 349 |             CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy); | 
 | 350 |             ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16));    // y += (t>>16) | 
 | 351 |             // merge base & offset | 
 | 352 |             CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride); | 
 | 353 |             SMLABB(AL, Rx, Ry, txPtr.reg, Rx);               // x+y*stride | 
| Ashok Bhat | bfc6dc4 | 2013-02-21 10:27:40 +0000 | [diff] [blame] | 354 |             CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data); | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 355 |             base_offset(txPtr, txPtr, Rx); | 
 | 356 |         } else { | 
 | 357 |             Scratch scratches(registerFile()); | 
 | 358 |             reg_t& s = coords[i].s; | 
 | 359 |             reg_t& t = coords[i].t; | 
 | 360 |             // s = (x * dsdx)>>16 + ydsdy | 
 | 361 |             // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0 | 
 | 362 |             // t = (x * dtdx)>>16 + ydtdy | 
 | 363 |             // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0 | 
 | 364 |             s.setTo(obtainReg()); | 
 | 365 |             t.setTo(obtainReg()); | 
 | 366 |             const int need_w = GGL_READ_NEEDS(W, needs.n); | 
 | 367 |             if (need_w) { | 
 | 368 |                 CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy); | 
 | 369 |                 CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy); | 
 | 370 |             } else { | 
 | 371 |                 int ydsdy = scratches.obtain(); | 
 | 372 |                 int ydtdy = scratches.obtain(); | 
 | 373 |                 CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx); | 
 | 374 |                 CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy); | 
 | 375 |                 CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx); | 
 | 376 |                 CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy); | 
 | 377 |                 MLA(AL, 0, s.reg, Rx, s.reg, ydsdy); | 
 | 378 |                 MLA(AL, 0, t.reg, Rx, t.reg, ydtdy); | 
 | 379 |             } | 
 | 380 |              | 
 | 381 |             if ((mOptLevel&1)==0) { | 
 | 382 |                 CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]); | 
 | 383 |                 CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]); | 
 | 384 |                 recycleReg(s.reg); | 
 | 385 |                 recycleReg(t.reg); | 
 | 386 |             } | 
 | 387 |         } | 
 | 388 |  | 
 | 389 |         // direct texture? | 
 | 390 |         if (!multiTexture && !mBlending && !mDithering && !mFog &&  | 
 | 391 |             cb_format_idx == tmu.format_idx && !tmu.linear && | 
 | 392 |             mTextureMachine.replaced == tmu.mask)  | 
 | 393 |         { | 
 | 394 |                 mTextureMachine.directTexture = i + 1;  | 
 | 395 |         } | 
 | 396 |     } | 
 | 397 | } | 
 | 398 |  | 
 | 399 | void GGLAssembler::build_textures(  fragment_parts_t& parts, | 
 | 400 |                                     Scratch& regs) | 
 | 401 | { | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 402 |     // We don't have a way to spill registers automatically | 
 | 403 |     // spill depth and AA regs, when we know we may have to. | 
 | 404 |     // build the spill list... | 
 | 405 |     uint32_t spill_list = 0; | 
 | 406 |     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { | 
 | 407 |         const texture_unit_t& tmu = mTextureMachine.tmu[i]; | 
 | 408 |         if (tmu.format_idx == 0) | 
 | 409 |             continue; | 
 | 410 |         if (tmu.linear) { | 
 | 411 |             // we may run out of register if we have linear filtering | 
 | 412 |             // at 1 or 4 bytes / pixel on any texture unit. | 
 | 413 |             if (tmu.format.size == 1) { | 
 | 414 |                 // if depth and AA enabled, we'll run out of 1 register | 
 | 415 |                 if (parts.z.reg > 0 && parts.covPtr.reg > 0) | 
 | 416 |                     spill_list |= 1<<parts.covPtr.reg; | 
 | 417 |             } | 
 | 418 |             if (tmu.format.size == 4) { | 
 | 419 |                 // if depth or AA enabled, we'll run out of 1 or 2 registers | 
 | 420 |                 if (parts.z.reg > 0) | 
 | 421 |                     spill_list |= 1<<parts.z.reg; | 
 | 422 |                 if (parts.covPtr.reg > 0)    | 
 | 423 |                     spill_list |= 1<<parts.covPtr.reg; | 
 | 424 |             } | 
 | 425 |         } | 
 | 426 |     } | 
 | 427 |  | 
 | 428 |     Spill spill(registerFile(), *this, spill_list); | 
 | 429 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 430 |     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { | 
 | 431 |         const texture_unit_t& tmu = mTextureMachine.tmu[i]; | 
 | 432 |         if (tmu.format_idx == 0) | 
 | 433 |             continue; | 
 | 434 |  | 
 | 435 |         pointer_t& txPtr = parts.coords[i].ptr; | 
 | 436 |         pixel_t& texel = parts.texel[i]; | 
| Chih-Hung Hsieh | c7f344e | 2017-10-26 11:19:43 -0700 | [diff] [blame] | 437 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 438 |         // repeat... | 
 | 439 |         if ((tmu.swrap == GGL_NEEDS_WRAP_11) && | 
 | 440 |             (tmu.twrap == GGL_NEEDS_WRAP_11)) | 
 | 441 |         { // 1:1 textures | 
 | 442 |             comment("fetch texel"); | 
 | 443 |             texel.setTo(regs.obtain(), &tmu.format); | 
 | 444 |             load(txPtr, texel, WRITE_BACK); | 
 | 445 |         } else { | 
 | 446 |             Scratch scratches(registerFile()); | 
 | 447 |             reg_t& s = parts.coords[i].s; | 
 | 448 |             reg_t& t = parts.coords[i].t; | 
 | 449 |             if ((mOptLevel&1)==0) { | 
 | 450 |                 comment("reload s/t (multitexture or linear filtering)"); | 
 | 451 |                 s.reg = scratches.obtain(); | 
 | 452 |                 t.reg = scratches.obtain(); | 
 | 453 |                 CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]); | 
 | 454 |                 CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]); | 
 | 455 |             } | 
 | 456 |  | 
| Paul Lind | 2bc2b79 | 2012-02-01 10:54:19 -0800 | [diff] [blame] | 457 |             if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) | 
 | 458 |                 return; | 
 | 459 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 460 |             comment("compute repeat/clamp"); | 
 | 461 |             int u       = scratches.obtain(); | 
 | 462 |             int v       = scratches.obtain(); | 
 | 463 |             int width   = scratches.obtain(); | 
 | 464 |             int height  = scratches.obtain(); | 
 | 465 |             int U = 0; | 
 | 466 |             int V = 0; | 
 | 467 |  | 
| Paul Lind | 2bc2b79 | 2012-02-01 10:54:19 -0800 | [diff] [blame] | 468 |             if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) | 
 | 469 |                 return; | 
 | 470 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 471 |             CONTEXT_LOAD(width,  generated_vars.texture[i].width); | 
 | 472 |             CONTEXT_LOAD(height, generated_vars.texture[i].height); | 
 | 473 |  | 
 | 474 |             int FRAC_BITS = 0; | 
 | 475 |             if (tmu.linear) { | 
 | 476 |                 // linear interpolation | 
 | 477 |                 if (tmu.format.size == 1) { | 
 | 478 |                     // for 8-bits textures, we can afford | 
 | 479 |                     // 7 bits of fractional precision at no | 
 | 480 |                     // additional cost (we can't do 8 bits | 
 | 481 |                     // because filter8 uses signed 16 bits muls) | 
 | 482 |                     FRAC_BITS = 7; | 
 | 483 |                 } else if (tmu.format.size == 2) { | 
 | 484 |                     // filter16() is internally limited to 4 bits, so: | 
 | 485 |                     // FRAC_BITS=2 generates less instructions, | 
 | 486 |                     // FRAC_BITS=3,4,5 creates unpleasant artifacts, | 
 | 487 |                     // FRAC_BITS=6+ looks good | 
 | 488 |                     FRAC_BITS = 6; | 
 | 489 |                 } else if (tmu.format.size == 4) { | 
 | 490 |                     // filter32() is internally limited to 8 bits, so: | 
 | 491 |                     // FRAC_BITS=4 looks good | 
 | 492 |                     // FRAC_BITS=5+ looks better, but generates 3 extra ipp | 
 | 493 |                     FRAC_BITS = 6; | 
 | 494 |                 } else { | 
 | 495 |                     // for all other cases we use 4 bits. | 
 | 496 |                     FRAC_BITS = 4; | 
 | 497 |                 } | 
 | 498 |             } | 
 | 499 |             wrapping(u, s.reg, width,  tmu.swrap, FRAC_BITS); | 
 | 500 |             wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS); | 
 | 501 |  | 
 | 502 |             if (tmu.linear) { | 
 | 503 |                 comment("compute linear filtering offsets"); | 
 | 504 |                 // pixel size scale | 
 | 505 |                 const int shift = 31 - gglClz(tmu.format.size); | 
 | 506 |                 U = scratches.obtain(); | 
 | 507 |                 V = scratches.obtain(); | 
 | 508 |  | 
| Paul Lind | 2bc2b79 | 2012-02-01 10:54:19 -0800 | [diff] [blame] | 509 |                 if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) | 
 | 510 |                     return; | 
 | 511 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 512 |                 // sample the texel center | 
 | 513 |                 SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1))); | 
 | 514 |                 SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1))); | 
 | 515 |  | 
 | 516 |                 // get the fractional part of U,V | 
 | 517 |                 AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1)); | 
 | 518 |                 AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1)); | 
 | 519 |  | 
 | 520 |                 // compute width-1 and height-1 | 
 | 521 |                 SUB(AL, 0, width,  width,  imm(1)); | 
 | 522 |                 SUB(AL, 0, height, height, imm(1)); | 
 | 523 |  | 
 | 524 |                 // get the integer part of U,V and clamp/wrap | 
 | 525 |                 // and compute offset to the next texel | 
 | 526 |                 if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) { | 
 | 527 |                     // u has already been REPEATed | 
 | 528 |                     MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS)); | 
 | 529 |                     MOV(MI, 0, u, width);                     | 
 | 530 |                     CMP(AL, u, width); | 
 | 531 |                     MOV(LT, 0, width, imm(1 << shift)); | 
 | 532 |                     if (shift) | 
 | 533 |                         MOV(GE, 0, width, reg_imm(width, LSL, shift)); | 
 | 534 |                     RSB(GE, 0, width, width, imm(0)); | 
 | 535 |                 } else { | 
 | 536 |                     // u has not been CLAMPed yet | 
 | 537 |                     // algorithm: | 
 | 538 |                     // if ((u>>4) >= width) | 
 | 539 |                     //      u = width<<4 | 
 | 540 |                     //      width = 0 | 
 | 541 |                     // else | 
 | 542 |                     //      width = 1<<shift | 
 | 543 |                     // u = u>>4; // get integer part | 
 | 544 |                     // if (u<0) | 
 | 545 |                     //      u = 0 | 
 | 546 |                     //      width = 0 | 
 | 547 |                     // generated_vars.rt = width | 
 | 548 |                      | 
 | 549 |                     CMP(AL, width, reg_imm(u, ASR, FRAC_BITS)); | 
 | 550 |                     MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS)); | 
 | 551 |                     MOV(LE, 0, width, imm(0)); | 
 | 552 |                     MOV(GT, 0, width, imm(1 << shift)); | 
 | 553 |                     MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS)); | 
 | 554 |                     MOV(MI, 0, u, imm(0)); | 
 | 555 |                     MOV(MI, 0, width, imm(0)); | 
 | 556 |                 } | 
 | 557 |                 CONTEXT_STORE(width, generated_vars.rt); | 
 | 558 |  | 
 | 559 |                 const int stride = width; | 
 | 560 |                 CONTEXT_LOAD(stride, generated_vars.texture[i].stride); | 
 | 561 |                 if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) { | 
 | 562 |                     // v has already been REPEATed | 
 | 563 |                     MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS)); | 
 | 564 |                     MOV(MI, 0, v, height); | 
 | 565 |                     CMP(AL, v, height); | 
 | 566 |                     MOV(LT, 0, height, imm(1 << shift)); | 
 | 567 |                     if (shift) | 
 | 568 |                         MOV(GE, 0, height, reg_imm(height, LSL, shift)); | 
 | 569 |                     RSB(GE, 0, height, height, imm(0)); | 
 | 570 |                     MUL(AL, 0, height, stride, height); | 
 | 571 |                 } else { | 
| Martyn Capewell | 96dbb4f | 2009-12-07 13:59:59 +0000 | [diff] [blame] | 572 |                     // v has not been CLAMPed yet | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 573 |                     CMP(AL, height, reg_imm(v, ASR, FRAC_BITS)); | 
 | 574 |                     MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS)); | 
 | 575 |                     MOV(LE, 0, height, imm(0)); | 
 | 576 |                     if (shift) { | 
 | 577 |                         MOV(GT, 0, height, reg_imm(stride, LSL, shift)); | 
 | 578 |                     } else { | 
 | 579 |                         MOV(GT, 0, height, stride); | 
 | 580 |                     } | 
 | 581 |                     MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS)); | 
 | 582 |                     MOV(MI, 0, v, imm(0)); | 
 | 583 |                     MOV(MI, 0, height, imm(0)); | 
 | 584 |                 } | 
 | 585 |                 CONTEXT_STORE(height, generated_vars.lb); | 
 | 586 |             } | 
 | 587 |      | 
 | 588 |             scratches.recycle(width); | 
 | 589 |             scratches.recycle(height); | 
 | 590 |  | 
 | 591 |             // iterate texture coordinates... | 
 | 592 |             comment("iterate s,t"); | 
 | 593 |             int dsdx = scratches.obtain(); | 
 | 594 |             int dtdx = scratches.obtain(); | 
| Paul Lind | 2bc2b79 | 2012-02-01 10:54:19 -0800 | [diff] [blame] | 595 |  | 
 | 596 |             if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) | 
 | 597 |                 return; | 
 | 598 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 599 |             CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); | 
 | 600 |             CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); | 
 | 601 |             ADD(AL, 0, s.reg, s.reg, dsdx); | 
 | 602 |             ADD(AL, 0, t.reg, t.reg, dtdx); | 
 | 603 |             if ((mOptLevel&1)==0) { | 
 | 604 |                 CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]); | 
 | 605 |                 CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]); | 
 | 606 |                 scratches.recycle(s.reg); | 
 | 607 |                 scratches.recycle(t.reg); | 
 | 608 |             } | 
 | 609 |             scratches.recycle(dsdx); | 
 | 610 |             scratches.recycle(dtdx); | 
 | 611 |  | 
 | 612 |             // merge base & offset... | 
 | 613 |             comment("merge base & offset"); | 
 | 614 |             texel.setTo(regs.obtain(), &tmu.format); | 
 | 615 |             txPtr.setTo(texel.reg, tmu.bits); | 
 | 616 |             int stride = scratches.obtain(); | 
| Paul Lind | 2bc2b79 | 2012-02-01 10:54:19 -0800 | [diff] [blame] | 617 |  | 
 | 618 |             if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) | 
 | 619 |                 return; | 
 | 620 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 621 |             CONTEXT_LOAD(stride,    generated_vars.texture[i].stride); | 
| Ashok Bhat | bfc6dc4 | 2013-02-21 10:27:40 +0000 | [diff] [blame] | 622 |             CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data); | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 623 |             SMLABB(AL, u, v, stride, u);    // u+v*stride  | 
 | 624 |             base_offset(txPtr, txPtr, u); | 
 | 625 |  | 
 | 626 |             // load texel | 
 | 627 |             if (!tmu.linear) { | 
 | 628 |                 comment("fetch texel"); | 
 | 629 |                 load(txPtr, texel, 0); | 
 | 630 |             } else { | 
 | 631 |                 // recycle registers we don't need anymore | 
 | 632 |                 scratches.recycle(u); | 
 | 633 |                 scratches.recycle(v); | 
 | 634 |                 scratches.recycle(stride); | 
 | 635 |  | 
 | 636 |                 comment("fetch texel, bilinear"); | 
 | 637 |                 switch (tmu.format.size) { | 
 | 638 |                 case 1:  filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; | 
 | 639 |                 case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; | 
 | 640 |                 case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; | 
 | 641 |                 case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; | 
 | 642 |                 } | 
 | 643 |             }             | 
 | 644 |         } | 
 | 645 |     } | 
 | 646 | } | 
 | 647 |  | 
 | 648 | void GGLAssembler::build_iterate_texture_coordinates( | 
 | 649 |     const fragment_parts_t& parts) | 
 | 650 | { | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 651 |     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { | 
 | 652 |         const texture_unit_t& tmu = mTextureMachine.tmu[i]; | 
 | 653 |         if (tmu.format_idx == 0) | 
 | 654 |             continue; | 
 | 655 |  | 
 | 656 |         if ((tmu.swrap == GGL_NEEDS_WRAP_11) && | 
 | 657 |             (tmu.twrap == GGL_NEEDS_WRAP_11)) | 
 | 658 |         { // 1:1 textures | 
 | 659 |             const pointer_t& txPtr = parts.coords[i].ptr; | 
 | 660 |             ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3)); | 
 | 661 |         } else { | 
 | 662 |             Scratch scratches(registerFile()); | 
 | 663 |             int s = parts.coords[i].s.reg; | 
 | 664 |             int t = parts.coords[i].t.reg; | 
 | 665 |             if ((mOptLevel&1)==0) { | 
 | 666 |                 s = scratches.obtain(); | 
 | 667 |                 t = scratches.obtain(); | 
 | 668 |                 CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]); | 
 | 669 |                 CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]); | 
 | 670 |             } | 
 | 671 |             int dsdx = scratches.obtain(); | 
 | 672 |             int dtdx = scratches.obtain(); | 
 | 673 |             CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); | 
 | 674 |             CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); | 
 | 675 |             ADD(AL, 0, s, s, dsdx); | 
 | 676 |             ADD(AL, 0, t, t, dtdx); | 
 | 677 |             if ((mOptLevel&1)==0) { | 
 | 678 |                 CONTEXT_STORE(s, generated_vars.texture[i].spill[0]); | 
 | 679 |                 CONTEXT_STORE(t, generated_vars.texture[i].spill[1]); | 
 | 680 |             } | 
 | 681 |         } | 
 | 682 |     } | 
 | 683 | } | 
 | 684 |  | 
 | 685 | void GGLAssembler::filter8( | 
| Ashok Bhat | 3078b13 | 2014-02-17 15:15:46 +0000 | [diff] [blame] | 686 |         const fragment_parts_t& /*parts*/, | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 687 |         pixel_t& texel, const texture_unit_t& tmu, | 
 | 688 |         int U, int V, pointer_t& txPtr, | 
 | 689 |         int FRAC_BITS) | 
 | 690 | { | 
 | 691 |     if (tmu.format.components != GGL_ALPHA && | 
 | 692 |         tmu.format.components != GGL_LUMINANCE) | 
 | 693 |     { | 
 | 694 |         // this is a packed format, and we don't support | 
 | 695 |         // linear filtering (it's probably RGB 332) | 
 | 696 |         // Should not happen with OpenGL|ES | 
 | 697 |         LDRB(AL, texel.reg, txPtr.reg); | 
 | 698 |         return; | 
 | 699 |     } | 
 | 700 |  | 
 | 701 |     // ------------------------ | 
 | 702 |     // about ~22 cycles / pixel | 
 | 703 |     Scratch scratches(registerFile()); | 
 | 704 |  | 
 | 705 |     int pixel= scratches.obtain(); | 
 | 706 |     int d    = scratches.obtain(); | 
 | 707 |     int u    = scratches.obtain(); | 
 | 708 |     int k    = scratches.obtain(); | 
 | 709 |     int rt   = scratches.obtain(); | 
 | 710 |     int lb   = scratches.obtain(); | 
 | 711 |  | 
 | 712 |     // RB -> U * V | 
 | 713 |  | 
 | 714 |     CONTEXT_LOAD(rt, generated_vars.rt); | 
 | 715 |     CONTEXT_LOAD(lb, generated_vars.lb); | 
 | 716 |  | 
 | 717 |     int offset = pixel; | 
 | 718 |     ADD(AL, 0, offset, lb, rt); | 
 | 719 |     LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset)); | 
 | 720 |     SMULBB(AL, u, U, V); | 
 | 721 |     SMULBB(AL, d, pixel, u); | 
 | 722 |     RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2))); | 
 | 723 |      | 
 | 724 |     // LB -> (1-U) * V | 
 | 725 |     RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); | 
 | 726 |     LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb)); | 
 | 727 |     SMULBB(AL, u, U, V); | 
 | 728 |     SMLABB(AL, d, pixel, u, d); | 
 | 729 |     SUB(AL, 0, k, k, u); | 
 | 730 |      | 
 | 731 |     // LT -> (1-U)*(1-V) | 
 | 732 |     RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); | 
 | 733 |     LDRB(AL, pixel, txPtr.reg); | 
 | 734 |     SMULBB(AL, u, U, V); | 
 | 735 |     SMLABB(AL, d, pixel, u, d); | 
 | 736 |  | 
 | 737 |     // RT -> U*(1-V) | 
 | 738 |     LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt)); | 
 | 739 |     SUB(AL, 0, u, k, u); | 
 | 740 |     SMLABB(AL, texel.reg, pixel, u, d); | 
 | 741 |      | 
 | 742 |     for (int i=0 ; i<4 ; i++) { | 
 | 743 |         if (!texel.format.c[i].h) continue; | 
 | 744 |         texel.format.c[i].h = FRAC_BITS*2+8; | 
 | 745 |         texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits in enough | 
 | 746 |     } | 
 | 747 |     texel.format.size = 4; | 
 | 748 |     texel.format.bitsPerPixel = 32; | 
 | 749 |     texel.flags |= CLEAR_LO; | 
 | 750 | } | 
 | 751 |  | 
 | 752 | void GGLAssembler::filter16( | 
| Ashok Bhat | 3078b13 | 2014-02-17 15:15:46 +0000 | [diff] [blame] | 753 |         const fragment_parts_t& /*parts*/, | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 754 |         pixel_t& texel, const texture_unit_t& tmu, | 
 | 755 |         int U, int V, pointer_t& txPtr, | 
 | 756 |         int FRAC_BITS) | 
 | 757 | {     | 
 | 758 |     // compute the mask | 
 | 759 |     // XXX: it would be nice if the mask below could be computed | 
 | 760 |     // automatically. | 
 | 761 |     uint32_t mask = 0; | 
 | 762 |     int shift = 0; | 
 | 763 |     int prec = 0; | 
 | 764 |     switch (tmu.format_idx) { | 
 | 765 |         case GGL_PIXEL_FORMAT_RGB_565: | 
 | 766 |             // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb | 
 | 767 |             // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb | 
 | 768 |             mask = 0x07E0F81F; | 
 | 769 |             shift = 16; | 
 | 770 |             prec = 5; | 
 | 771 |             break; | 
 | 772 |         case GGL_PIXEL_FORMAT_RGBA_4444: | 
 | 773 |             // 0000,1111,0000,1111 | 0000,1111,0000,1111 | 
 | 774 |             mask = 0x0F0F0F0F; | 
 | 775 |             shift = 12; | 
 | 776 |             prec = 4; | 
 | 777 |             break; | 
 | 778 |         case GGL_PIXEL_FORMAT_LA_88: | 
 | 779 |             // 0000,0000,1111,1111 | 0000,0000,1111,1111 | 
 | 780 |             // AALL -> 00AA | 00LL | 
 | 781 |             mask = 0x00FF00FF; | 
 | 782 |             shift = 8; | 
 | 783 |             prec = 8; | 
 | 784 |             break; | 
 | 785 |         default: | 
 | 786 |             // unsupported format, do something sensical... | 
| Steve Block | 01dda20 | 2012-01-06 14:13:42 +0000 | [diff] [blame] | 787 |             ALOGE("Unsupported 16-bits texture format (%d)", tmu.format_idx); | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 788 |             LDRH(AL, texel.reg, txPtr.reg); | 
 | 789 |             return; | 
 | 790 |     } | 
 | 791 |  | 
 | 792 |     const int adjust = FRAC_BITS*2 - prec; | 
 | 793 |     const int round  = 0; | 
 | 794 |  | 
 | 795 |     // update the texel format | 
 | 796 |     texel.format.size = 4; | 
 | 797 |     texel.format.bitsPerPixel = 32; | 
 | 798 |     texel.flags |= CLEAR_HI|CLEAR_LO; | 
 | 799 |     for (int i=0 ; i<4 ; i++) { | 
 | 800 |         if (!texel.format.c[i].h) continue; | 
 | 801 |         const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift; | 
 | 802 |         texel.format.c[i].h = tmu.format.c[i].h + offset + prec; | 
 | 803 |         texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec); | 
 | 804 |     } | 
 | 805 |  | 
 | 806 |     // ------------------------ | 
 | 807 |     // about ~40 cycles / pixel | 
 | 808 |     Scratch scratches(registerFile()); | 
 | 809 |  | 
 | 810 |     int pixel= scratches.obtain(); | 
 | 811 |     int d    = scratches.obtain(); | 
 | 812 |     int u    = scratches.obtain(); | 
 | 813 |     int k    = scratches.obtain(); | 
 | 814 |  | 
 | 815 |     // RB -> U * V | 
 | 816 |     int offset = pixel; | 
 | 817 |     CONTEXT_LOAD(offset, generated_vars.rt); | 
 | 818 |     CONTEXT_LOAD(u, generated_vars.lb); | 
 | 819 |     ADD(AL, 0, offset, offset, u); | 
 | 820 |  | 
 | 821 |     LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); | 
 | 822 |     SMULBB(AL, u, U, V); | 
 | 823 |     ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); | 
 | 824 |     build_and_immediate(pixel, pixel, mask, 32); | 
 | 825 |     if (adjust) { | 
 | 826 |         if (round) | 
 | 827 |             ADD(AL, 0, u, u, imm(1<<(adjust-1))); | 
 | 828 |         MOV(AL, 0, u, reg_imm(u, LSR, adjust)); | 
 | 829 |     } | 
 | 830 |     MUL(AL, 0, d, pixel, u); | 
 | 831 |     RSB(AL, 0, k, u, imm(1<<prec)); | 
 | 832 |      | 
 | 833 |     // LB -> (1-U) * V | 
 | 834 |     CONTEXT_LOAD(offset, generated_vars.lb); | 
 | 835 |     RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); | 
 | 836 |     LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); | 
 | 837 |     SMULBB(AL, u, U, V); | 
 | 838 |     ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); | 
 | 839 |     build_and_immediate(pixel, pixel, mask, 32); | 
 | 840 |     if (adjust) { | 
 | 841 |         if (round) | 
 | 842 |             ADD(AL, 0, u, u, imm(1<<(adjust-1))); | 
 | 843 |         MOV(AL, 0, u, reg_imm(u, LSR, adjust)); | 
 | 844 |     } | 
 | 845 |     MLA(AL, 0, d, pixel, u, d); | 
 | 846 |     SUB(AL, 0, k, k, u); | 
 | 847 |      | 
 | 848 |     // LT -> (1-U)*(1-V) | 
 | 849 |     RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); | 
 | 850 |     LDRH(AL, pixel, txPtr.reg); | 
 | 851 |     SMULBB(AL, u, U, V); | 
 | 852 |     ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); | 
 | 853 |     build_and_immediate(pixel, pixel, mask, 32); | 
 | 854 |     if (adjust) { | 
 | 855 |         if (round) | 
 | 856 |             ADD(AL, 0, u, u, imm(1<<(adjust-1))); | 
 | 857 |         MOV(AL, 0, u, reg_imm(u, LSR, adjust)); | 
 | 858 |     } | 
 | 859 |     MLA(AL, 0, d, pixel, u, d); | 
 | 860 |  | 
 | 861 |     // RT -> U*(1-V)             | 
 | 862 |     CONTEXT_LOAD(offset, generated_vars.rt); | 
 | 863 |     LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); | 
 | 864 |     SUB(AL, 0, u, k, u); | 
 | 865 |     ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); | 
 | 866 |     build_and_immediate(pixel, pixel, mask, 32); | 
 | 867 |     MLA(AL, 0, texel.reg, pixel, u, d); | 
 | 868 | } | 
 | 869 |  | 
 | 870 | void GGLAssembler::filter24( | 
| Ashok Bhat | 3078b13 | 2014-02-17 15:15:46 +0000 | [diff] [blame] | 871 |         const fragment_parts_t& /*parts*/, | 
 | 872 |         pixel_t& texel, const texture_unit_t& /*tmu*/, | 
 | 873 |         int /*U*/, int /*V*/, pointer_t& txPtr, | 
 | 874 |         int /*FRAC_BITS*/) | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 875 | { | 
 | 876 |     // not supported yet (currently disabled) | 
 | 877 |     load(txPtr, texel, 0); | 
 | 878 | } | 
 | 879 |  | 
 | 880 | void GGLAssembler::filter32( | 
| Ashok Bhat | 3078b13 | 2014-02-17 15:15:46 +0000 | [diff] [blame] | 881 |         const fragment_parts_t& /*parts*/, | 
 | 882 |         pixel_t& texel, const texture_unit_t& /*tmu*/, | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 883 |         int U, int V, pointer_t& txPtr, | 
 | 884 |         int FRAC_BITS) | 
 | 885 | { | 
 | 886 |     const int adjust = FRAC_BITS*2 - 8; | 
 | 887 |     const int round  = 0; | 
 | 888 |  | 
 | 889 |     // ------------------------ | 
 | 890 |     // about ~38 cycles / pixel | 
 | 891 |     Scratch scratches(registerFile()); | 
 | 892 |      | 
 | 893 |     int pixel= scratches.obtain(); | 
 | 894 |     int dh   = scratches.obtain(); | 
 | 895 |     int u    = scratches.obtain(); | 
 | 896 |     int k    = scratches.obtain(); | 
 | 897 |  | 
 | 898 |     int temp = scratches.obtain(); | 
 | 899 |     int dl   = scratches.obtain(); | 
 | 900 |     int mask = scratches.obtain(); | 
 | 901 |  | 
 | 902 |     MOV(AL, 0, mask, imm(0xFF)); | 
 | 903 |     ORR(AL, 0, mask, mask, imm(0xFF0000)); | 
 | 904 |  | 
 | 905 |     // RB -> U * V | 
 | 906 |     int offset = pixel; | 
 | 907 |     CONTEXT_LOAD(offset, generated_vars.rt); | 
 | 908 |     CONTEXT_LOAD(u, generated_vars.lb); | 
 | 909 |     ADD(AL, 0, offset, offset, u); | 
 | 910 |  | 
 | 911 |     LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); | 
 | 912 |     SMULBB(AL, u, U, V); | 
 | 913 |     AND(AL, 0, temp, mask, pixel); | 
 | 914 |     if (adjust) { | 
 | 915 |         if (round) | 
 | 916 |             ADD(AL, 0, u, u, imm(1<<(adjust-1))); | 
 | 917 |         MOV(AL, 0, u, reg_imm(u, LSR, adjust)); | 
 | 918 |     } | 
 | 919 |     MUL(AL, 0, dh, temp, u); | 
 | 920 |     AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); | 
 | 921 |     MUL(AL, 0, dl, temp, u); | 
 | 922 |     RSB(AL, 0, k, u, imm(0x100)); | 
 | 923 |  | 
 | 924 |     // LB -> (1-U) * V | 
 | 925 |     CONTEXT_LOAD(offset, generated_vars.lb); | 
 | 926 |     RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); | 
 | 927 |     LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); | 
 | 928 |     SMULBB(AL, u, U, V); | 
 | 929 |     AND(AL, 0, temp, mask, pixel); | 
 | 930 |     if (adjust) { | 
 | 931 |         if (round) | 
 | 932 |             ADD(AL, 0, u, u, imm(1<<(adjust-1))); | 
 | 933 |         MOV(AL, 0, u, reg_imm(u, LSR, adjust)); | 
 | 934 |     } | 
 | 935 |     MLA(AL, 0, dh, temp, u, dh);     | 
 | 936 |     AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); | 
 | 937 |     MLA(AL, 0, dl, temp, u, dl); | 
 | 938 |     SUB(AL, 0, k, k, u); | 
 | 939 |  | 
 | 940 |     // LT -> (1-U)*(1-V) | 
 | 941 |     RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); | 
 | 942 |     LDR(AL, pixel, txPtr.reg); | 
 | 943 |     SMULBB(AL, u, U, V); | 
 | 944 |     AND(AL, 0, temp, mask, pixel); | 
 | 945 |     if (adjust) { | 
 | 946 |         if (round) | 
 | 947 |             ADD(AL, 0, u, u, imm(1<<(adjust-1))); | 
 | 948 |         MOV(AL, 0, u, reg_imm(u, LSR, adjust)); | 
 | 949 |     } | 
 | 950 |     MLA(AL, 0, dh, temp, u, dh);     | 
 | 951 |     AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); | 
 | 952 |     MLA(AL, 0, dl, temp, u, dl); | 
 | 953 |  | 
 | 954 |     // RT -> U*(1-V)             | 
 | 955 |     CONTEXT_LOAD(offset, generated_vars.rt); | 
 | 956 |     LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); | 
 | 957 |     SUB(AL, 0, u, k, u); | 
 | 958 |     AND(AL, 0, temp, mask, pixel); | 
 | 959 |     MLA(AL, 0, dh, temp, u, dh);     | 
 | 960 |     AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); | 
 | 961 |     MLA(AL, 0, dl, temp, u, dl); | 
 | 962 |  | 
 | 963 |     AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8)); | 
 | 964 |     AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8)); | 
 | 965 |     ORR(AL, 0, texel.reg, dh, dl); | 
 | 966 | } | 
 | 967 |  | 
 | 968 | void GGLAssembler::build_texture_environment( | 
 | 969 |         component_t& fragment, | 
 | 970 |         const fragment_parts_t& parts, | 
 | 971 |         int component, | 
 | 972 |         Scratch& regs) | 
 | 973 | { | 
 | 974 |     const uint32_t component_mask = 1<<component; | 
 | 975 |     const bool multiTexture = mTextureMachine.activeUnits > 1; | 
 | 976 |     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { | 
 | 977 |         texture_unit_t& tmu = mTextureMachine.tmu[i]; | 
 | 978 |  | 
 | 979 |         if (tmu.mask & component_mask) { | 
 | 980 |             // replace or modulate with this texture | 
 | 981 |             if ((tmu.replaced & component_mask) == 0) { | 
 | 982 |                 // not replaced by a later tmu... | 
 | 983 |  | 
 | 984 |                 Scratch scratches(registerFile()); | 
 | 985 |                 pixel_t texel(parts.texel[i]); | 
| Paul Lind | 2bc2b79 | 2012-02-01 10:54:19 -0800 | [diff] [blame] | 986 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 987 |                 if (multiTexture &&  | 
 | 988 |                     tmu.swrap == GGL_NEEDS_WRAP_11 && | 
 | 989 |                     tmu.twrap == GGL_NEEDS_WRAP_11) | 
 | 990 |                 { | 
 | 991 |                     texel.reg = scratches.obtain(); | 
 | 992 |                     texel.flags |= CORRUPTIBLE; | 
 | 993 |                     comment("fetch texel (multitexture 1:1)"); | 
 | 994 |                     load(parts.coords[i].ptr, texel, WRITE_BACK); | 
 | 995 |                  } | 
 | 996 |  | 
 | 997 |                 component_t incoming(fragment); | 
 | 998 |                 modify(fragment, regs); | 
 | 999 |                  | 
 | 1000 |                 switch (tmu.env) { | 
 | 1001 |                 case GGL_REPLACE: | 
 | 1002 |                     extract(fragment, texel, component); | 
 | 1003 |                     break; | 
 | 1004 |                 case GGL_MODULATE: | 
 | 1005 |                     modulate(fragment, incoming, texel, component); | 
 | 1006 |                     break; | 
 | 1007 |                 case GGL_DECAL: | 
 | 1008 |                     decal(fragment, incoming, texel, component); | 
 | 1009 |                     break; | 
 | 1010 |                 case GGL_BLEND: | 
 | 1011 |                     blend(fragment, incoming, texel, component, i); | 
 | 1012 |                     break; | 
| The Android Open Source Project | 35237d1 | 2008-12-17 18:08:08 -0800 | [diff] [blame] | 1013 |                 case GGL_ADD: | 
 | 1014 |                     add(fragment, incoming, texel, component); | 
 | 1015 |                     break; | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 1016 |                 } | 
 | 1017 |             } | 
 | 1018 |         } | 
 | 1019 |     } | 
 | 1020 | } | 
 | 1021 |  | 
 | 1022 | // --------------------------------------------------------------------------- | 
 | 1023 |  | 
 | 1024 | void GGLAssembler::wrapping( | 
 | 1025 |             int d, | 
 | 1026 |             int coord, int size, | 
 | 1027 |             int tx_wrap, int tx_linear) | 
 | 1028 | { | 
 | 1029 |     // notes: | 
 | 1030 |     // if tx_linear is set, we need 4 extra bits of precision on the result | 
 | 1031 |     // SMULL/UMULL is 3 cycles | 
 | 1032 |     Scratch scratches(registerFile()); | 
 | 1033 |     int c = coord; | 
 | 1034 |     if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) { | 
 | 1035 |         // UMULL takes 4 cycles (interlocked), and we can get away with | 
 | 1036 |         // 2 cycles using SMULWB, but we're loosing 16 bits of precision | 
 | 1037 |         // out of 32 (this is not a problem because the iterator keeps | 
 | 1038 |         // its full precision) | 
 | 1039 |         // UMULL(AL, 0, size, d, c, size); | 
 | 1040 |         // note: we can't use SMULTB because it's signed. | 
 | 1041 |         MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear)); | 
 | 1042 |         SMULWB(AL, d, d, size); | 
 | 1043 |     } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) { | 
 | 1044 |         if (tx_linear) { | 
 | 1045 |             // 1 cycle | 
 | 1046 |             MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear)); | 
 | 1047 |         } else { | 
 | 1048 |             // 4 cycles (common case) | 
 | 1049 |             MOV(AL, 0, d, reg_imm(coord, ASR, 16)); | 
 | 1050 |             BIC(AL, 0, d, d, reg_imm(d, ASR, 31)); | 
 | 1051 |             CMP(AL, d, size); | 
 | 1052 |             SUB(GE, 0, d, size, imm(1)); | 
 | 1053 |         } | 
 | 1054 |     } | 
 | 1055 | } | 
 | 1056 |  | 
 | 1057 | // --------------------------------------------------------------------------- | 
 | 1058 |  | 
 | 1059 | void GGLAssembler::modulate( | 
 | 1060 |         component_t& dest,  | 
 | 1061 |         const component_t& incoming, | 
 | 1062 |         const pixel_t& incomingTexel, int component) | 
 | 1063 | { | 
 | 1064 |     Scratch locals(registerFile()); | 
 | 1065 |     integer_t texel(locals.obtain(), 32, CORRUPTIBLE);             | 
 | 1066 |     extract(texel, incomingTexel, component); | 
 | 1067 |  | 
 | 1068 |     const int Nt = texel.size(); | 
 | 1069 |         // Nt should always be less than 10 bits because it comes | 
 | 1070 |         // from the TMU. | 
 | 1071 |  | 
 | 1072 |     int Ni = incoming.size(); | 
 | 1073 |         // Ni could be big because it comes from previous MODULATEs | 
 | 1074 |  | 
 | 1075 |     if (Nt == 1) { | 
 | 1076 |         // texel acts as a bit-mask | 
 | 1077 |         // dest = incoming & ((texel << incoming.h)-texel) | 
 | 1078 |         RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h)); | 
 | 1079 |         AND(AL, 0, dest.reg, dest.reg, incoming.reg); | 
 | 1080 |         dest.l = incoming.l; | 
 | 1081 |         dest.h = incoming.h; | 
 | 1082 |         dest.flags |= (incoming.flags & CLEAR_LO); | 
 | 1083 |     } else if (Ni == 1) { | 
 | 1084 |         MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h)); | 
 | 1085 |         AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31)); | 
 | 1086 |         dest.l = 0; | 
 | 1087 |         dest.h = Nt; | 
 | 1088 |     } else { | 
 | 1089 |         int inReg = incoming.reg; | 
 | 1090 |         int shift = incoming.l; | 
 | 1091 |         if ((Nt + Ni) > 32) { | 
 | 1092 |             // we will overflow, reduce the precision of Ni to 8 bits | 
 | 1093 |             // (Note Nt cannot be more than 10 bits which happens with  | 
 | 1094 |             // 565 textures and GGL_LINEAR) | 
 | 1095 |             shift += Ni-8; | 
 | 1096 |             Ni = 8; | 
 | 1097 |         } | 
 | 1098 |  | 
 | 1099 |         // modulate by the component with the lowest precision | 
 | 1100 |         if (Nt >= Ni) { | 
 | 1101 |             if (shift) { | 
 | 1102 |                 // XXX: we should be able to avoid this shift | 
 | 1103 |                 // when shift==16 && Nt<16 && Ni<16, in which | 
 | 1104 |                 // we could use SMULBT below. | 
 | 1105 |                 MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift)); | 
 | 1106 |                 inReg = dest.reg; | 
 | 1107 |                 shift = 0; | 
 | 1108 |             } | 
 | 1109 |             // operation:           (Cf*Ct)/((1<<Ni)-1) | 
 | 1110 |             // approximated with:   Cf*(Ct + Ct>>(Ni-1))>>Ni | 
 | 1111 |             // this operation doesn't change texel's size | 
 | 1112 |             ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1)); | 
 | 1113 |             if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg); | 
 | 1114 |             else                MUL(AL, 0, dest.reg, texel.reg, dest.reg); | 
 | 1115 |             dest.l = Ni; | 
 | 1116 |             dest.h = Nt + Ni;             | 
 | 1117 |         } else { | 
 | 1118 |             if (shift && (shift != 16)) { | 
 | 1119 |                 // if shift==16, we can use 16-bits mul instructions later | 
 | 1120 |                 MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift)); | 
 | 1121 |                 inReg = dest.reg; | 
 | 1122 |                 shift = 0; | 
 | 1123 |             } | 
 | 1124 |             // operation:           (Cf*Ct)/((1<<Nt)-1) | 
 | 1125 |             // approximated with:   Ct*(Cf + Cf>>(Nt-1))>>Nt | 
 | 1126 |             // this operation doesn't change incoming's size | 
 | 1127 |             Scratch scratches(registerFile()); | 
 | 1128 |             int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg; | 
 | 1129 |             if (t == inReg) | 
 | 1130 |                 t = scratches.obtain(); | 
 | 1131 |             ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1)); | 
 | 1132 |             if (Nt<16 && Ni<16) { | 
 | 1133 |                 if (shift==16)  SMULBT(AL, dest.reg, t, inReg); | 
 | 1134 |                 else            SMULBB(AL, dest.reg, t, inReg); | 
 | 1135 |             } else              MUL(AL, 0, dest.reg, t, inReg); | 
 | 1136 |             dest.l = Nt; | 
 | 1137 |             dest.h = Nt + Ni; | 
 | 1138 |         } | 
 | 1139 |  | 
 | 1140 |         // low bits are not valid | 
 | 1141 |         dest.flags |= CLEAR_LO; | 
 | 1142 |  | 
 | 1143 |         // no need to keep more than 8 bits/component | 
 | 1144 |         if (dest.size() > 8) | 
 | 1145 |             dest.l = dest.h-8; | 
 | 1146 |     } | 
 | 1147 | } | 
 | 1148 |  | 
 | 1149 | void GGLAssembler::decal( | 
 | 1150 |         component_t& dest,  | 
 | 1151 |         const component_t& incoming, | 
 | 1152 |         const pixel_t& incomingTexel, int component) | 
 | 1153 | { | 
 | 1154 |     // RGBA: | 
 | 1155 |     // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At | 
 | 1156 |     // Av = Af | 
 | 1157 |     Scratch locals(registerFile()); | 
 | 1158 |     integer_t texel(locals.obtain(), 32, CORRUPTIBLE);             | 
 | 1159 |     integer_t factor(locals.obtain(), 32, CORRUPTIBLE); | 
 | 1160 |     extract(texel, incomingTexel, component); | 
 | 1161 |     extract(factor, incomingTexel, GGLFormat::ALPHA); | 
 | 1162 |  | 
 | 1163 |     // no need to keep more than 8-bits for decal  | 
 | 1164 |     int Ni = incoming.size(); | 
 | 1165 |     int shift = incoming.l; | 
 | 1166 |     if (Ni > 8) { | 
 | 1167 |         shift += Ni-8; | 
 | 1168 |         Ni = 8; | 
 | 1169 |     } | 
 | 1170 |     integer_t incomingNorm(incoming.reg, Ni, incoming.flags); | 
 | 1171 |     if (shift) { | 
 | 1172 |         MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift)); | 
 | 1173 |         incomingNorm.reg = dest.reg; | 
 | 1174 |         incomingNorm.flags |= CORRUPTIBLE; | 
 | 1175 |     } | 
 | 1176 |     ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1)); | 
 | 1177 |     build_blendOneMinusFF(dest, factor, incomingNorm, texel); | 
 | 1178 | } | 
 | 1179 |  | 
 | 1180 | void GGLAssembler::blend( | 
 | 1181 |         component_t& dest,  | 
 | 1182 |         const component_t& incoming, | 
 | 1183 |         const pixel_t& incomingTexel, int component, int tmu) | 
 | 1184 | { | 
 | 1185 |     // RGBA: | 
 | 1186 |     // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct | 
 | 1187 |     // Av = At*Af | 
 | 1188 |  | 
 | 1189 |     if (component == GGLFormat::ALPHA) { | 
 | 1190 |         modulate(dest, incoming, incomingTexel, component); | 
 | 1191 |         return; | 
 | 1192 |     } | 
 | 1193 |      | 
 | 1194 |     Scratch locals(registerFile()); | 
 | 1195 |     integer_t color(locals.obtain(), 8, CORRUPTIBLE);             | 
 | 1196 |     integer_t factor(locals.obtain(), 32, CORRUPTIBLE); | 
 | 1197 |     LDRB(AL, color.reg, mBuilderContext.Rctx, | 
 | 1198 |             immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component]))); | 
 | 1199 |     extract(factor, incomingTexel, component); | 
 | 1200 |  | 
 | 1201 |     // no need to keep more than 8-bits for blend  | 
 | 1202 |     int Ni = incoming.size(); | 
 | 1203 |     int shift = incoming.l; | 
 | 1204 |     if (Ni > 8) { | 
 | 1205 |         shift += Ni-8; | 
 | 1206 |         Ni = 8; | 
 | 1207 |     } | 
 | 1208 |     integer_t incomingNorm(incoming.reg, Ni, incoming.flags); | 
 | 1209 |     if (shift) { | 
 | 1210 |         MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift)); | 
 | 1211 |         incomingNorm.reg = dest.reg; | 
 | 1212 |         incomingNorm.flags |= CORRUPTIBLE; | 
 | 1213 |     } | 
 | 1214 |     ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1)); | 
 | 1215 |     build_blendOneMinusFF(dest, factor, incomingNorm, color); | 
 | 1216 | } | 
 | 1217 |  | 
| The Android Open Source Project | 35237d1 | 2008-12-17 18:08:08 -0800 | [diff] [blame] | 1218 | void GGLAssembler::add( | 
 | 1219 |         component_t& dest,  | 
 | 1220 |         const component_t& incoming, | 
 | 1221 |         const pixel_t& incomingTexel, int component) | 
 | 1222 | { | 
 | 1223 |     // RGBA: | 
 | 1224 |     // Cv = Cf + Ct; | 
 | 1225 |     Scratch locals(registerFile()); | 
 | 1226 |      | 
 | 1227 |     component_t incomingTemp(incoming); | 
 | 1228 |  | 
 | 1229 |     // use "dest" as a temporary for extracting the texel, unless "dest" | 
 | 1230 |     // overlaps "incoming". | 
 | 1231 |     integer_t texel(dest.reg, 32, CORRUPTIBLE); | 
 | 1232 |     if (dest.reg == incomingTemp.reg) | 
 | 1233 |         texel.reg = locals.obtain(); | 
 | 1234 |     extract(texel, incomingTexel, component); | 
 | 1235 |  | 
 | 1236 |     if (texel.s < incomingTemp.size()) { | 
 | 1237 |         expand(texel, texel, incomingTemp.size()); | 
 | 1238 |     } else if (texel.s > incomingTemp.size()) { | 
 | 1239 |         if (incomingTemp.flags & CORRUPTIBLE) { | 
 | 1240 |             expand(incomingTemp, incomingTemp, texel.s); | 
 | 1241 |         } else { | 
 | 1242 |             incomingTemp.reg = locals.obtain(); | 
 | 1243 |             expand(incomingTemp, incoming, texel.s); | 
 | 1244 |         } | 
 | 1245 |     } | 
 | 1246 |  | 
 | 1247 |     if (incomingTemp.l) { | 
 | 1248 |         ADD(AL, 0, dest.reg, texel.reg, | 
 | 1249 |                 reg_imm(incomingTemp.reg, LSR, incomingTemp.l)); | 
 | 1250 |     } else { | 
 | 1251 |         ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg); | 
 | 1252 |     } | 
 | 1253 |     dest.l = 0; | 
 | 1254 |     dest.h = texel.size(); | 
 | 1255 |     component_sat(dest); | 
 | 1256 | } | 
 | 1257 |  | 
| The Android Open Source Project | 4f6e8d7 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 1258 | // ---------------------------------------------------------------------------- | 
 | 1259 |  | 
 | 1260 | }; // namespace android | 
 | 1261 |  |