blob: d4aa475387034361f1c7e522a1dda269d23cb5e2 [file] [log] [blame]
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08001/* libs/pixelflinger/codeflinger/blending.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
Mark Salyzyn66ce3e02016-09-28 10:07:20 -07005** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08008**
Mark Salyzyn66ce3e02016-09-28 10:07:20 -07009** http://www.apache.org/licenses/LICENSE-2.0
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080010**
Mark Salyzyn66ce3e02016-09-28 10:07:20 -070011** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080015** limitations under the License.
16*/
17
18#include <assert.h>
19#include <stdint.h>
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080020#include <stdio.h>
Mark Salyzyn66ce3e02016-09-28 10:07:20 -070021#include <stdlib.h>
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080022#include <sys/types.h>
23
Mark Salyzyn66ce3e02016-09-28 10:07:20 -070024#include <android/log.h>
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080025
Mathias Agopian9857d992013-04-01 15:17:55 -070026#include "GGLAssembler.h"
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080027
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080028namespace android {
29
30void GGLAssembler::build_fog(
31 component_t& temp, // incomming fragment / output
32 int component,
33 Scratch& regs)
34{
35 if (mInfo[component].fog) {
36 Scratch scratches(registerFile());
37 comment("fog");
38
39 integer_t fragment(temp.reg, temp.h, temp.flags);
40 if (!(temp.flags & CORRUPTIBLE)) {
41 temp.reg = regs.obtain();
42 temp.flags |= CORRUPTIBLE;
43 }
44
45 integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
46 LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
47 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
48
49 integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
50 CONTEXT_LOAD(factor.reg, generated_vars.f);
51
52 // clamp fog factor (TODO: see if there is a way to guarantee
53 // we won't overflow, when setting the iterators)
54 BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
55 CMP(AL, factor.reg, imm( 0x10000 ));
56 MOV(HS, 0, factor.reg, imm( 0x10000 ));
57
58 build_blendFOneMinusF(temp, factor, fragment, fogColor);
59 }
60}
61
62void GGLAssembler::build_blending(
63 component_t& temp, // incomming fragment / output
64 const pixel_t& pixel, // framebuffer
65 int component,
66 Scratch& regs)
67{
68 if (!mInfo[component].blend)
69 return;
70
71 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
72 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
73 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
74 fs = GGL_ONE;
75 const int blending = blending_codes(fs, fd);
76 if (!temp.size()) {
77 // here, blending will produce something which doesn't depend on
78 // that component (eg: GL_ZERO:GL_*), so the register has not been
79 // allocated yet. Will never be used as a source.
80 temp = component_t(regs.obtain(), CORRUPTIBLE);
81 }
82
83 // we are doing real blending...
84 // fb: extracted dst
85 // fragment: extracted src
86 // temp: component_t(fragment) and result
87
88 // scoped register allocator
89 Scratch scratches(registerFile());
90 comment("blending");
91
92 // we can optimize these cases a bit...
93 // (1) saturation is not needed
94 // (2) we can use only one multiply instead of 2
95 // (3) we can reduce the register pressure
96 // R = S*f + D*(1-f) = (S-D)*f + D
97 // R = S*(1-f) + D*f = (D-S)*f + S
98
99 const bool same_factor_opt1 =
100 (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
101 (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
102 (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
103 (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
104
105 const bool same_factor_opt2 =
106 (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
107 (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
108 (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
109 (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
110
111
112 // XXX: we could also optimize these cases:
113 // R = S*f + D*f = (S+D)*f
114 // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
115 // R = S*D + D*S = 2*S*D
116
117
118 // see if we need to extract 'component' from the destination (fb)
119 integer_t fb;
120 if (blending & (BLEND_DST|FACTOR_DST)) {
121 fb.setTo(scratches.obtain(), 32);
122 extract(fb, pixel, component);
123 if (mDithering) {
124 // XXX: maybe what we should do instead, is simply
125 // expand fb -or- fragment to the larger of the two
126 if (fb.size() < temp.size()) {
127 // for now we expand 'fb' to min(fragment, 8)
128 int new_size = temp.size() < 8 ? temp.size() : 8;
129 expand(fb, fb, new_size);
130 }
131 }
132 }
133
134
135 // convert input fragment to integer_t
136 if (temp.l && (temp.flags & CORRUPTIBLE)) {
137 MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
138 temp.h -= temp.l;
139 temp.l = 0;
140 }
141 integer_t fragment(temp.reg, temp.size(), temp.flags);
142
143 // if not done yet, convert input fragment to integer_t
144 if (temp.l) {
145 // here we know temp is not CORRUPTIBLE
146 fragment.reg = scratches.obtain();
147 MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
148 fragment.flags |= CORRUPTIBLE;
149 }
150
151 if (!(temp.flags & CORRUPTIBLE)) {
152 // temp is not corruptible, but since it's the destination it
153 // will be modified, so we need to allocate a new register.
154 temp.reg = regs.obtain();
155 temp.flags &= ~CORRUPTIBLE;
156 fragment.flags &= ~CORRUPTIBLE;
157 }
158
159 if ((blending & BLEND_SRC) && !same_factor_opt1) {
160 // source (fragment) is needed for the blending stage
161 // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
162 fragment.flags &= ~CORRUPTIBLE;
163 }
164
165
166 if (same_factor_opt1) {
167 // R = S*f + D*(1-f) = (S-D)*f + D
168 integer_t factor;
169 build_blend_factor(factor, fs,
170 component, pixel, fragment, fb, scratches);
171 // fb is always corruptible from this point
172 fb.flags |= CORRUPTIBLE;
173 build_blendFOneMinusF(temp, factor, fragment, fb);
174 } else if (same_factor_opt2) {
175 // R = S*(1-f) + D*f = (D-S)*f + S
176 integer_t factor;
177 // fb is always corrruptible here
178 fb.flags |= CORRUPTIBLE;
179 build_blend_factor(factor, fd,
180 component, pixel, fragment, fb, scratches);
181 build_blendOneMinusFF(temp, factor, fragment, fb);
182 } else {
183 integer_t src_factor;
184 integer_t dst_factor;
185
186 // if destination (fb) is not needed for the blending stage,
187 // then it can be marked as CORRUPTIBLE
188 if (!(blending & BLEND_DST)) {
189 fb.flags |= CORRUPTIBLE;
190 }
191
192 // XXX: try to mark some registers as CORRUPTIBLE
193 // in most case we could make those corruptible
194 // when we're processing the last component
195 // but not always, for instance
196 // when fragment is constant and not reloaded
197 // when fb is needed for logic-ops or masking
198 // when a register is aliased (for instance with mAlphaSource)
199
200 // blend away...
201 if (fs==GGL_ZERO) {
202 if (fd==GGL_ZERO) { // R = 0
203 // already taken care of
204 } else if (fd==GGL_ONE) { // R = D
205 // already taken care of
206 } else { // R = D*fd
207 // compute fd
208 build_blend_factor(dst_factor, fd,
209 component, pixel, fragment, fb, scratches);
210 mul_factor(temp, fb, dst_factor);
211 }
212 } else if (fs==GGL_ONE) {
213 if (fd==GGL_ZERO) { // R = S
214 // NOP, taken care of
215 } else if (fd==GGL_ONE) { // R = S + D
216 component_add(temp, fb, fragment); // args order matters
217 component_sat(temp);
218 } else { // R = S + D*fd
219 // compute fd
220 build_blend_factor(dst_factor, fd,
221 component, pixel, fragment, fb, scratches);
222 mul_factor_add(temp, fb, dst_factor, component_t(fragment));
Mathias Agopian665a2222009-08-07 13:01:46 -0700223 component_sat(temp);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800224 }
225 } else {
226 // compute fs
227 build_blend_factor(src_factor, fs,
228 component, pixel, fragment, fb, scratches);
229 if (fd==GGL_ZERO) { // R = S*fs
230 mul_factor(temp, fragment, src_factor);
231 } else if (fd==GGL_ONE) { // R = S*fs + D
232 mul_factor_add(temp, fragment, src_factor, component_t(fb));
233 component_sat(temp);
234 } else { // R = S*fs + D*fd
235 mul_factor(temp, fragment, src_factor);
236 if (scratches.isUsed(src_factor.reg))
237 scratches.recycle(src_factor.reg);
238 // compute fd
239 build_blend_factor(dst_factor, fd,
240 component, pixel, fragment, fb, scratches);
241 mul_factor_add(temp, fb, dst_factor, temp);
242 if (!same_factor_opt1 && !same_factor_opt2) {
243 component_sat(temp);
244 }
245 }
246 }
247 }
248
249 // now we can be corrupted (it's the dest)
250 temp.flags |= CORRUPTIBLE;
251}
252
253void GGLAssembler::build_blend_factor(
254 integer_t& factor, int f, int component,
255 const pixel_t& dst_pixel,
256 integer_t& fragment,
257 integer_t& fb,
258 Scratch& scratches)
259{
260 integer_t src_alpha(fragment);
261
262 // src_factor/dst_factor won't be used after blending,
263 // so it's fine to mark them as CORRUPTIBLE (if not aliased)
264 factor.flags |= CORRUPTIBLE;
265
266 switch(f) {
267 case GGL_ONE_MINUS_SRC_ALPHA:
268 case GGL_SRC_ALPHA:
269 if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
270 // we're processing alpha, so we already have
271 // src-alpha in fragment, and we need src-alpha just this time.
272 } else {
273 // alpha-src will be needed for other components
274 if (!mBlendFactorCached || mBlendFactorCached==f) {
275 src_alpha = mAlphaSource;
276 factor = mAlphaSource;
277 factor.flags &= ~CORRUPTIBLE;
278 // we already computed the blend factor before, nothing to do.
279 if (mBlendFactorCached)
280 return;
281 // this is the first time, make sure to compute the blend
282 // factor properly.
283 mBlendFactorCached = f;
284 break;
285 } else {
286 // we have a cached alpha blend factor, but we want another one,
287 // this should really not happen because by construction,
288 // we cannot have BOTH source and destination
289 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
290 // the blending stage uses the f/(1-f) optimization
291
292 // for completeness, we handle this case though. Since there
293 // are only 2 choices, this meens we want "the other one"
294 // (1-factor)
295 factor = mAlphaSource;
296 factor.flags &= ~CORRUPTIBLE;
297 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
298 mBlendFactorCached = f;
299 return;
300 }
301 }
302 // fall-through...
303 case GGL_ONE_MINUS_DST_COLOR:
304 case GGL_DST_COLOR:
305 case GGL_ONE_MINUS_SRC_COLOR:
306 case GGL_SRC_COLOR:
307 case GGL_ONE_MINUS_DST_ALPHA:
308 case GGL_DST_ALPHA:
309 case GGL_SRC_ALPHA_SATURATE:
310 // help us find out what register we can use for the blend-factor
311 // CORRUPTIBLE registers are chosen first, or a new one is allocated.
312 if (fragment.flags & CORRUPTIBLE) {
313 factor.setTo(fragment.reg, 32, CORRUPTIBLE);
314 fragment.flags &= ~CORRUPTIBLE;
315 } else if (fb.flags & CORRUPTIBLE) {
316 factor.setTo(fb.reg, 32, CORRUPTIBLE);
317 fb.flags &= ~CORRUPTIBLE;
318 } else {
319 factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
320 }
321 break;
322 }
323
324 // XXX: doesn't work if size==1
325
326 switch(f) {
327 case GGL_ONE_MINUS_DST_COLOR:
328 case GGL_DST_COLOR:
329 factor.s = fb.s;
330 ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
331 break;
332 case GGL_ONE_MINUS_SRC_COLOR:
333 case GGL_SRC_COLOR:
334 factor.s = fragment.s;
335 ADD(AL, 0, factor.reg, fragment.reg,
336 reg_imm(fragment.reg, LSR, fragment.s-1));
337 break;
338 case GGL_ONE_MINUS_SRC_ALPHA:
339 case GGL_SRC_ALPHA:
340 factor.s = src_alpha.s;
341 ADD(AL, 0, factor.reg, src_alpha.reg,
342 reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
343 break;
344 case GGL_ONE_MINUS_DST_ALPHA:
345 case GGL_DST_ALPHA:
346 // XXX: should be precomputed
347 extract(factor, dst_pixel, GGLFormat::ALPHA);
348 ADD(AL, 0, factor.reg, factor.reg,
349 reg_imm(factor.reg, LSR, factor.s-1));
350 break;
351 case GGL_SRC_ALPHA_SATURATE:
352 // XXX: should be precomputed
353 // XXX: f = min(As, 1-Ad)
354 // btw, we're guaranteed that Ad's size is <= 8, because
355 // it's extracted from the framebuffer
356 break;
357 }
358
359 switch(f) {
360 case GGL_ONE_MINUS_DST_COLOR:
361 case GGL_ONE_MINUS_SRC_COLOR:
362 case GGL_ONE_MINUS_DST_ALPHA:
363 case GGL_ONE_MINUS_SRC_ALPHA:
364 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
365 }
366
367 // don't need more than 8-bits for the blend factor
368 // and this will prevent overflows in the multiplies later
369 if (factor.s > 8) {
370 MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
371 factor.s = 8;
372 }
373}
374
375int GGLAssembler::blending_codes(int fs, int fd)
376{
377 int blending = 0;
378 switch(fs) {
379 case GGL_ONE:
380 blending |= BLEND_SRC;
381 break;
382
383 case GGL_ONE_MINUS_DST_COLOR:
384 case GGL_DST_COLOR:
385 blending |= FACTOR_DST|BLEND_SRC;
386 break;
387 case GGL_ONE_MINUS_DST_ALPHA:
388 case GGL_DST_ALPHA:
389 // no need to extract 'component' from the destination
390 // for the blend factor, because we need ALPHA only.
391 blending |= BLEND_SRC;
392 break;
393
394 case GGL_ONE_MINUS_SRC_COLOR:
395 case GGL_SRC_COLOR:
396 blending |= FACTOR_SRC|BLEND_SRC;
397 break;
398 case GGL_ONE_MINUS_SRC_ALPHA:
399 case GGL_SRC_ALPHA:
400 case GGL_SRC_ALPHA_SATURATE:
401 blending |= FACTOR_SRC|BLEND_SRC;
402 break;
403 }
404 switch(fd) {
405 case GGL_ONE:
406 blending |= BLEND_DST;
407 break;
408
409 case GGL_ONE_MINUS_DST_COLOR:
410 case GGL_DST_COLOR:
411 blending |= FACTOR_DST|BLEND_DST;
412 break;
413 case GGL_ONE_MINUS_DST_ALPHA:
414 case GGL_DST_ALPHA:
415 blending |= FACTOR_DST|BLEND_DST;
416 break;
417
418 case GGL_ONE_MINUS_SRC_COLOR:
419 case GGL_SRC_COLOR:
420 blending |= FACTOR_SRC|BLEND_DST;
421 break;
422 case GGL_ONE_MINUS_SRC_ALPHA:
423 case GGL_SRC_ALPHA:
424 // no need to extract 'component' from the source
425 // for the blend factor, because we need ALPHA only.
426 blending |= BLEND_DST;
427 break;
428 }
429 return blending;
430}
431
432// ---------------------------------------------------------------------------
433
434void GGLAssembler::build_blendFOneMinusF(
435 component_t& temp,
436 const integer_t& factor,
437 const integer_t& fragment,
438 const integer_t& fb)
439{
440 // R = S*f + D*(1-f) = (S-D)*f + D
441 Scratch scratches(registerFile());
442 // compute S-D
443 integer_t diff(fragment.flags & CORRUPTIBLE ?
444 fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
445 const int shift = fragment.size() - fb.size();
446 if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
447 else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
448 else RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
449 mul_factor_add(temp, diff, factor, component_t(fb));
450}
451
452void GGLAssembler::build_blendOneMinusFF(
453 component_t& temp,
454 const integer_t& factor,
455 const integer_t& fragment,
456 const integer_t& fb)
457{
458 // R = S*f + D*(1-f) = (S-D)*f + D
459 Scratch scratches(registerFile());
460 // compute D-S
461 integer_t diff(fb.flags & CORRUPTIBLE ?
462 fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
463 const int shift = fragment.size() - fb.size();
464 if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
465 else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
466 else SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
467 mul_factor_add(temp, diff, factor, component_t(fragment));
468}
469
470// ---------------------------------------------------------------------------
471
472void GGLAssembler::mul_factor( component_t& d,
473 const integer_t& v,
474 const integer_t& f)
475{
476 int vs = v.size();
477 int fs = f.size();
478 int ms = vs+fs;
479
480 // XXX: we could have special cases for 1 bit mul
481
482 // all this code below to use the best multiply instruction
483 // wrt the parameters size. We take advantage of the fact
484 // that the 16-bits multiplies allow a 16-bit shift
485 // The trick is that we just make sure that we have at least 8-bits
486 // per component (which is enough for a 8 bits display).
487
488 int xy;
489 int vshift = 0;
490 int fshift = 0;
491 int smulw = 0;
492
493 if (vs<16) {
494 if (fs<16) {
495 xy = xyBB;
496 } else if (GGL_BETWEEN(fs, 24, 31)) {
497 ms -= 16;
498 xy = xyTB;
499 } else {
500 // eg: 15 * 18 -> 15 * 15
501 fshift = fs - 15;
502 ms -= fshift;
503 xy = xyBB;
504 }
505 } else if (GGL_BETWEEN(vs, 24, 31)) {
506 if (fs<16) {
507 ms -= 16;
508 xy = xyTB;
509 } else if (GGL_BETWEEN(fs, 24, 31)) {
510 ms -= 32;
511 xy = xyTT;
512 } else {
513 // eg: 24 * 18 -> 8 * 18
514 fshift = fs - 15;
515 ms -= 16 + fshift;
516 xy = xyTB;
517 }
518 } else {
519 if (fs<16) {
520 // eg: 18 * 15 -> 15 * 15
521 vshift = vs - 15;
522 ms -= vshift;
523 xy = xyBB;
524 } else if (GGL_BETWEEN(fs, 24, 31)) {
525 // eg: 18 * 24 -> 15 * 8
526 vshift = vs - 15;
527 ms -= 16 + vshift;
528 xy = xyBT;
529 } else {
530 // eg: 18 * 18 -> (15 * 18)>>16
531 fshift = fs - 15;
532 ms -= 16 + fshift;
533 xy = yB; //XXX SMULWB
534 smulw = 1;
535 }
536 }
537
Steve Block01dda202012-01-06 14:13:42 +0000538 ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800539
540 int vreg = v.reg;
541 int freg = f.reg;
542 if (vshift) {
543 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
544 vreg = d.reg;
545 }
546 if (fshift) {
547 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
548 freg = d.reg;
549 }
550 if (smulw) SMULW(AL, xy, d.reg, vreg, freg);
551 else SMUL(AL, xy, d.reg, vreg, freg);
552
553
554 d.h = ms;
555 if (mDithering) {
556 d.l = 0;
557 } else {
558 d.l = fs;
559 d.flags |= CLEAR_LO;
560 }
561}
562
563void GGLAssembler::mul_factor_add( component_t& d,
564 const integer_t& v,
565 const integer_t& f,
566 const component_t& a)
567{
568 // XXX: we could have special cases for 1 bit mul
569 Scratch scratches(registerFile());
570
571 int vs = v.size();
572 int fs = f.size();
573 int as = a.h;
574 int ms = vs+fs;
575
Steve Block01dda202012-01-06 14:13:42 +0000576 ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800577
578 integer_t add(a.reg, a.h, a.flags);
579
580 // 'a' is a component_t but it is guaranteed to have
581 // its high bits set to 0. However in the dithering case,
582 // we can't get away with truncating the potentially bad bits
583 // so extraction is needed.
584
585 if ((mDithering) && (a.size() < ms)) {
586 // we need to expand a
587 if (!(a.flags & CORRUPTIBLE)) {
588 // ... but it's not corruptible, so we need to pick a
589 // temporary register.
590 // Try to uses the destination register first (it's likely
591 // to be usable, unless it aliases an input).
592 if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
593 add.reg = d.reg;
594 } else {
595 add.reg = scratches.obtain();
596 }
597 }
598 expand(add, a, ms); // extracts and expands
599 as = ms;
600 }
601
602 if (ms == as) {
603 if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
604 else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
605 } else {
606 int temp = d.reg;
607 if (temp == add.reg) {
608 // the mul will modify add.reg, we need an intermediary reg
609 if (v.flags & CORRUPTIBLE) temp = v.reg;
610 else if (f.flags & CORRUPTIBLE) temp = f.reg;
611 else temp = scratches.obtain();
612 }
613
614 if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
615 else MUL(AL, 0, temp, v.reg, f.reg);
616
617 if (ms>as) {
618 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
619 } else if (ms<as) {
620 // not sure if we should expand the mul instead?
621 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
622 }
623 }
624
625 d.h = ms;
626 if (mDithering) {
627 d.l = a.l;
628 } else {
629 d.l = fs>a.l ? fs : a.l;
630 d.flags |= CLEAR_LO;
631 }
632}
633
634void GGLAssembler::component_add(component_t& d,
635 const integer_t& dst, const integer_t& src)
636{
637 // here we're guaranteed that fragment.size() >= fb.size()
638 const int shift = src.size() - dst.size();
639 if (!shift) {
640 ADD(AL, 0, d.reg, src.reg, dst.reg);
641 } else {
642 ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
643 }
644
645 d.h = src.size();
646 if (mDithering) {
647 d.l = 0;
648 } else {
649 d.l = shift;
650 d.flags |= CLEAR_LO;
651 }
652}
653
654void GGLAssembler::component_sat(const component_t& v)
655{
656 const int one = ((1<<v.size())-1)<<v.l;
657 CMP(AL, v.reg, imm( 1<<v.h ));
658 if (isValidImmediate(one)) {
659 MOV(HS, 0, v.reg, imm( one ));
660 } else if (isValidImmediate(~one)) {
661 MVN(HS, 0, v.reg, imm( ~one ));
662 } else {
663 MOV(HS, 0, v.reg, imm( 1<<v.h ));
664 SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
665 }
666}
667
668// ----------------------------------------------------------------------------
669
670}; // namespace android
671