/* libs/pixelflinger/codeflinger/blending.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
17
18#include <assert.h>
19#include <stdint.h>
20#include <stdlib.h>
21#include <stdio.h>
22#include <sys/types.h>
23
24#include <cutils/log.h>
25
26#include "codeflinger/GGLAssembler.h"
27
28
29namespace android {
30
31void GGLAssembler::build_fog(
32 component_t& temp, // incomming fragment / output
33 int component,
34 Scratch& regs)
35{
36 if (mInfo[component].fog) {
37 Scratch scratches(registerFile());
38 comment("fog");
39
40 integer_t fragment(temp.reg, temp.h, temp.flags);
41 if (!(temp.flags & CORRUPTIBLE)) {
42 temp.reg = regs.obtain();
43 temp.flags |= CORRUPTIBLE;
44 }
45
46 integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
47 LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
48 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
49
50 integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
51 CONTEXT_LOAD(factor.reg, generated_vars.f);
52
The Android Open Source Project35237d12008-12-17 18:08:08 -080053 // clamp fog factor (TODO: see if there is a way to guarantee
54 // we won't overflow, when setting the iterators)
55 BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
56 CMP(AL, factor.reg, imm( 0x10000 ));
57 MOV(HS, 0, factor.reg, imm( 0x10000 ));
58
The Android Open Source Project4f6e8d72008-10-21 07:00:00 -070059 build_blendFOneMinusF(temp, factor, fragment, fogColor);
60 }
61}
62
63void GGLAssembler::build_blending(
64 component_t& temp, // incomming fragment / output
65 const pixel_t& pixel, // framebuffer
66 int component,
67 Scratch& regs)
68{
69 if (!mInfo[component].blend)
70 return;
71
72 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
73 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
74 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
75 fs = GGL_ONE;
76 const int blending = blending_codes(fs, fd);
77 if (!temp.size()) {
78 // here, blending will produce something which doesn't depend on
79 // that component (eg: GL_ZERO:GL_*), so the register has not been
80 // allocated yet. Will never be used as a source.
81 temp = component_t(regs.obtain(), CORRUPTIBLE);
82 }
83
84 // we are doing real blending...
85 // fb: extracted dst
86 // fragment: extracted src
87 // temp: component_t(fragment) and result
88
89 // scoped register allocator
90 Scratch scratches(registerFile());
91 comment("blending");
92
93 // we can optimize these cases a bit...
94 // (1) saturation is not needed
95 // (2) we can use only one multiply instead of 2
96 // (3) we can reduce the register pressure
97 // R = S*f + D*(1-f) = (S-D)*f + D
98 // R = S*(1-f) + D*f = (D-S)*f + S
99
100 const bool same_factor_opt1 =
101 (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
102 (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
103 (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
104 (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
105
106 const bool same_factor_opt2 =
107 (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
108 (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
109 (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
110 (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
111
112
113 // XXX: we could also optimize these cases:
114 // R = S*f + D*f = (S+D)*f
115 // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
116 // R = S*D + D*S = 2*S*D
117
118
119 // see if we need to extract 'component' from the destination (fb)
120 integer_t fb;
121 if (blending & (BLEND_DST|FACTOR_DST)) {
122 fb.setTo(scratches.obtain(), 32);
123 extract(fb, pixel, component);
124 if (mDithering) {
125 // XXX: maybe what we should do instead, is simply
126 // expand fb -or- fragment to the larger of the two
127 if (fb.size() < temp.size()) {
128 // for now we expand 'fb' to min(fragment, 8)
129 int new_size = temp.size() < 8 ? temp.size() : 8;
130 expand(fb, fb, new_size);
131 }
132 }
133 }
134
135
136 // convert input fragment to integer_t
137 if (temp.l && (temp.flags & CORRUPTIBLE)) {
138 MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
139 temp.h -= temp.l;
140 temp.l = 0;
141 }
142 integer_t fragment(temp.reg, temp.size(), temp.flags);
143
144 // if not done yet, convert input fragment to integer_t
145 if (temp.l) {
146 // here we know temp is not CORRUPTIBLE
147 fragment.reg = scratches.obtain();
148 MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
149 fragment.flags |= CORRUPTIBLE;
150 }
151
152 if (!(temp.flags & CORRUPTIBLE)) {
153 // temp is not corruptible, but since it's the destination it
154 // will be modified, so we need to allocate a new register.
155 temp.reg = regs.obtain();
156 temp.flags &= ~CORRUPTIBLE;
157 fragment.flags &= ~CORRUPTIBLE;
158 }
159
160 if ((blending & BLEND_SRC) && !same_factor_opt1) {
161 // source (fragment) is needed for the blending stage
162 // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
163 fragment.flags &= ~CORRUPTIBLE;
164 }
165
166
167 if (same_factor_opt1) {
168 // R = S*f + D*(1-f) = (S-D)*f + D
169 integer_t factor;
170 build_blend_factor(factor, fs,
171 component, pixel, fragment, fb, scratches);
172 // fb is always corruptible from this point
173 fb.flags |= CORRUPTIBLE;
174 build_blendFOneMinusF(temp, factor, fragment, fb);
175 } else if (same_factor_opt2) {
176 // R = S*(1-f) + D*f = (D-S)*f + S
177 integer_t factor;
178 // fb is always corrruptible here
179 fb.flags |= CORRUPTIBLE;
180 build_blend_factor(factor, fd,
181 component, pixel, fragment, fb, scratches);
182 build_blendOneMinusFF(temp, factor, fragment, fb);
183 } else {
184 integer_t src_factor;
185 integer_t dst_factor;
186
187 // if destination (fb) is not needed for the blending stage,
188 // then it can be marked as CORRUPTIBLE
189 if (!(blending & BLEND_DST)) {
190 fb.flags |= CORRUPTIBLE;
191 }
192
193 // XXX: try to mark some registers as CORRUPTIBLE
194 // in most case we could make those corruptible
195 // when we're processing the last component
196 // but not always, for instance
197 // when fragment is constant and not reloaded
198 // when fb is needed for logic-ops or masking
199 // when a register is aliased (for instance with mAlphaSource)
200
201 // blend away...
202 if (fs==GGL_ZERO) {
203 if (fd==GGL_ZERO) { // R = 0
204 // already taken care of
205 } else if (fd==GGL_ONE) { // R = D
206 // already taken care of
207 } else { // R = D*fd
208 // compute fd
209 build_blend_factor(dst_factor, fd,
210 component, pixel, fragment, fb, scratches);
211 mul_factor(temp, fb, dst_factor);
212 }
213 } else if (fs==GGL_ONE) {
214 if (fd==GGL_ZERO) { // R = S
215 // NOP, taken care of
216 } else if (fd==GGL_ONE) { // R = S + D
217 component_add(temp, fb, fragment); // args order matters
218 component_sat(temp);
219 } else { // R = S + D*fd
220 // compute fd
221 build_blend_factor(dst_factor, fd,
222 component, pixel, fragment, fb, scratches);
223 mul_factor_add(temp, fb, dst_factor, component_t(fragment));
224 if (fd==GGL_ONE_MINUS_SRC_ALPHA) {
225 // XXX: in theory this is not correct, we should
226 // saturate here. However, this mode is often
227 // used for displaying alpha-premultiplied graphics,
228 // in which case, saturation is not necessary.
229 // unfortunatelly, we have no way to know.
230 // This is a case, where we sacrifice correctness for
231 // performance. we should probably have some heuristics.
232 } else {
233 component_sat(temp);
234 }
235 }
236 } else {
237 // compute fs
238 build_blend_factor(src_factor, fs,
239 component, pixel, fragment, fb, scratches);
240 if (fd==GGL_ZERO) { // R = S*fs
241 mul_factor(temp, fragment, src_factor);
242 } else if (fd==GGL_ONE) { // R = S*fs + D
243 mul_factor_add(temp, fragment, src_factor, component_t(fb));
244 component_sat(temp);
245 } else { // R = S*fs + D*fd
246 mul_factor(temp, fragment, src_factor);
247 if (scratches.isUsed(src_factor.reg))
248 scratches.recycle(src_factor.reg);
249 // compute fd
250 build_blend_factor(dst_factor, fd,
251 component, pixel, fragment, fb, scratches);
252 mul_factor_add(temp, fb, dst_factor, temp);
253 if (!same_factor_opt1 && !same_factor_opt2) {
254 component_sat(temp);
255 }
256 }
257 }
258 }
259
260 // now we can be corrupted (it's the dest)
261 temp.flags |= CORRUPTIBLE;
262}
263
264void GGLAssembler::build_blend_factor(
265 integer_t& factor, int f, int component,
266 const pixel_t& dst_pixel,
267 integer_t& fragment,
268 integer_t& fb,
269 Scratch& scratches)
270{
271 integer_t src_alpha(fragment);
272
273 // src_factor/dst_factor won't be used after blending,
274 // so it's fine to mark them as CORRUPTIBLE (if not aliased)
275 factor.flags |= CORRUPTIBLE;
276
277 switch(f) {
278 case GGL_ONE_MINUS_SRC_ALPHA:
279 case GGL_SRC_ALPHA:
280 if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
281 // we're processing alpha, so we already have
282 // src-alpha in fragment, and we need src-alpha just this time.
283 } else {
284 // alpha-src will be needed for other components
285 if (!mBlendFactorCached || mBlendFactorCached==f) {
286 src_alpha = mAlphaSource;
287 factor = mAlphaSource;
288 factor.flags &= ~CORRUPTIBLE;
289 // we already computed the blend factor before, nothing to do.
290 if (mBlendFactorCached)
291 return;
292 // this is the first time, make sure to compute the blend
293 // factor properly.
294 mBlendFactorCached = f;
295 break;
296 } else {
297 // we have a cached alpha blend factor, but we want another one,
298 // this should really not happen because by construction,
299 // we cannot have BOTH source and destination
300 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
301 // the blending stage uses the f/(1-f) optimization
302
303 // for completeness, we handle this case though. Since there
304 // are only 2 choices, this meens we want "the other one"
305 // (1-factor)
306 factor = mAlphaSource;
307 factor.flags &= ~CORRUPTIBLE;
308 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
309 mBlendFactorCached = f;
310 return;
311 }
312 }
313 // fall-through...
314 case GGL_ONE_MINUS_DST_COLOR:
315 case GGL_DST_COLOR:
316 case GGL_ONE_MINUS_SRC_COLOR:
317 case GGL_SRC_COLOR:
318 case GGL_ONE_MINUS_DST_ALPHA:
319 case GGL_DST_ALPHA:
320 case GGL_SRC_ALPHA_SATURATE:
321 // help us find out what register we can use for the blend-factor
322 // CORRUPTIBLE registers are chosen first, or a new one is allocated.
323 if (fragment.flags & CORRUPTIBLE) {
324 factor.setTo(fragment.reg, 32, CORRUPTIBLE);
325 fragment.flags &= ~CORRUPTIBLE;
326 } else if (fb.flags & CORRUPTIBLE) {
327 factor.setTo(fb.reg, 32, CORRUPTIBLE);
328 fb.flags &= ~CORRUPTIBLE;
329 } else {
330 factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
331 }
332 break;
333 }
334
335 // XXX: doesn't work if size==1
336
337 switch(f) {
338 case GGL_ONE_MINUS_DST_COLOR:
339 case GGL_DST_COLOR:
340 factor.s = fb.s;
341 ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
342 break;
343 case GGL_ONE_MINUS_SRC_COLOR:
344 case GGL_SRC_COLOR:
345 factor.s = fragment.s;
346 ADD(AL, 0, factor.reg, fragment.reg,
347 reg_imm(fragment.reg, LSR, fragment.s-1));
348 break;
349 case GGL_ONE_MINUS_SRC_ALPHA:
350 case GGL_SRC_ALPHA:
351 factor.s = src_alpha.s;
352 ADD(AL, 0, factor.reg, src_alpha.reg,
353 reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
354 break;
355 case GGL_ONE_MINUS_DST_ALPHA:
356 case GGL_DST_ALPHA:
357 // XXX: should be precomputed
358 extract(factor, dst_pixel, GGLFormat::ALPHA);
359 ADD(AL, 0, factor.reg, factor.reg,
360 reg_imm(factor.reg, LSR, factor.s-1));
361 break;
362 case GGL_SRC_ALPHA_SATURATE:
363 // XXX: should be precomputed
364 // XXX: f = min(As, 1-Ad)
365 // btw, we're guaranteed that Ad's size is <= 8, because
366 // it's extracted from the framebuffer
367 break;
368 }
369
370 switch(f) {
371 case GGL_ONE_MINUS_DST_COLOR:
372 case GGL_ONE_MINUS_SRC_COLOR:
373 case GGL_ONE_MINUS_DST_ALPHA:
374 case GGL_ONE_MINUS_SRC_ALPHA:
375 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
376 }
377
378 // don't need more than 8-bits for the blend factor
379 // and this will prevent overflows in the multiplies later
380 if (factor.s > 8) {
381 MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
382 factor.s = 8;
383 }
384}
385
386int GGLAssembler::blending_codes(int fs, int fd)
387{
388 int blending = 0;
389 switch(fs) {
390 case GGL_ONE:
391 blending |= BLEND_SRC;
392 break;
393
394 case GGL_ONE_MINUS_DST_COLOR:
395 case GGL_DST_COLOR:
396 blending |= FACTOR_DST|BLEND_SRC;
397 break;
398 case GGL_ONE_MINUS_DST_ALPHA:
399 case GGL_DST_ALPHA:
400 // no need to extract 'component' from the destination
401 // for the blend factor, because we need ALPHA only.
402 blending |= BLEND_SRC;
403 break;
404
405 case GGL_ONE_MINUS_SRC_COLOR:
406 case GGL_SRC_COLOR:
407 blending |= FACTOR_SRC|BLEND_SRC;
408 break;
409 case GGL_ONE_MINUS_SRC_ALPHA:
410 case GGL_SRC_ALPHA:
411 case GGL_SRC_ALPHA_SATURATE:
412 blending |= FACTOR_SRC|BLEND_SRC;
413 break;
414 }
415 switch(fd) {
416 case GGL_ONE:
417 blending |= BLEND_DST;
418 break;
419
420 case GGL_ONE_MINUS_DST_COLOR:
421 case GGL_DST_COLOR:
422 blending |= FACTOR_DST|BLEND_DST;
423 break;
424 case GGL_ONE_MINUS_DST_ALPHA:
425 case GGL_DST_ALPHA:
426 blending |= FACTOR_DST|BLEND_DST;
427 break;
428
429 case GGL_ONE_MINUS_SRC_COLOR:
430 case GGL_SRC_COLOR:
431 blending |= FACTOR_SRC|BLEND_DST;
432 break;
433 case GGL_ONE_MINUS_SRC_ALPHA:
434 case GGL_SRC_ALPHA:
435 // no need to extract 'component' from the source
436 // for the blend factor, because we need ALPHA only.
437 blending |= BLEND_DST;
438 break;
439 }
440 return blending;
441}
442
443// ---------------------------------------------------------------------------
444
445void GGLAssembler::build_blendFOneMinusF(
446 component_t& temp,
447 const integer_t& factor,
448 const integer_t& fragment,
449 const integer_t& fb)
450{
451 // R = S*f + D*(1-f) = (S-D)*f + D
452 Scratch scratches(registerFile());
453 // compute S-D
454 integer_t diff(fragment.flags & CORRUPTIBLE ?
455 fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
456 const int shift = fragment.size() - fb.size();
457 if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
458 else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
459 else RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
460 mul_factor_add(temp, diff, factor, component_t(fb));
461}
462
463void GGLAssembler::build_blendOneMinusFF(
464 component_t& temp,
465 const integer_t& factor,
466 const integer_t& fragment,
467 const integer_t& fb)
468{
469 // R = S*f + D*(1-f) = (S-D)*f + D
470 Scratch scratches(registerFile());
471 // compute D-S
472 integer_t diff(fb.flags & CORRUPTIBLE ?
473 fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
474 const int shift = fragment.size() - fb.size();
475 if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
476 else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
477 else SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
478 mul_factor_add(temp, diff, factor, component_t(fragment));
479}
480
481// ---------------------------------------------------------------------------
482
483void GGLAssembler::mul_factor( component_t& d,
484 const integer_t& v,
485 const integer_t& f)
486{
487 int vs = v.size();
488 int fs = f.size();
489 int ms = vs+fs;
490
491 // XXX: we could have special cases for 1 bit mul
492
493 // all this code below to use the best multiply instruction
494 // wrt the parameters size. We take advantage of the fact
495 // that the 16-bits multiplies allow a 16-bit shift
496 // The trick is that we just make sure that we have at least 8-bits
497 // per component (which is enough for a 8 bits display).
498
499 int xy;
500 int vshift = 0;
501 int fshift = 0;
502 int smulw = 0;
503
504 if (vs<16) {
505 if (fs<16) {
506 xy = xyBB;
507 } else if (GGL_BETWEEN(fs, 24, 31)) {
508 ms -= 16;
509 xy = xyTB;
510 } else {
511 // eg: 15 * 18 -> 15 * 15
512 fshift = fs - 15;
513 ms -= fshift;
514 xy = xyBB;
515 }
516 } else if (GGL_BETWEEN(vs, 24, 31)) {
517 if (fs<16) {
518 ms -= 16;
519 xy = xyTB;
520 } else if (GGL_BETWEEN(fs, 24, 31)) {
521 ms -= 32;
522 xy = xyTT;
523 } else {
524 // eg: 24 * 18 -> 8 * 18
525 fshift = fs - 15;
526 ms -= 16 + fshift;
527 xy = xyTB;
528 }
529 } else {
530 if (fs<16) {
531 // eg: 18 * 15 -> 15 * 15
532 vshift = vs - 15;
533 ms -= vshift;
534 xy = xyBB;
535 } else if (GGL_BETWEEN(fs, 24, 31)) {
536 // eg: 18 * 24 -> 15 * 8
537 vshift = vs - 15;
538 ms -= 16 + vshift;
539 xy = xyBT;
540 } else {
541 // eg: 18 * 18 -> (15 * 18)>>16
542 fshift = fs - 15;
543 ms -= 16 + fshift;
544 xy = yB; //XXX SMULWB
545 smulw = 1;
546 }
547 }
548
549 LOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
550
551 int vreg = v.reg;
552 int freg = f.reg;
553 if (vshift) {
554 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
555 vreg = d.reg;
556 }
557 if (fshift) {
558 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
559 freg = d.reg;
560 }
561 if (smulw) SMULW(AL, xy, d.reg, vreg, freg);
562 else SMUL(AL, xy, d.reg, vreg, freg);
563
564
565 d.h = ms;
566 if (mDithering) {
567 d.l = 0;
568 } else {
569 d.l = fs;
570 d.flags |= CLEAR_LO;
571 }
572}
573
574void GGLAssembler::mul_factor_add( component_t& d,
575 const integer_t& v,
576 const integer_t& f,
577 const component_t& a)
578{
579 // XXX: we could have special cases for 1 bit mul
580 Scratch scratches(registerFile());
581
582 int vs = v.size();
583 int fs = f.size();
584 int as = a.h;
585 int ms = vs+fs;
586
587 LOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
588
589 integer_t add(a.reg, a.h, a.flags);
590
591 // 'a' is a component_t but it is guaranteed to have
592 // its high bits set to 0. However in the dithering case,
593 // we can't get away with truncating the potentially bad bits
594 // so extraction is needed.
595
596 if ((mDithering) && (a.size() < ms)) {
597 // we need to expand a
598 if (!(a.flags & CORRUPTIBLE)) {
599 // ... but it's not corruptible, so we need to pick a
600 // temporary register.
601 // Try to uses the destination register first (it's likely
602 // to be usable, unless it aliases an input).
603 if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
604 add.reg = d.reg;
605 } else {
606 add.reg = scratches.obtain();
607 }
608 }
609 expand(add, a, ms); // extracts and expands
610 as = ms;
611 }
612
613 if (ms == as) {
614 if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
615 else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
616 } else {
617 int temp = d.reg;
618 if (temp == add.reg) {
619 // the mul will modify add.reg, we need an intermediary reg
620 if (v.flags & CORRUPTIBLE) temp = v.reg;
621 else if (f.flags & CORRUPTIBLE) temp = f.reg;
622 else temp = scratches.obtain();
623 }
624
625 if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
626 else MUL(AL, 0, temp, v.reg, f.reg);
627
628 if (ms>as) {
629 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
630 } else if (ms<as) {
631 // not sure if we should expand the mul instead?
632 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
633 }
634 }
635
636 d.h = ms;
637 if (mDithering) {
638 d.l = a.l;
639 } else {
640 d.l = fs>a.l ? fs : a.l;
641 d.flags |= CLEAR_LO;
642 }
643}
644
645void GGLAssembler::component_add(component_t& d,
646 const integer_t& dst, const integer_t& src)
647{
648 // here we're guaranteed that fragment.size() >= fb.size()
649 const int shift = src.size() - dst.size();
650 if (!shift) {
651 ADD(AL, 0, d.reg, src.reg, dst.reg);
652 } else {
653 ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
654 }
655
656 d.h = src.size();
657 if (mDithering) {
658 d.l = 0;
659 } else {
660 d.l = shift;
661 d.flags |= CLEAR_LO;
662 }
663}
664
665void GGLAssembler::component_sat(const component_t& v)
666{
667 const int one = ((1<<v.size())-1)<<v.l;
668 CMP(AL, v.reg, imm( 1<<v.h ));
669 if (isValidImmediate(one)) {
670 MOV(HS, 0, v.reg, imm( one ));
671 } else if (isValidImmediate(~one)) {
672 MVN(HS, 0, v.reg, imm( ~one ));
673 } else {
674 MOV(HS, 0, v.reg, imm( 1<<v.h ));
675 SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
676 }
677}
678
679// ----------------------------------------------------------------------------
680
681}; // namespace android
682