Rev 70 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
70 | giacomo | 1 | /* |
2 | * Written by José Fonseca <j_r_fonseca@yahoo.co.uk> |
||
3 | */ |
||
4 | |||
5 | #include "matypes.h" |
||
6 | |||
7 | |||
8 | /* integer multiplication - alpha plus one |
||
9 | * |
||
10 | * makes the following approximation to the division (Sree) |
||
11 | * |
||
12 | * rgb*a/255 ~= (rgb*(a+1)) >> 8 |
||
13 | * |
||
14 | * which is the fastest method that satisfies the following OpenGL criteria |
||
15 | * |
||
16 | * 0*0 = 0 and 255*255 = 255 |
||
17 | * |
||
18 | * note that MX1 is a register holding the 0xffffffffffffffff constant, which can be easily obtained with |
||
19 | * |
||
20 | * PCMPEQW ( MX1, MX1 ) |
||
21 | */ |
||
22 | #define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \ |
||
23 | PSUBW ( MX1, MA1 ) /* a1 - (-1): a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 */ ;\ |
||
24 | PMULLW ( MP1, MA1 ) /* t1 = p1*(a1 + 1) */ ;\ |
||
25 | ;\ |
||
26 | TWO(PSUBW ( MX1, MA2 )) /* a2 - (-1): a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\ |
||
27 | TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*(a2 + 1) */ ;\ |
||
28 | ;\ |
||
29 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 ~= p1*a1/255 (result left in MA1) */ ;\ |
||
30 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 ~= p2*a2/255 (result left in MA2) */ |
||
31 | |||
32 | |||
33 | /* integer multiplication - geometric series |
||
34 | * |
||
35 | * takes the geometric series approximation to the division |
||
36 | * |
||
37 | * t/255 = (t >> 8) + (t >> 16) + (t >> 24) .. |
||
38 | * |
||
39 | * in this case just the first two terms to fit in 16bit arithmetic |
||
40 | * |
||
41 | * t/255 ~= (t + (t >> 8)) >> 8 |
||
42 | * |
||
43 | * note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254, |
||
44 | * so the special case a = 255 must be accounted for or roundoff must be used |
||
45 | */ |
||
46 | #define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \ |
||
47 | PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\ |
||
48 | TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ |
||
49 | ;\ |
||
50 | MOVQ ( MA1, MP1 ) /* keep a copy of t1 (p1 is overwritten) */ ;\ |
||
51 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
||
52 | ;\ |
||
53 | TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 (p2 is overwritten) */ ;\ |
||
54 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
||
55 | ;\ |
||
56 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
57 | PSRLW ( CONST(8), MA1 ) /* result in MA1: sa1 | sb1 | sg1 | sr1 */ ;\ |
||
58 | ;\ |
||
59 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
60 | TWO(PSRLW ( CONST(8), MA2 )) /* result in MA2: sa2 | sb2 | sg2 | sr2 */ |
||
61 | |||
62 | |||
63 | /* integer multiplication - geometric series plus rounding |
||
64 | * |
||
65 | * when using a geometric series division instead of truncating the result |
||
66 | * use roundoff in the approximation (Jim Blinn) |
||
67 | * |
||
68 | * t = rgb*a + 0x80 |
||
69 | * |
||
70 | * achieving the exact results |
||
71 | * |
||
72 | * note that M80 is a register with the 0x0080008000800080 constant |
||
73 | */ |
||
74 | #define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \ |
||
75 | PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\ |
||
76 | PADDW ( M80, MA1 ) /* t1 += 0x80 (rounding term) */ ;\ |
||
77 | ;\ |
||
78 | TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ |
||
79 | TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 (rounding term) */ ;\ |
||
80 | ;\ |
||
81 | MOVQ ( MA1, MP1 ) /* keep a copy of t1 (p1 is overwritten) */ ;\ |
||
82 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
||
83 | ;\ |
||
84 | TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 (p2 is overwritten) */ ;\ |
||
85 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
||
86 | ;\ |
||
87 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
88 | PSRLW ( CONST(8), MA1 ) /* result in MA1: sa1 | sb1 | sg1 | sr1 */ ;\ |
||
89 | ;\ |
||
90 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
91 | TWO(PSRLW ( CONST(8), MA2 )) /* result in MA2: sa2 | sb2 | sg2 | sr2 */ |
||
92 | |||
93 | |||
94 | /* linear interpolation - geometric series |
||
95 | */ |
||
96 | #define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \ |
||
97 | PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
||
98 | PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ |
||
99 | PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
||
100 | ;\ |
||
101 | TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
||
102 | TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ |
||
103 | TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
||
104 | ;\ |
||
105 | MOVQ ( MA1, MP1 ) /* keep a copy of t1 (p1 - q1 is overwritten) */ ;\ |
||
106 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
||
107 | ;\ |
||
108 | TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 (p2 - q2 is overwritten) */ ;\ |
||
109 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
||
110 | ;\ |
||
111 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
112 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
113 | ;\ |
||
114 | PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
||
115 | TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
||
116 | ;\ |
||
117 | PSRLW ( CONST(8), MA1 ) /* result in MA1: sa1 | sb1 | sg1 | sr1 */ ;\ |
||
118 | TWO(PSRLW ( CONST(8), MA2 )) /* result in MA2: sa2 | sb2 | sg2 | sr2 */ |
||
119 | |||
120 | |||
121 | /* linear interpolation - geometric series with roundoff |
||
122 | * |
||
123 | * this is a generalization of Blinn's formula to signed arithmetic |
||
124 | * |
||
125 | * note that M80 is a register with the 0x0080008000800080 constant |
||
126 | */ |
||
127 | #define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \ |
||
128 | PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
||
129 | PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ |
||
130 | PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
||
131 | ;\ |
||
132 | TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
||
133 | TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ |
||
134 | TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
||
135 | ;\ |
||
136 | PSRLW ( CONST(15), MP1 ) /* sign bit of p1 - q1: q1 > p1 ? 1 : 0 */ ;\ |
||
137 | TWO(PSRLW ( CONST(15), MP2 )) /* sign bit of p2 - q2: q2 > p2 ? 1 : 0 */ ;\ |
||
138 | ;\ |
||
139 | PSLLW ( CONST(8), MP1 ) /* q1 > p1 ? 0x100 : 0 */ ;\ |
||
140 | TWO(PSLLW ( CONST(8), MP2 )) /* q2 > p2 ? 0x100 : 0 */ ;\ |
||
141 | ;\ |
||
142 | PSUBW ( MP1, MA1 ) /* t1 -= 0x100 when q1 > p1 */ ;\ |
||
143 | TWO(PSUBW ( MP2, MA2 )) /* t2 -= 0x100 when q2 > p2 */ ;\ |
||
144 | ;\ |
||
145 | PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\ |
||
146 | TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\ |
||
147 | ;\ |
||
148 | MOVQ ( MA1, MP1 ) /* keep a copy of t1 */ ;\ |
||
149 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
||
150 | ;\ |
||
151 | TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 */ ;\ |
||
152 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
||
153 | ;\ |
||
154 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
155 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
156 | ;\ |
||
157 | PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
||
158 | TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
||
159 | ;\ |
||
160 | PSRLW ( CONST(8), MA1 ) /* result in MA1: sa1 | sb1 | sg1 | sr1 */ ;\ |
||
161 | TWO(PSRLW ( CONST(8), MA2 )) /* result in MA2: sa2 | sb2 | sg2 | sr2 */ |
||
162 | |||
163 | |||
164 | /* linear interpolation - geometric series with correction |
||
165 | * |
||
166 | * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria |
||
167 | * |
||
168 | * t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8 |
||
169 | * |
||
170 | * note that although it is faster than rounding off, it doesn't always give the exact results |
||
171 | */ |
||
172 | #define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \ |
||
173 | PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
||
174 | PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ |
||
175 | PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
||
176 | ;\ |
||
177 | TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
||
178 | TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ |
||
179 | TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
||
180 | ;\ |
||
181 | MOVQ ( MA1, MP1 ) /* keep a copy of t1 */ ;\ |
||
182 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
||
183 | ;\ |
||
184 | TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 */ ;\ |
||
185 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
||
186 | ;\ |
||
187 | PADDW ( MA1, MP1 ) /* MP1 = t1 + (t1 >> 8) */ ;\ |
||
188 | PSRLW ( CONST(7), MA1 ) /* (t1 >> 8) >> 7 = t1 >> 15 */ ;\ |
||
189 | ;\ |
||
190 | TWO(PADDW ( MA2, MP2 )) /* MP2 = t2 + (t2 >> 8) */ ;\ |
||
191 | TWO(PSRLW ( CONST(7), MA2 )) /* (t2 >> 8) >> 7 = t2 >> 15 */ ;\ |
||
192 | ;\ |
||
193 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8 */ ;\ |
||
194 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8 */ ;\ |
||
195 | ;\ |
||
196 | PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
||
197 | TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
||
198 | ;\ |
||
199 | PSRLW ( CONST(8), MA1 ) /* result in MA1: sa1 | sb1 | sg1 | sr1 */ ;\ |
||
200 | TWO(PSRLW ( CONST(8), MA2 )) /* result in MA2: sa2 | sb2 | sg2 | sr2 */ |
||
201 | |||
202 | |||
203 | /* common blending setup code |
||
204 | * |
||
205 | * note that M00 is a register holding the 0x0000000000000000 constant, which can be easily obtained with |
||
206 | * |
||
207 | * PXOR ( M00, M00 ) |
||
208 | */ |
||
209 | #define GMB_LOAD(rgba, dest, MPP, MQQ) \ |
||
210 | ONE(MOVD ( REGIND(rgba), MPP )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\ |
||
211 | ONE(MOVD ( REGIND(dest), MQQ )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\ |
||
212 | ;\ |
||
213 | TWO(MOVQ ( REGIND(rgba), MPP )) /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\ |
||
214 | TWO(MOVQ ( REGIND(dest), MQQ )) /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */ |
||
215 | |||
216 | #define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \ |
||
217 | TWO(MOVQ ( MP1, MP2 )) /* copy packed p so its high half can be unpacked into MP2 */ ;\ |
||
218 | TWO(MOVQ ( MQ1, MQ2 )) /* copy packed q so its high half can be unpacked into MQ2 */ ;\ |
||
219 | ;\ |
||
220 | PUNPCKLBW ( M00, MQ1 ) /* low 4 bytes -> words: qa1 | qb1 | qg1 | qr1 */ ;\ |
||
221 | TWO(PUNPCKHBW ( M00, MQ2 )) /* high 4 bytes -> words: qa2 | qb2 | qg2 | qr2 */ ;\ |
||
222 | PUNPCKLBW ( M00, MP1 ) /* low 4 bytes -> words: pa1 | pb1 | pg1 | pr1 */ ;\ |
||
223 | TWO(PUNPCKHBW ( M00, MP2 )) /* high 4 bytes -> words: pa2 | pb2 | pg2 | pr2 */ |
||
224 | |||
225 | #define GMB_ALPHA(MP1, MA1, MP2, MA2) \ |
||
226 | MOVQ ( MP1, MA1 ) /* work on a copy so p1 is preserved */ ;\ |
||
227 | TWO(MOVQ ( MP2, MA2 )) /* work on a copy so p2 is preserved */ ;\ |
||
228 | ;\ |
||
229 | PUNPCKHWD ( MA1, MA1 ) /* pa1 | pa1 | pb1 | pb1 */ ;\ |
||
230 | TWO(PUNPCKHWD ( MA2, MA2 )) /* pa2 | pa2 | pb2 | pb2 */ ;\ |
||
231 | PUNPCKHDQ ( MA1, MA1 ) /* pa1 | pa1 | pa1 | pa1 */ ;\ |
||
232 | TWO(PUNPCKHDQ ( MA2, MA2 )) /* pa2 | pa2 | pa2 | pa2 */ |
||
233 | |||
234 | #define GMB_PACK( MS1, MS2 ) \ |
||
235 | PACKUSWB ( MS2, MS1 ) /* words -> bytes with unsigned saturation, into MS1: sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\ |
||
236 | |||
237 | #define GMB_STORE(rgba, MSS ) \ |
||
238 | ONE(MOVD ( MSS, REGIND(rgba) )) /* 32-bit store of the low pixel: | | | | sa1 | sb1 | sg1 | sr1 */ ;\ |
||
239 | TWO(MOVQ ( MSS, REGIND(rgba) )) /* 64-bit store of both pixels: sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ |
||
240 | |||
241 | |||
242 | SEG_DATA |
||
243 | |||
244 | ALIGNDATA8 |
||
245 | const_0080: /* 0x0080 in each 16-bit word; loaded into MM7 by the modulate INIT and passed as M80 (rounding term) */ |
||
246 | D_LONG 0x00800080, 0x00800080 |
||
247 | |||
248 | const_80: /* 0x80 in each byte; loaded into MM7 by the min/max INIT and XORed in to convert unsigned bytes to signed */ |
||
249 | D_LONG 0x80808080, 0x80808080 |
||
250 | |||
251 | SEG_TEXT |
||
252 | |||
253 | |||
254 | /* Blend transparency function: per channel rgba = dest + (rgba - dest)*a/255, with a = alpha of the rgba pixel (GMB_LERP_GSC approximation) |
||
255 | */ |
||
256 | |||
257 | #define TAG(x) x##_transparency |
||
258 | |||
259 | #define INIT \ |
||
260 | PXOR ( MM0, MM0 ) /* zero register used by GMB_UNPACK: 0x0000 | 0x0000 | 0x0000 | 0x0000 */ |
||
261 | |||
262 | #define MAIN( rgba, dest ) \ |
||
263 | GMB_LOAD( rgba, dest, MM1, MM2 ) /* MM1 = rgba pixels (p), MM2 = dest pixels (q) */ ;\ |
||
264 | GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) /* bytes -> words; second pixel goes to MM4/MM5 */ ;\ |
||
265 | GMB_ALPHA( MM1, MM3, MM4, MM6 ) /* MM3/MM6 = alpha of p replicated to all four words */ ;\ |
||
266 | GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 ) /* results left in MM3/MM6 */ ;\ |
||
267 | GMB_PACK( MM3, MM6 ) /* words -> bytes, both pixels in MM3 */ ;\ |
||
268 | GMB_STORE( rgba, MM3 ) /* write the result back over rgba */ |
||
269 | |||
270 | #include "mmx_blendtmp.h" |
||
271 | |||
272 | |||
273 | /* Blend add function: rgba = rgba + dest per byte, with unsigned saturation |
||
274 | * |
||
275 | * FIXME: Add some loop unrolling here... |
||
276 | */ |
||
277 | |||
278 | #define TAG(x) x##_add |
||
279 | |||
280 | #define INIT |
||
281 | |||
282 | #define MAIN( rgba, dest ) \ |
||
283 | ONE(MOVD ( REGIND(rgba), MM1 )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\ |
||
284 | ONE(MOVD ( REGIND(dest), MM2 )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\ |
||
285 | ONE(PADDUSB ( MM2, MM1 )) /* s = p + q, saturated per byte */ ;\ |
||
286 | ONE(MOVD ( MM1, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\ |
||
287 | ;\ |
||
288 | TWO(MOVQ ( REGIND(rgba), MM1 )) /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\ |
||
289 | TWO(PADDUSB ( REGIND(dest), MM1 )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\ |
||
290 | TWO(MOVQ ( MM1, REGIND(rgba) )) /* write both result pixels back over rgba */ |
||
291 | |||
292 | #include "mmx_blendtmp.h" |
||
293 | |||
294 | |||
295 | /* Blend min function: rgba = min(rgba, dest) per byte, compared as unsigned values |
||
296 | */ |
||
297 | |||
298 | #define TAG(x) x##_min |
||
299 | |||
300 | #define INIT \ |
||
301 | MOVQ ( CONTENT(const_80), MM7 ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ |
||
302 | |||
303 | #define MAIN( rgba, dest ) \ |
||
304 | GMB_LOAD( rgba, dest, MM1, MM2 ) /* MM1 = rgba (p), MM2 = dest (q) */ ;\ |
||
305 | MOVQ ( MM1, MM3 ) /* copy of p for the comparison */ ;\ |
||
306 | MOVQ ( MM2, MM4 ) /* copy of q for the comparison */ ;\ |
||
307 | PXOR ( MM7, MM3 ) /* unsigned -> signed (PCMPGTB compares signed bytes) */ ;\ |
||
308 | PXOR ( MM7, MM4 ) /* unsigned -> signed (PCMPGTB compares signed bytes) */ ;\ |
||
309 | PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\ |
||
310 | PAND ( MM4, MM1 ) /* q > p ? p : 0 */ ;\ |
||
311 | PANDN ( MM2, MM4 ) /* q > p ? 0 : q */ ;\ |
||
312 | POR ( MM1, MM4 ) /* q > p ? p : q, i.e. min(p, q) */ ;\ |
||
313 | GMB_STORE( rgba, MM4 ) |
||
314 | |||
315 | #include "mmx_blendtmp.h" |
||
316 | |||
317 | |||
318 | /* Blend max function: rgba = max(rgba, dest) per byte, compared as unsigned values |
||
319 | */ |
||
320 | |||
321 | #define TAG(x) x##_max |
||
322 | |||
323 | #define INIT \ |
||
324 | MOVQ ( CONTENT(const_80), MM7 ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ |
||
325 | |||
326 | #define MAIN( rgba, dest ) \ |
||
327 | GMB_LOAD( rgba, dest, MM1, MM2 ) /* MM1 = rgba (p), MM2 = dest (q) */ ;\ |
||
328 | MOVQ ( MM1, MM3 ) /* copy of p for the comparison */ ;\ |
||
329 | MOVQ ( MM2, MM4 ) /* copy of q for the comparison */ ;\ |
||
330 | PXOR ( MM7, MM3 ) /* unsigned -> signed (PCMPGTB compares signed bytes) */ ;\ |
||
331 | PXOR ( MM7, MM4 ) /* unsigned -> signed (PCMPGTB compares signed bytes) */ ;\ |
||
332 | PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\ |
||
333 | PAND ( MM4, MM2 ) /* q > p ? q : 0 */ ;\ |
||
334 | PANDN ( MM1, MM4 ) /* q > p ? 0 : p */ ;\ |
||
335 | POR ( MM2, MM4 ) /* q > p ? q : p, i.e. max(p, q) */ ;\ |
||
336 | GMB_STORE( rgba, MM4 ) |
||
337 | |||
338 | #include "mmx_blendtmp.h" |
||
339 | |||
340 | |||
341 | /* Blend modulate function: per channel rgba = rgba*dest/255, rounded (GMB_MULT_GSR) |
||
342 | */ |
||
343 | |||
344 | #define TAG(x) x##_modulate |
||
345 | |||
346 | #define INIT \ |
||
347 | PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ ;\ |
||
348 | MOVQ ( CONTENT(const_0080), MM7 ) /* 0x0080 | 0x0080 | 0x0080 | 0x0080 */ |
||
349 | |||
350 | #define MAIN( rgba, dest ) \ |
||
351 | GMB_LOAD( rgba, dest, MM1, MM2 ) /* MM1 = rgba pixels, MM2 = dest pixels */ ;\ |
||
352 | GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) /* bytes -> words; second pixel goes to MM4/MM5 */ ;\ |
||
353 | GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 ) /* products left in MM2/MM5 */ ;\ |
||
354 | GMB_PACK( MM2, MM5 ) /* words -> bytes, both pixels in MM2 */ ;\ |
||
355 | GMB_STORE( rgba, MM2 ) /* write the result back over rgba */ |
||
356 | |||
357 | #include "mmx_blendtmp.h" |
||
358 |