Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
55 | pj | 1 | /* $Id: vpexec.c,v 1.1 2003-02-28 11:42:06 pj Exp $ */ |
2 | |||
3 | /* |
||
4 | * Mesa 3-D graphics library |
||
5 | * Version: 4.1 |
||
6 | * |
||
7 | * Copyright (C) 1999-2002 Brian Paul All Rights Reserved. |
||
8 | * |
||
9 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
10 | * copy of this software and associated documentation files (the "Software"), |
||
11 | * to deal in the Software without restriction, including without limitation |
||
12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
13 | * and/or sell copies of the Software, and to permit persons to whom the |
||
14 | * Software is furnished to do so, subject to the following conditions: |
||
15 | * |
||
16 | * The above copyright notice and this permission notice shall be included |
||
17 | * in all copies or substantial portions of the Software. |
||
18 | * |
||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
22 | * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
||
23 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||
24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | */ |
||
26 | |||
27 | /* |
||
28 | * -------- Regarding NV_vertex_program -------- |
||
29 | * Redistribution and use in source and binary forms, with or without |
||
30 | * modification, are permitted provided that the following conditions are met: |
||
31 | * |
||
32 | * o Redistribution of the source code must contain a copyright notice |
||
33 | * and this list of conditions; |
||
34 | * |
||
35 | * o Redistribution in binary and source code form must contain the |
||
36 | * following Notice in the software and any documentation and/or other |
||
37 | * materials provided with the distribution; and |
||
38 | * |
||
39 | * o The name of Nvidia may not be used to promote or endorse software |
||
40 | * derived from the software. |
||
41 | * |
||
42 | * NOTICE: Nvidia hereby grants to each recipient a non-exclusive worldwide |
||
43 | * royalty free patent license under patent claims that are licensable by |
||
44 | * Nvidia and which are necessarily required and for which no commercially |
||
45 | * viable non infringing alternative exists to make, use, sell, offer to sell, |
||
46 | * import and otherwise transfer the vertex extension for the Mesa 3D Graphics |
||
47 | * Library as distributed in source code and object code form. No hardware or |
||
48 | * hardware implementation (including a semiconductor implementation and chips) |
||
49 | * are licensed hereunder. If a recipient makes a patent claim or institutes |
||
50 | * patent litigation against Nvidia or Nvidia's customers for use or sale of |
||
51 | * Nvidia products, then this license grant as to such recipient shall |
||
52 | * immediately terminate and recipient immediately agrees to cease use and |
||
53 | * distribution of the Mesa Program and derivatives thereof. |
||
54 | * |
||
55 | * THE MESA 3D GRAPHICS LIBRARY IS PROVIDED ON AN "AS IS BASIS, WITHOUT |
||
56 | * WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, |
||
57 | * WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-NFRINGEMENT |
||
58 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. |
||
59 | * |
||
60 | * NVIDIA SHALL NOT HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||
61 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION |
||
62 | * LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||
63 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||
64 | * ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE MESA 3D GRAPHICS |
||
65 | * LIBRARY OR EVIDENCE OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDR, EVEN |
||
66 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
67 | * |
||
68 | * If you do not comply with this agreement, then Nvidia may cancel the license |
||
69 | * and rights granted herein. |
||
70 | * --------------------------------------------- |
||
71 | */ |
||
72 | |||
73 | /** |
||
74 | * \file vpexec.c |
||
75 | * \brief Code to execute vertex programs. |
||
76 | * \author Brian Paul |
||
77 | */ |
||
78 | |||
79 | #include "glheader.h" |
||
80 | #include "context.h" |
||
81 | #include "imports.h" |
||
82 | #include "macros.h" |
||
83 | #include "mtypes.h" |
||
84 | #include "vpexec.h" |
||
85 | #include "mmath.h" |
||
86 | #include "math/m_matrix.h" |
||
87 | |||
88 | |||
89 | /** |
||
90 | * Load/initialize the vertex program registers. |
||
91 | * This needs to be done per vertex. |
||
92 | */ |
||
93 | void |
||
94 | _mesa_init_vp_registers(GLcontext *ctx) |
||
95 | { |
||
96 | struct vp_machine *machine = &(ctx->VertexProgram.Machine); |
||
97 | GLuint i; |
||
98 | |||
99 | /* Input registers get initialized from the current vertex attribs */ |
||
100 | MEMCPY(machine->Registers[VP_INPUT_REG_START], |
||
101 | ctx->Current.Attrib, |
||
102 | 16 * 4 * sizeof(GLfloat)); |
||
103 | |||
104 | /* Output and temp regs are initialized to [0,0,0,1] */ |
||
105 | for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) { |
||
106 | machine->Registers[i][0] = 0.0F; |
||
107 | machine->Registers[i][1] = 0.0F; |
||
108 | machine->Registers[i][2] = 0.0F; |
||
109 | machine->Registers[i][3] = 1.0F; |
||
110 | } |
||
111 | for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) { |
||
112 | machine->Registers[i][0] = 0.0F; |
||
113 | machine->Registers[i][1] = 0.0F; |
||
114 | machine->Registers[i][2] = 0.0F; |
||
115 | machine->Registers[i][3] = 1.0F; |
||
116 | } |
||
117 | |||
118 | /* The program regs aren't touched */ |
||
119 | } |
||
120 | |||
121 | |||
122 | |||
123 | /** |
||
124 | * Copy the 16 elements of a matrix into four consecutive program |
||
125 | * registers starting at 'pos'. |
||
126 | */ |
||
127 | static void |
||
128 | load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16]) |
||
129 | { |
||
130 | GLuint i; |
||
131 | pos += VP_PROG_REG_START; |
||
132 | for (i = 0; i < 4; i++) { |
||
133 | registers[pos + i][0] = mat[0 + i]; |
||
134 | registers[pos + i][1] = mat[4 + i]; |
||
135 | registers[pos + i][2] = mat[8 + i]; |
||
136 | registers[pos + i][3] = mat[12 + i]; |
||
137 | } |
||
138 | } |
||
139 | |||
140 | |||
141 | /** |
||
142 | * As above, but transpose the matrix. |
||
143 | */ |
||
144 | static void |
||
145 | load_transpose_matrix(GLfloat registers[][4], GLuint pos, |
||
146 | const GLfloat mat[16]) |
||
147 | { |
||
148 | pos += VP_PROG_REG_START; |
||
149 | MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat)); |
||
150 | } |
||
151 | |||
152 | |||
153 | /** |
||
154 | * Load all currently tracked matrices into the program registers. |
||
155 | * This needs to be done per glBegin/glEnd. |
||
156 | */ |
||
157 | void |
||
158 | _mesa_init_tracked_matrices(GLcontext *ctx) |
||
159 | { |
||
160 | GLuint i; |
||
161 | |||
162 | for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) { |
||
163 | /* point 'mat' at source matrix */ |
||
164 | GLmatrix *mat; |
||
165 | if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) { |
||
166 | mat = ctx->ModelviewMatrixStack.Top; |
||
167 | } |
||
168 | else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) { |
||
169 | mat = ctx->ProjectionMatrixStack.Top; |
||
170 | } |
||
171 | else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) { |
||
172 | mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top; |
||
173 | } |
||
174 | else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) { |
||
175 | mat = ctx->ColorMatrixStack.Top; |
||
176 | } |
||
177 | else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) { |
||
178 | /* XXX verify the combined matrix is up to date */ |
||
179 | mat = &ctx->_ModelProjectMatrix; |
||
180 | } |
||
181 | else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV && |
||
182 | ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) { |
||
183 | GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV; |
||
184 | ASSERT(n < MAX_PROGRAM_MATRICES); |
||
185 | mat = ctx->ProgramMatrixStack[n].Top; |
||
186 | } |
||
187 | else { |
||
188 | /* no matrix is tracked, but we leave the register values as-is */ |
||
189 | assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE); |
||
190 | continue; |
||
191 | } |
||
192 | |||
193 | /* load the matrix */ |
||
194 | if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) { |
||
195 | load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m); |
||
196 | } |
||
197 | else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) { |
||
198 | _math_matrix_analyse(mat); /* update the inverse */ |
||
199 | assert((mat->flags & MAT_DIRTY_INVERSE) == 0); |
||
200 | load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv); |
||
201 | } |
||
202 | else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) { |
||
203 | load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m); |
||
204 | } |
||
205 | else { |
||
206 | assert(ctx->VertexProgram.TrackMatrixTransform[i] |
||
207 | == GL_INVERSE_TRANSPOSE_NV); |
||
208 | _math_matrix_analyse(mat); /* update the inverse */ |
||
209 | assert((mat->flags & MAT_DIRTY_INVERSE) == 0); |
||
210 | load_transpose_matrix(ctx->VertexProgram.Machine.Registers, |
||
211 | i*4, mat->inv); |
||
212 | } |
||
213 | } |
||
214 | } |
||
215 | |||
216 | |||
217 | |||
218 | /** |
||
219 | * For debugging. Dump the current vertex program machine registers. |
||
220 | */ |
||
221 | void |
||
222 | _mesa_dump_vp_machine( const struct vp_machine *machine ) |
||
223 | { |
||
224 | int i; |
||
225 | _mesa_printf("VertexIn:\n"); |
||
226 | for (i = 0; i < VP_NUM_INPUT_REGS; i++) { |
||
227 | _mesa_printf("%d: %f %f %f %f ", i, |
||
228 | machine->Registers[i + VP_INPUT_REG_START][0], |
||
229 | machine->Registers[i + VP_INPUT_REG_START][1], |
||
230 | machine->Registers[i + VP_INPUT_REG_START][2], |
||
231 | machine->Registers[i + VP_INPUT_REG_START][3]); |
||
232 | } |
||
233 | _mesa_printf("\n"); |
||
234 | |||
235 | _mesa_printf("VertexOut:\n"); |
||
236 | for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) { |
||
237 | _mesa_printf("%d: %f %f %f %f ", i, |
||
238 | machine->Registers[i + VP_OUTPUT_REG_START][0], |
||
239 | machine->Registers[i + VP_OUTPUT_REG_START][1], |
||
240 | machine->Registers[i + VP_OUTPUT_REG_START][2], |
||
241 | machine->Registers[i + VP_OUTPUT_REG_START][3]); |
||
242 | } |
||
243 | _mesa_printf("\n"); |
||
244 | |||
245 | _mesa_printf("Registers:\n"); |
||
246 | for (i = 0; i < VP_NUM_TEMP_REGS; i++) { |
||
247 | _mesa_printf("%d: %f %f %f %f ", i, |
||
248 | machine->Registers[i + VP_TEMP_REG_START][0], |
||
249 | machine->Registers[i + VP_TEMP_REG_START][1], |
||
250 | machine->Registers[i + VP_TEMP_REG_START][2], |
||
251 | machine->Registers[i + VP_TEMP_REG_START][3]); |
||
252 | } |
||
253 | _mesa_printf("\n"); |
||
254 | |||
255 | _mesa_printf("Parameters:\n"); |
||
256 | for (i = 0; i < VP_NUM_PROG_REGS; i++) { |
||
257 | _mesa_printf("%d: %f %f %f %f ", i, |
||
258 | machine->Registers[i + VP_PROG_REG_START][0], |
||
259 | machine->Registers[i + VP_PROG_REG_START][1], |
||
260 | machine->Registers[i + VP_PROG_REG_START][2], |
||
261 | machine->Registers[i + VP_PROG_REG_START][3]); |
||
262 | } |
||
263 | _mesa_printf("\n"); |
||
264 | } |
||
265 | |||
266 | |||
267 | /** |
||
268 | * Fetch a 4-element float vector from the given source register. |
||
269 | * Apply swizzling and negating as needed. |
||
270 | */ |
||
271 | static void |
||
272 | fetch_vector4( const struct vp_src_register *source, |
||
273 | const struct vp_machine *machine, |
||
274 | GLfloat result[4] ) |
||
275 | { |
||
276 | static const GLfloat zero[4] = { 0, 0, 0, 0 }; |
||
277 | const GLfloat *src; |
||
278 | |||
279 | if (source->RelAddr) { |
||
280 | GLint reg = source->Register + machine->AddressReg; |
||
281 | if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END) |
||
282 | src = zero; |
||
283 | else |
||
284 | src = machine->Registers[reg]; |
||
285 | } |
||
286 | else { |
||
287 | src = machine->Registers[source->Register]; |
||
288 | } |
||
289 | |||
290 | if (source->Negate) { |
||
291 | result[0] = -src[source->Swizzle[0]]; |
||
292 | result[1] = -src[source->Swizzle[1]]; |
||
293 | result[2] = -src[source->Swizzle[2]]; |
||
294 | result[3] = -src[source->Swizzle[3]]; |
||
295 | } |
||
296 | else { |
||
297 | result[0] = src[source->Swizzle[0]]; |
||
298 | result[1] = src[source->Swizzle[1]]; |
||
299 | result[2] = src[source->Swizzle[2]]; |
||
300 | result[3] = src[source->Swizzle[3]]; |
||
301 | } |
||
302 | } |
||
303 | |||
304 | |||
305 | /** |
||
306 | * As above, but only return result[0] element. |
||
307 | */ |
||
308 | static void |
||
309 | fetch_vector1( const struct vp_src_register *source, |
||
310 | const struct vp_machine *machine, |
||
311 | GLfloat result[4] ) |
||
312 | { |
||
313 | static const GLfloat zero[4] = { 0, 0, 0, 0 }; |
||
314 | const GLfloat *src; |
||
315 | |||
316 | if (source->RelAddr) { |
||
317 | GLint reg = source->Register + machine->AddressReg; |
||
318 | if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END) |
||
319 | src = zero; |
||
320 | else |
||
321 | src = machine->Registers[reg]; |
||
322 | } |
||
323 | else { |
||
324 | src = machine->Registers[source->Register]; |
||
325 | } |
||
326 | |||
327 | if (source->Negate) { |
||
328 | result[0] = -src[source->Swizzle[0]]; |
||
329 | } |
||
330 | else { |
||
331 | result[0] = src[source->Swizzle[0]]; |
||
332 | } |
||
333 | } |
||
334 | |||
335 | |||
336 | /** |
||
337 | * Store 4 floats into a register. |
||
338 | */ |
||
339 | static void |
||
340 | store_vector4( const struct vp_dst_register *dest, struct vp_machine *machine, |
||
341 | const GLfloat value[4] ) |
||
342 | { |
||
343 | GLfloat *dst = machine->Registers[dest->Register]; |
||
344 | |||
345 | if (dest->WriteMask[0]) |
||
346 | dst[0] = value[0]; |
||
347 | if (dest->WriteMask[1]) |
||
348 | dst[1] = value[1]; |
||
349 | if (dest->WriteMask[2]) |
||
350 | dst[2] = value[2]; |
||
351 | if (dest->WriteMask[3]) |
||
352 | dst[3] = value[3]; |
||
353 | } |
||
354 | |||
355 | |||
/**
 * Set x to positive or negative infinity.
 *
 * x must be a GLfloat lvalue.  Three variants exist:
 *  - USE_IEEE: the IEEE-754 single precision bit pattern for +/-infinity
 *    is written straight into x's storage through a GLuint pointer.
 *  - VMS: +/-__MAXFLOAT stands in for infinity.
 *  - otherwise: +/-HUGE_VAL converted to GLfloat.
 *
 * Arguments and expansions are fully parenthesized so the macros behave
 * as single expressions regardless of the argument's form or the
 * surrounding context.
 */
#ifdef USE_IEEE
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Overwrite the storage of float lvalue x with the integer bit pattern
 * 'bits', going through the fi_type float/int union.
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
||
371 | |||
372 | |||
373 | /** |
||
374 | * Execute the given vertex program |
||
375 | */ |
||
376 | void |
||
377 | _mesa_exec_program(GLcontext *ctx, const struct vp_program *program) |
||
378 | { |
||
379 | struct vp_machine *machine = &ctx->VertexProgram.Machine; |
||
380 | const struct vp_instruction *inst; |
||
381 | |||
382 | /* XXX load vertex fields into input registers */ |
||
383 | /* and do other initialization */ |
||
384 | |||
385 | |||
386 | for (inst = program->Instructions; inst->Opcode !=END; inst++) { |
||
387 | switch (inst->Opcode) { |
||
388 | case MOV: |
||
389 | { |
||
390 | GLfloat t[4]; |
||
391 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
392 | store_vector4( &inst->DstReg, machine, t ); |
||
393 | } |
||
394 | break; |
||
395 | case LIT: |
||
396 | { |
||
397 | const GLfloat epsilon = 1.0e-5F; /* XXX fix? */ |
||
398 | GLfloat t[4], lit[4]; |
||
399 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
400 | if (t[3] < -(128.0F - epsilon)) |
||
401 | t[3] = - (128.0F - epsilon); |
||
402 | else if (t[3] > 128.0F - epsilon) |
||
403 | t[3] = 128.0F - epsilon; |
||
404 | if (t[0] < 0.0) |
||
405 | t[0] = 0.0; |
||
406 | if (t[1] < 0.0) |
||
407 | t[1] = 0.0; |
||
408 | lit[0] = 1.0; |
||
409 | lit[1] = t[0]; |
||
410 | lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F; |
||
411 | lit[3] = 1.0; |
||
412 | store_vector4( &inst->DstReg, machine, lit ); |
||
413 | } |
||
414 | break; |
||
415 | case RCP: |
||
416 | { |
||
417 | GLfloat t[4]; |
||
418 | fetch_vector1( &inst->SrcReg[0], machine, t ); |
||
419 | if (t[0] != 1.0F) |
||
420 | t[0] = 1.0F / t[0]; /* div by zero is infinity! */ |
||
421 | t[1] = t[2] = t[3] = t[0]; |
||
422 | store_vector4( &inst->DstReg, machine, t ); |
||
423 | } |
||
424 | break; |
||
425 | case RSQ: |
||
426 | { |
||
427 | GLfloat t[4]; |
||
428 | fetch_vector1( &inst->SrcReg[0], machine, t ); |
||
429 | t[0] = (float) (1.0 / sqrt(fabs(t[0]))); |
||
430 | t[1] = t[2] = t[3] = t[0]; |
||
431 | store_vector4( &inst->DstReg, machine, t ); |
||
432 | } |
||
433 | break; |
||
434 | case EXP: |
||
435 | { |
||
436 | GLfloat t[4], q[4], floor_t0; |
||
437 | fetch_vector1( &inst->SrcReg[0], machine, t ); |
||
438 | floor_t0 = (float) floor(t[0]); |
||
439 | if (floor_t0 > FLT_MAX_EXP) { |
||
440 | SET_POS_INFINITY(q[0]); |
||
441 | q[1] = 0.0F; |
||
442 | SET_POS_INFINITY(q[2]); |
||
443 | q[3] = 1.0F; |
||
444 | } |
||
445 | else if (floor_t0 < FLT_MIN_EXP) { |
||
446 | q[0] = 0.0F; |
||
447 | q[1] = 0.0F; |
||
448 | q[2] = 0.0F; |
||
449 | q[3] = 0.0F; |
||
450 | } |
||
451 | else { |
||
452 | #ifdef USE_IEEE |
||
453 | GLint ii = (GLint) floor_t0; |
||
454 | ii = (ii < 23) + 0x3f800000; |
||
455 | SET_FLOAT_BITS(q[0], ii); |
||
456 | q[0] = *((GLfloat *) &ii); |
||
457 | #else |
||
458 | q[0] = (GLfloat) pow(2.0, floor_t0); |
||
459 | #endif |
||
460 | q[1] = t[0] - floor_t0; |
||
461 | q[2] = (GLfloat) (q[0] * LOG2(q[1])); |
||
462 | q[3] = 1.0F; |
||
463 | } |
||
464 | store_vector4( &inst->DstReg, machine, t ); |
||
465 | } |
||
466 | break; |
||
467 | case LOG: |
||
468 | { |
||
469 | GLfloat t[4], q[4], abs_t0; |
||
470 | fetch_vector1( &inst->SrcReg[0], machine, t ); |
||
471 | abs_t0 = (GLfloat) fabs(t[0]); |
||
472 | if (abs_t0 != 0.0F) { |
||
473 | /* Since we really can't handle infinite values on VMS |
||
474 | * like other OSes we'll use __MAXFLOAT to represent |
||
475 | * infinity. This may need some tweaking. |
||
476 | */ |
||
477 | #ifdef VMS |
||
478 | if (abs_t0 == __MAXFLOAT) { |
||
479 | #else |
||
480 | if (IS_INF_OR_NAN(abs_t0)) { |
||
481 | #endif |
||
482 | SET_POS_INFINITY(q[0]); |
||
483 | q[1] = 1.0F; |
||
484 | SET_POS_INFINITY(q[2]); |
||
485 | } |
||
486 | else { |
||
487 | int exponent; |
||
488 | double mantissa = frexp(t[0], &exponent); |
||
489 | q[0] = (GLfloat) (exponent - 1); |
||
490 | q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */ |
||
491 | q[2] = (GLfloat) (q[0] + LOG2(q[1])); |
||
492 | } |
||
493 | } |
||
494 | else { |
||
495 | SET_NEG_INFINITY(q[0]); |
||
496 | q[1] = 1.0F; |
||
497 | SET_NEG_INFINITY(q[2]); |
||
498 | } |
||
499 | q[3] = 1.0; |
||
500 | store_vector4( &inst->DstReg, machine, q ); |
||
501 | } |
||
502 | break; |
||
503 | case MUL: |
||
504 | { |
||
505 | GLfloat t[4], u[4], prod[4]; |
||
506 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
507 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
508 | prod[0] = t[0] * u[0]; |
||
509 | prod[1] = t[1] * u[1]; |
||
510 | prod[2] = t[2] * u[2]; |
||
511 | prod[3] = t[3] * u[3]; |
||
512 | store_vector4( &inst->DstReg, machine, prod ); |
||
513 | } |
||
514 | break; |
||
515 | case ADD: |
||
516 | { |
||
517 | GLfloat t[4], u[4], sum[4]; |
||
518 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
519 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
520 | sum[0] = t[0] + u[0]; |
||
521 | sum[1] = t[1] + u[1]; |
||
522 | sum[2] = t[2] + u[2]; |
||
523 | sum[3] = t[3] + u[3]; |
||
524 | store_vector4( &inst->DstReg, machine, sum ); |
||
525 | } |
||
526 | break; |
||
527 | case DP3: |
||
528 | { |
||
529 | GLfloat t[4], u[4], dot[4]; |
||
530 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
531 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
532 | dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2]; |
||
533 | dot[1] = dot[2] = dot[3] = dot[0]; |
||
534 | store_vector4( &inst->DstReg, machine, dot ); |
||
535 | } |
||
536 | break; |
||
537 | case DP4: |
||
538 | { |
||
539 | GLfloat t[4], u[4], dot[4]; |
||
540 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
541 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
542 | dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3]; |
||
543 | dot[1] = dot[2] = dot[3] = dot[0]; |
||
544 | store_vector4( &inst->DstReg, machine, dot ); |
||
545 | } |
||
546 | break; |
||
547 | case DST: |
||
548 | { |
||
549 | GLfloat t[4], u[4], dst[4]; |
||
550 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
551 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
552 | dst[0] = 1.0F; |
||
553 | dst[1] = t[1] * u[1]; |
||
554 | dst[2] = t[2]; |
||
555 | dst[3] = u[3]; |
||
556 | store_vector4( &inst->DstReg, machine, dst ); |
||
557 | } |
||
558 | break; |
||
559 | case MIN: |
||
560 | { |
||
561 | GLfloat t[4], u[4], min[4]; |
||
562 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
563 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
564 | min[0] = (t[0] < u[0]) ? t[0] : u[0]; |
||
565 | min[1] = (t[1] < u[1]) ? t[1] : u[1]; |
||
566 | min[2] = (t[2] < u[2]) ? t[2] : u[2]; |
||
567 | min[3] = (t[3] < u[3]) ? t[3] : u[3]; |
||
568 | store_vector4( &inst->DstReg, machine, min ); |
||
569 | } |
||
570 | break; |
||
571 | case MAX: |
||
572 | { |
||
573 | GLfloat t[4], u[4], max[4]; |
||
574 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
575 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
576 | max[0] = (t[0] > u[0]) ? t[0] : u[0]; |
||
577 | max[1] = (t[1] > u[1]) ? t[1] : u[1]; |
||
578 | max[2] = (t[2] > u[2]) ? t[2] : u[2]; |
||
579 | max[3] = (t[3] > u[3]) ? t[3] : u[3]; |
||
580 | store_vector4( &inst->DstReg, machine, max ); |
||
581 | } |
||
582 | break; |
||
583 | case SLT: |
||
584 | { |
||
585 | GLfloat t[4], u[4], slt[4]; |
||
586 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
587 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
588 | slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F; |
||
589 | slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F; |
||
590 | slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F; |
||
591 | slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F; |
||
592 | store_vector4( &inst->DstReg, machine, slt ); |
||
593 | } |
||
594 | break; |
||
595 | case SGE: |
||
596 | { |
||
597 | GLfloat t[4], u[4], sge[4]; |
||
598 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
599 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
600 | sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F; |
||
601 | sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F; |
||
602 | sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F; |
||
603 | sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F; |
||
604 | store_vector4( &inst->DstReg, machine, sge ); |
||
605 | } |
||
606 | break; |
||
607 | case MAD: |
||
608 | { |
||
609 | GLfloat t[4], u[4], v[4], sum[4]; |
||
610 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
611 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
612 | fetch_vector4( &inst->SrcReg[2], machine, v ); |
||
613 | sum[0] = t[0] * u[0] + v[0]; |
||
614 | sum[1] = t[1] * u[1] + v[1]; |
||
615 | sum[2] = t[2] * u[2] + v[2]; |
||
616 | sum[3] = t[3] * u[3] + v[3]; |
||
617 | store_vector4( &inst->DstReg, machine, sum ); |
||
618 | } |
||
619 | break; |
||
620 | case ARL: |
||
621 | { |
||
622 | GLfloat t[4]; |
||
623 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
624 | machine->AddressReg = (GLint) floor(t[0]); |
||
625 | } |
||
626 | break; |
||
627 | case DPH: |
||
628 | { |
||
629 | GLfloat t[4], u[4], dot[4]; |
||
630 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
631 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
632 | dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3]; |
||
633 | dot[1] = dot[2] = dot[3] = dot[0]; |
||
634 | store_vector4( &inst->DstReg, machine, dot ); |
||
635 | } |
||
636 | break; |
||
637 | case RCC: |
||
638 | { |
||
639 | GLfloat t[4], u; |
||
640 | fetch_vector1( &inst->SrcReg[0], machine, t ); |
||
641 | if (t[0] == 1.0F) |
||
642 | u = 1.0F; |
||
643 | else |
||
644 | u = 1.0F / t[0]; |
||
645 | if (u > 0.0F) { |
||
646 | if (u > 1.884467e+019F) { |
||
647 | u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */ |
||
648 | } |
||
649 | else if (u < 5.42101e-020F) { |
||
650 | u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */ |
||
651 | } |
||
652 | } |
||
653 | else { |
||
654 | if (u < -1.884467e+019F) { |
||
655 | u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */ |
||
656 | } |
||
657 | else if (u > -5.42101e-020F) { |
||
658 | u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */ |
||
659 | } |
||
660 | } |
||
661 | t[0] = t[1] = t[2] = t[3] = u; |
||
662 | store_vector4( &inst->DstReg, machine, t ); |
||
663 | } |
||
664 | break; |
||
665 | case SUB: |
||
666 | { |
||
667 | GLfloat t[4], u[4], sum[4]; |
||
668 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
669 | fetch_vector4( &inst->SrcReg[1], machine, u ); |
||
670 | sum[0] = t[0] - u[0]; |
||
671 | sum[1] = t[1] - u[1]; |
||
672 | sum[2] = t[2] - u[2]; |
||
673 | sum[3] = t[3] - u[3]; |
||
674 | store_vector4( &inst->DstReg, machine, sum ); |
||
675 | } |
||
676 | break; |
||
677 | case ABS: |
||
678 | { |
||
679 | GLfloat t[4]; |
||
680 | fetch_vector4( &inst->SrcReg[0], machine, t ); |
||
681 | if (t[0] < 0.0) t[0] = -t[0]; |
||
682 | if (t[1] < 0.0) t[1] = -t[1]; |
||
683 | if (t[2] < 0.0) t[2] = -t[2]; |
||
684 | if (t[3] < 0.0) t[3] = -t[3]; |
||
685 | store_vector4( &inst->DstReg, machine, t ); |
||
686 | } |
||
687 | break; |
||
688 | |||
689 | case END: |
||
690 | return; |
||
691 | default: |
||
692 | /* bad instruction opcode */ |
||
693 | _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_program"); |
||
694 | return; |
||
695 | } |
||
696 | } |
||
697 | } |
||
698 | |||
699 | |||
700 | |||
701 | /** |
||
702 | Thoughts on vertex program optimization: |
||
703 | |||
704 | The obvious thing to do is to compile the vertex program into X86/SSE/3DNow! |
||
705 | assembly code. That will probably be a lot of work. |
||
706 | |||
707 | Another approach might be to replace the vp_instruction->Opcode field with |
||
708 | a pointer to a specialized C function which executes the instruction. |
||
709 | In particular we can write functions which skip swizzling, negating, |
||
710 | masking, relative addressing, etc. when they're not needed. |
||
711 | |||
712 | For example: |
||
713 | |||
714 | void simple_add( struct vp_instruction *inst ) |
||
715 | { |
||
716 | GLfloat *sum = machine->Registers[inst->DstReg.Register]; |
||
717 | GLfloat *a = machine->Registers[inst->SrcReg[0].Register]; |
||
718 | GLfloat *b = machine->Registers[inst->SrcReg[1].Register]; |
||
719 | sum[0] = a[0] + b[0]; |
||
720 | sum[1] = a[1] + b[1]; |
||
721 | sum[2] = a[2] + b[2]; |
||
722 | sum[3] = a[3] + b[3]; |
||
723 | } |
||
724 | |||
725 | */ |
||
726 | |||
727 | /* |
||
728 | |||
729 | KW: |
||
730 | |||
731 | A first step would be to 'vectorize' the programs in the same way as |
||
732 | the normal transformation code in the tnl module. Thus each opcode |
||
733 | takes zero or more input vectors (registers) and produces one or more |
||
734 | output vectors. |
||
735 | |||
736 | These operations would initially be coded in C, with machine-specific |
||
737 | assembly following, as is currently the case for matrix |
||
738 | transformations in the math/ directory. The preprocessing scheme for |
||
739 | selecting simpler operations Brian describes above would also work |
||
740 | here. |
||
741 | |||
742 | This should give reasonable performance without excessive effort. |
||
743 | |||
744 | */ |