Subversion Repositories shark

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
55 pj 1
/* $Id: vpexec.c,v 1.1 2003-02-28 11:42:06 pj Exp $ */
2
 
3
/*
4
 * Mesa 3-D graphics library
5
 * Version:  4.1
6
 *
7
 * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
8
 *
9
 * Permission is hereby granted, free of charge, to any person obtaining a
10
 * copy of this software and associated documentation files (the "Software"),
11
 * to deal in the Software without restriction, including without limitation
12
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13
 * and/or sell copies of the Software, and to permit persons to whom the
14
 * Software is furnished to do so, subject to the following conditions:
15
 *
16
 * The above copyright notice and this permission notice shall be included
17
 * in all copies or substantial portions of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 */
26
 
27
/*
28
 * -------- Regarding NV_vertex_program --------
29
 * Redistribution and use in source and binary forms, with or without
30
 * modification, are permitted provided that the following conditions are met:
31
 *
32
 * o Redistribution of the source code must contain a copyright notice
33
 *   and this list of conditions;
34
 *
35
 * o Redistribution in binary and source code form must contain the
36
 *   following Notice in the software and any documentation and/or other
37
 *   materials provided with the distribution; and
38
 *
39
 * o The name of Nvidia may not be used to promote or endorse software
40
 *   derived from the software.
41
 *
42
 * NOTICE: Nvidia hereby grants to each recipient a non-exclusive worldwide
43
 * royalty free patent license under patent claims that are licensable by
44
 * Nvidia and which are necessarily required and for which no commercially
45
 * viable non infringing alternative exists to make, use, sell, offer to sell,
46
 * import and otherwise transfer the vertex extension for the Mesa 3D Graphics
47
 * Library as distributed in source code and object code form.  No hardware or
48
 * hardware implementation (including a semiconductor implementation and chips)
49
 * are licensed hereunder. If a recipient makes a patent claim or institutes
50
 * patent litigation against Nvidia or Nvidia's customers for use or sale of
51
 * Nvidia products, then this license grant as to such recipient shall
52
 * immediately terminate and recipient immediately agrees to cease use and
53
 * distribution of the Mesa Program and derivatives thereof.
54
 *
55
 * THE MESA 3D GRAPHICS LIBRARY IS PROVIDED ON AN "AS IS BASIS, WITHOUT
56
 * WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING,
57
 * WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-NFRINGEMENT
58
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
59
 *
60
 * NVIDIA SHALL NOT HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
61
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
62
 * LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
63
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
64
 * ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE MESA 3D GRAPHICS
65
 * LIBRARY OR EVIDENCE OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDR, EVEN
66
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
67
 *
68
 * If you do not comply with this agreement, then Nvidia may cancel the license
69
 * and rights granted herein.
70
 * ---------------------------------------------
71
 */
72
 
73
/**
74
 * \file vpexec.c
75
 * \brief Code to execute vertex programs.
76
 * \author Brian Paul
77
 */
78
 
79
#include "glheader.h"
80
#include "context.h"
81
#include "imports.h"
82
#include "macros.h"
83
#include "mtypes.h"
84
#include "vpexec.h"
85
#include "mmath.h"
86
#include "math/m_matrix.h"
87
 
88
 
89
/**
90
 * Load/initialize the vertex program registers.
91
 * This needs to be done per vertex.
92
 */
93
void
94
_mesa_init_vp_registers(GLcontext *ctx)
95
{
96
   struct vp_machine *machine = &(ctx->VertexProgram.Machine);
97
   GLuint i;
98
 
99
   /* Input registers get initialized from the current vertex attribs */
100
   MEMCPY(machine->Registers[VP_INPUT_REG_START],
101
          ctx->Current.Attrib,
102
          16 * 4 * sizeof(GLfloat));
103
 
104
   /* Output and temp regs are initialized to [0,0,0,1] */
105
   for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) {
106
      machine->Registers[i][0] = 0.0F;
107
      machine->Registers[i][1] = 0.0F;
108
      machine->Registers[i][2] = 0.0F;
109
      machine->Registers[i][3] = 1.0F;
110
   }
111
   for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) {
112
      machine->Registers[i][0] = 0.0F;
113
      machine->Registers[i][1] = 0.0F;
114
      machine->Registers[i][2] = 0.0F;
115
      machine->Registers[i][3] = 1.0F;
116
   }
117
 
118
   /* The program regs aren't touched */
119
}
120
 
121
 
122
 
123
/**
124
 * Copy the 16 elements of a matrix into four consecutive program
125
 * registers starting at 'pos'.
126
 */
127
static void
128
load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
129
{
130
   GLuint i;
131
   pos += VP_PROG_REG_START;
132
   for (i = 0; i < 4; i++) {
133
      registers[pos + i][0] = mat[0 + i];
134
      registers[pos + i][1] = mat[4 + i];
135
      registers[pos + i][2] = mat[8 + i];
136
      registers[pos + i][3] = mat[12 + i];
137
   }
138
}
139
 
140
 
141
/**
142
 * As above, but transpose the matrix.
143
 */
144
static void
145
load_transpose_matrix(GLfloat registers[][4], GLuint pos,
146
                      const GLfloat mat[16])
147
{
148
   pos += VP_PROG_REG_START;
149
   MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
150
}
151
 
152
 
153
/**
154
 * Load all currently tracked matrices into the program registers.
155
 * This needs to be done per glBegin/glEnd.
156
 */
157
void
158
_mesa_init_tracked_matrices(GLcontext *ctx)
159
{
160
   GLuint i;
161
 
162
   for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) {
163
      /* point 'mat' at source matrix */
164
      GLmatrix *mat;
165
      if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
166
         mat = ctx->ModelviewMatrixStack.Top;
167
      }
168
      else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
169
         mat = ctx->ProjectionMatrixStack.Top;
170
      }
171
      else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
172
         mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
173
      }
174
      else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
175
         mat = ctx->ColorMatrixStack.Top;
176
      }
177
      else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
178
         /* XXX verify the combined matrix is up to date */
179
         mat = &ctx->_ModelProjectMatrix;
180
      }
181
      else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
182
               ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
183
         GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
184
         ASSERT(n < MAX_PROGRAM_MATRICES);
185
         mat = ctx->ProgramMatrixStack[n].Top;
186
      }
187
      else {
188
         /* no matrix is tracked, but we leave the register values as-is */
189
         assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
190
         continue;
191
      }
192
 
193
      /* load the matrix */
194
      if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
195
         load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
196
      }
197
      else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
198
         _math_matrix_analyse(mat); /* update the inverse */
199
         assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
200
         load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv);
201
      }
202
      else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
203
         load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
204
      }
205
      else {
206
         assert(ctx->VertexProgram.TrackMatrixTransform[i]
207
                == GL_INVERSE_TRANSPOSE_NV);
208
         _math_matrix_analyse(mat); /* update the inverse */
209
         assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
210
         load_transpose_matrix(ctx->VertexProgram.Machine.Registers,
211
                               i*4, mat->inv);
212
      }
213
   }
214
}
215
 
216
 
217
 
218
/**
219
 * For debugging.  Dump the current vertex program machine registers.
220
 */
221
void
222
_mesa_dump_vp_machine( const struct vp_machine *machine )
223
{
224
   int i;
225
   _mesa_printf("VertexIn:\n");
226
   for (i = 0; i < VP_NUM_INPUT_REGS; i++) {
227
      _mesa_printf("%d: %f %f %f %f   ", i,
228
             machine->Registers[i + VP_INPUT_REG_START][0],
229
             machine->Registers[i + VP_INPUT_REG_START][1],
230
             machine->Registers[i + VP_INPUT_REG_START][2],
231
             machine->Registers[i + VP_INPUT_REG_START][3]);
232
   }
233
   _mesa_printf("\n");
234
 
235
   _mesa_printf("VertexOut:\n");
236
   for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) {
237
      _mesa_printf("%d: %f %f %f %f   ", i,
238
             machine->Registers[i + VP_OUTPUT_REG_START][0],
239
             machine->Registers[i + VP_OUTPUT_REG_START][1],
240
             machine->Registers[i + VP_OUTPUT_REG_START][2],
241
             machine->Registers[i + VP_OUTPUT_REG_START][3]);
242
   }
243
   _mesa_printf("\n");
244
 
245
   _mesa_printf("Registers:\n");
246
   for (i = 0; i < VP_NUM_TEMP_REGS; i++) {
247
      _mesa_printf("%d: %f %f %f %f   ", i,
248
             machine->Registers[i + VP_TEMP_REG_START][0],
249
             machine->Registers[i + VP_TEMP_REG_START][1],
250
             machine->Registers[i + VP_TEMP_REG_START][2],
251
             machine->Registers[i + VP_TEMP_REG_START][3]);
252
   }
253
   _mesa_printf("\n");
254
 
255
   _mesa_printf("Parameters:\n");
256
   for (i = 0; i < VP_NUM_PROG_REGS; i++) {
257
      _mesa_printf("%d: %f %f %f %f   ", i,
258
             machine->Registers[i + VP_PROG_REG_START][0],
259
             machine->Registers[i + VP_PROG_REG_START][1],
260
             machine->Registers[i + VP_PROG_REG_START][2],
261
             machine->Registers[i + VP_PROG_REG_START][3]);
262
   }
263
   _mesa_printf("\n");
264
}
265
 
266
 
267
/**
268
 * Fetch a 4-element float vector from the given source register.
269
 * Apply swizzling and negating as needed.
270
 */
271
static void
272
fetch_vector4( const struct vp_src_register *source,
273
               const struct vp_machine *machine,
274
               GLfloat result[4] )
275
{
276
   static const GLfloat zero[4] = { 0, 0, 0, 0 };
277
   const GLfloat *src;
278
 
279
   if (source->RelAddr) {
280
      GLint reg = source->Register + machine->AddressReg;
281
      if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
282
         src = zero;
283
      else
284
         src = machine->Registers[reg];
285
   }
286
   else {
287
      src = machine->Registers[source->Register];
288
   }
289
 
290
   if (source->Negate) {
291
      result[0] = -src[source->Swizzle[0]];
292
      result[1] = -src[source->Swizzle[1]];
293
      result[2] = -src[source->Swizzle[2]];
294
      result[3] = -src[source->Swizzle[3]];
295
   }
296
   else {
297
      result[0] = src[source->Swizzle[0]];
298
      result[1] = src[source->Swizzle[1]];
299
      result[2] = src[source->Swizzle[2]];
300
      result[3] = src[source->Swizzle[3]];
301
   }
302
}
303
 
304
 
305
/**
306
 * As above, but only return result[0] element.
307
 */
308
static void
309
fetch_vector1( const struct vp_src_register *source,
310
               const struct vp_machine *machine,
311
               GLfloat result[4] )
312
{
313
   static const GLfloat zero[4] = { 0, 0, 0, 0 };
314
   const GLfloat *src;
315
 
316
   if (source->RelAddr) {
317
      GLint reg = source->Register + machine->AddressReg;
318
      if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
319
         src = zero;
320
      else
321
         src = machine->Registers[reg];
322
   }
323
   else {
324
      src = machine->Registers[source->Register];
325
   }
326
 
327
   if (source->Negate) {
328
      result[0] = -src[source->Swizzle[0]];
329
   }
330
   else {
331
      result[0] = src[source->Swizzle[0]];
332
   }
333
}
334
 
335
 
336
/**
337
 * Store 4 floats into a register.
338
 */
339
static void
340
store_vector4( const struct vp_dst_register *dest, struct vp_machine *machine,
341
               const GLfloat value[4] )
342
{
343
   GLfloat *dst = machine->Registers[dest->Register];
344
 
345
   if (dest->WriteMask[0])
346
      dst[0] = value[0];
347
   if (dest->WriteMask[1])
348
      dst[1] = value[1];
349
   if (dest->WriteMask[2])
350
      dst[2] = value[2];
351
   if (dest->WriteMask[3])
352
      dst[3] = value[3];
353
}
354
 
355
 
356
/**
 * Set x to positive or negative infinity.
 *
 * - USE_IEEE: stores the single-precision bit pattern directly
 *   (0x7F800000 / 0xFF800000) through a GLuint pointer.
 * - VMS: uses +/- __MAXFLOAT as a stand-in for infinity.
 * - otherwise: assigns +/- HUGE_VAL converted to GLfloat.
 *
 * The argument must be an lvalue.  It and the whole expansion are
 * parenthesized so the macros behave as ordinary expressions wherever
 * they are used.
 */
#ifdef USE_IEEE
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the storage of float lvalue x with the integer 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
371
 
372
 
373
/**
374
 * Execute the given vertex program
375
 */
376
void
377
_mesa_exec_program(GLcontext *ctx, const struct vp_program *program)
378
{
379
   struct vp_machine *machine = &ctx->VertexProgram.Machine;
380
   const struct vp_instruction *inst;
381
 
382
   /* XXX load vertex fields into input registers */
383
   /* and do other initialization */
384
 
385
 
386
   for (inst = program->Instructions; inst->Opcode !=END; inst++) {
387
      switch (inst->Opcode) {
388
         case MOV:
389
            {
390
               GLfloat t[4];
391
               fetch_vector4( &inst->SrcReg[0], machine, t );
392
               store_vector4( &inst->DstReg, machine, t );
393
            }
394
            break;
395
         case LIT:
396
            {
397
               const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
398
               GLfloat t[4], lit[4];
399
               fetch_vector4( &inst->SrcReg[0], machine, t );
400
               if (t[3] < -(128.0F - epsilon))
401
                   t[3] = - (128.0F - epsilon);
402
               else if (t[3] > 128.0F - epsilon)
403
                  t[3] = 128.0F - epsilon;
404
               if (t[0] < 0.0)
405
                  t[0] = 0.0;
406
               if (t[1] < 0.0)
407
                  t[1] = 0.0;
408
               lit[0] = 1.0;
409
               lit[1] = t[0];
410
               lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
411
               lit[3] = 1.0;
412
               store_vector4( &inst->DstReg, machine, lit );
413
            }
414
            break;
415
         case RCP:
416
            {
417
               GLfloat t[4];
418
               fetch_vector1( &inst->SrcReg[0], machine, t );
419
               if (t[0] != 1.0F)
420
                  t[0] = 1.0F / t[0];  /* div by zero is infinity! */
421
               t[1] = t[2] = t[3] = t[0];
422
               store_vector4( &inst->DstReg, machine, t );
423
            }
424
            break;
425
         case RSQ:
426
            {
427
               GLfloat t[4];
428
               fetch_vector1( &inst->SrcReg[0], machine, t );
429
               t[0] = (float) (1.0 / sqrt(fabs(t[0])));
430
               t[1] = t[2] = t[3] = t[0];
431
               store_vector4( &inst->DstReg, machine, t );
432
            }
433
            break;
434
         case EXP:
435
            {
436
               GLfloat t[4], q[4], floor_t0;
437
               fetch_vector1( &inst->SrcReg[0], machine, t );
438
               floor_t0 = (float) floor(t[0]);
439
               if (floor_t0 > FLT_MAX_EXP) {
440
                  SET_POS_INFINITY(q[0]);
441
                  q[1] = 0.0F;
442
                  SET_POS_INFINITY(q[2]);
443
                  q[3] = 1.0F;
444
               }
445
               else if (floor_t0 < FLT_MIN_EXP) {
446
                  q[0] = 0.0F;
447
                  q[1] = 0.0F;
448
                  q[2] = 0.0F;
449
                  q[3] = 0.0F;
450
               }
451
               else {
452
#ifdef USE_IEEE
453
                  GLint ii = (GLint) floor_t0;
454
                  ii = (ii < 23) + 0x3f800000;
455
                  SET_FLOAT_BITS(q[0], ii);
456
                  q[0] = *((GLfloat *) &ii);
457
#else
458
                  q[0] = (GLfloat) pow(2.0, floor_t0);
459
#endif
460
                  q[1] = t[0] - floor_t0;
461
                  q[2] = (GLfloat) (q[0] * LOG2(q[1]));
462
                  q[3] = 1.0F;
463
               }
464
               store_vector4( &inst->DstReg, machine, t );
465
            }
466
            break;
467
         case LOG:
468
            {
469
               GLfloat t[4], q[4], abs_t0;
470
               fetch_vector1( &inst->SrcReg[0], machine, t );
471
               abs_t0 = (GLfloat) fabs(t[0]);
472
               if (abs_t0 != 0.0F) {
473
                  /* Since we really can't handle infinite values on VMS
474
                   * like other OSes we'll use __MAXFLOAT to represent
475
                   * infinity.  This may need some tweaking.
476
                   */
477
#ifdef VMS
478
                  if (abs_t0 == __MAXFLOAT) {
479
#else
480
                  if (IS_INF_OR_NAN(abs_t0)) {
481
#endif
482
                     SET_POS_INFINITY(q[0]);
483
                     q[1] = 1.0F;
484
                     SET_POS_INFINITY(q[2]);
485
                  }
486
                  else {
487
                     int exponent;
488
                     double mantissa = frexp(t[0], &exponent);
489
                     q[0] = (GLfloat) (exponent - 1);
490
                     q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
491
                     q[2] = (GLfloat) (q[0] + LOG2(q[1]));
492
                  }
493
               }
494
               else {
495
                  SET_NEG_INFINITY(q[0]);
496
                  q[1] = 1.0F;
497
                  SET_NEG_INFINITY(q[2]);
498
               }
499
               q[3] = 1.0;
500
               store_vector4( &inst->DstReg, machine, q );
501
            }
502
            break;
503
         case MUL:
504
            {
505
               GLfloat t[4], u[4], prod[4];
506
               fetch_vector4( &inst->SrcReg[0], machine, t );
507
               fetch_vector4( &inst->SrcReg[1], machine, u );
508
               prod[0] = t[0] * u[0];
509
               prod[1] = t[1] * u[1];
510
               prod[2] = t[2] * u[2];
511
               prod[3] = t[3] * u[3];
512
               store_vector4( &inst->DstReg, machine, prod );
513
            }
514
            break;
515
         case ADD:
516
            {
517
               GLfloat t[4], u[4], sum[4];
518
               fetch_vector4( &inst->SrcReg[0], machine, t );
519
               fetch_vector4( &inst->SrcReg[1], machine, u );
520
               sum[0] = t[0] + u[0];
521
               sum[1] = t[1] + u[1];
522
               sum[2] = t[2] + u[2];
523
               sum[3] = t[3] + u[3];
524
               store_vector4( &inst->DstReg, machine, sum );
525
            }
526
            break;
527
         case DP3:
528
            {
529
               GLfloat t[4], u[4], dot[4];
530
               fetch_vector4( &inst->SrcReg[0], machine, t );
531
               fetch_vector4( &inst->SrcReg[1], machine, u );
532
               dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
533
               dot[1] = dot[2] = dot[3] = dot[0];
534
               store_vector4( &inst->DstReg, machine, dot );
535
            }
536
            break;
537
         case DP4:
538
            {
539
               GLfloat t[4], u[4], dot[4];
540
               fetch_vector4( &inst->SrcReg[0], machine, t );
541
               fetch_vector4( &inst->SrcReg[1], machine, u );
542
               dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
543
               dot[1] = dot[2] = dot[3] = dot[0];
544
               store_vector4( &inst->DstReg, machine, dot );
545
            }
546
            break;
547
         case DST:
548
            {
549
               GLfloat t[4], u[4], dst[4];
550
               fetch_vector4( &inst->SrcReg[0], machine, t );
551
               fetch_vector4( &inst->SrcReg[1], machine, u );
552
               dst[0] = 1.0F;
553
               dst[1] = t[1] * u[1];
554
               dst[2] = t[2];
555
               dst[3] = u[3];
556
               store_vector4( &inst->DstReg, machine, dst );
557
            }
558
            break;
559
         case MIN:
560
            {
561
               GLfloat t[4], u[4], min[4];
562
               fetch_vector4( &inst->SrcReg[0], machine, t );
563
               fetch_vector4( &inst->SrcReg[1], machine, u );
564
               min[0] = (t[0] < u[0]) ? t[0] : u[0];
565
               min[1] = (t[1] < u[1]) ? t[1] : u[1];
566
               min[2] = (t[2] < u[2]) ? t[2] : u[2];
567
               min[3] = (t[3] < u[3]) ? t[3] : u[3];
568
               store_vector4( &inst->DstReg, machine, min );
569
            }
570
            break;
571
         case MAX:
572
            {
573
               GLfloat t[4], u[4], max[4];
574
               fetch_vector4( &inst->SrcReg[0], machine, t );
575
               fetch_vector4( &inst->SrcReg[1], machine, u );
576
               max[0] = (t[0] > u[0]) ? t[0] : u[0];
577
               max[1] = (t[1] > u[1]) ? t[1] : u[1];
578
               max[2] = (t[2] > u[2]) ? t[2] : u[2];
579
               max[3] = (t[3] > u[3]) ? t[3] : u[3];
580
               store_vector4( &inst->DstReg, machine, max );
581
            }
582
            break;
583
         case SLT:
584
            {
585
               GLfloat t[4], u[4], slt[4];
586
               fetch_vector4( &inst->SrcReg[0], machine, t );
587
               fetch_vector4( &inst->SrcReg[1], machine, u );
588
               slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
589
               slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
590
               slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
591
               slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
592
               store_vector4( &inst->DstReg, machine, slt );
593
            }
594
            break;
595
         case SGE:
596
            {
597
               GLfloat t[4], u[4], sge[4];
598
               fetch_vector4( &inst->SrcReg[0], machine, t );
599
               fetch_vector4( &inst->SrcReg[1], machine, u );
600
               sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
601
               sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
602
               sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
603
               sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
604
               store_vector4( &inst->DstReg, machine, sge );
605
            }
606
            break;
607
         case MAD:
608
            {
609
               GLfloat t[4], u[4], v[4], sum[4];
610
               fetch_vector4( &inst->SrcReg[0], machine, t );
611
               fetch_vector4( &inst->SrcReg[1], machine, u );
612
               fetch_vector4( &inst->SrcReg[2], machine, v );
613
               sum[0] = t[0] * u[0] + v[0];
614
               sum[1] = t[1] * u[1] + v[1];
615
               sum[2] = t[2] * u[2] + v[2];
616
               sum[3] = t[3] * u[3] + v[3];
617
               store_vector4( &inst->DstReg, machine, sum );
618
            }
619
            break;
620
         case ARL:
621
            {
622
               GLfloat t[4];
623
               fetch_vector4( &inst->SrcReg[0], machine, t );
624
               machine->AddressReg = (GLint) floor(t[0]);
625
            }
626
            break;
627
         case DPH:
628
            {
629
               GLfloat t[4], u[4], dot[4];
630
               fetch_vector4( &inst->SrcReg[0], machine, t );
631
               fetch_vector4( &inst->SrcReg[1], machine, u );
632
               dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
633
               dot[1] = dot[2] = dot[3] = dot[0];
634
               store_vector4( &inst->DstReg, machine, dot );
635
            }
636
            break;
637
         case RCC:
638
            {
639
               GLfloat t[4], u;
640
               fetch_vector1( &inst->SrcReg[0], machine, t );
641
               if (t[0] == 1.0F)
642
                  u = 1.0F;
643
               else
644
                  u = 1.0F / t[0];
645
               if (u > 0.0F) {
646
                  if (u > 1.884467e+019F) {
647
                     u = 1.884467e+019F;  /* IEEE 32-bit binary value 0x5F800000 */
648
                  }
649
                  else if (u < 5.42101e-020F) {
650
                     u = 5.42101e-020F;   /* IEEE 32-bit binary value 0x1F800000 */
651
                  }
652
               }
653
               else {
654
                  if (u < -1.884467e+019F) {
655
                     u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
656
                  }
657
                  else if (u > -5.42101e-020F) {
658
                     u = -5.42101e-020F;  /* IEEE 32-bit binary value 0x9F800000 */
659
                  }
660
               }
661
               t[0] = t[1] = t[2] = t[3] = u;
662
               store_vector4( &inst->DstReg, machine, t );
663
            }
664
            break;
665
         case SUB:
666
            {
667
               GLfloat t[4], u[4], sum[4];
668
               fetch_vector4( &inst->SrcReg[0], machine, t );
669
               fetch_vector4( &inst->SrcReg[1], machine, u );
670
               sum[0] = t[0] - u[0];
671
               sum[1] = t[1] - u[1];
672
               sum[2] = t[2] - u[2];
673
               sum[3] = t[3] - u[3];
674
               store_vector4( &inst->DstReg, machine, sum );
675
            }
676
            break;
677
         case ABS:
678
            {
679
               GLfloat t[4];
680
               fetch_vector4( &inst->SrcReg[0], machine, t );
681
               if (t[0] < 0.0)  t[0] = -t[0];
682
               if (t[1] < 0.0)  t[1] = -t[1];
683
               if (t[2] < 0.0)  t[2] = -t[2];
684
               if (t[3] < 0.0)  t[3] = -t[3];
685
               store_vector4( &inst->DstReg, machine, t );
686
            }
687
            break;
688
 
689
         case END:
690
            return;
691
         default:
692
            /* bad instruction opcode */
693
            _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_program");
694
            return;
695
      }
696
   }
697
}
698
 
699
 
700
 
701
/**
702
Thoughts on vertex program optimization:
703
 
704
The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
705
assembly code.  That will probably be a lot of work.
706
 
707
Another approach might be to replace the vp_instruction->Opcode field with
708
a pointer to a specialized C function which executes the instruction.
709
In particular we can write functions which skip swizzling, negating,
710
masking, relative addressing, etc. when they're not needed.
711
 
712
For example:
713
 
714
void simple_add( struct vp_instruction *inst )
715
{
716
   GLfloat *sum = machine->Registers[inst->DstReg.Register];
717
   GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
718
   GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
719
   sum[0] = a[0] + b[0];
720
   sum[1] = a[1] + b[1];
721
   sum[2] = a[2] + b[2];
722
   sum[3] = a[3] + b[3];
723
}
724
 
725
*/
726
 
727
/*
728
 
729
KW:
730
 
731
A first step would be to 'vectorize' the programs in the same way as
732
the normal transformation code in the tnl module.  Thus each opcode
733
takes zero or more input vectors (registers) and produces one or more
734
output vectors.
735
 
736
These operations would initially be coded in C, with machine-specific
737
assembly following, as is currently the case for matrix
738
transformations in the math/ directory.  The preprocessing scheme for
739
selecting simpler operations Brian describes above would also work
740
here.
741
 
742
This should give reasonable performance without excessive effort.
743
 
744
*/