WebSVN - shark - Blame - Rev 1618 - /shark/branches/xen/ports/mesa/src/vpexec.c

Rev	Author	Line No.	Line
55	pj	1	/* $Id: vpexec.c,v 1.1 2003-02-28 11:42:06 pj Exp $ */
		2
		3	/*
		4	* Mesa 3-D graphics library
		5	* Version: 4.1
		6	*
		7	* Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
		8	*
		9	* Permission is hereby granted, free of charge, to any person obtaining a
		10	* copy of this software and associated documentation files (the "Software"),
		11	* to deal in the Software without restriction, including without limitation
		12	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		13	* and/or sell copies of the Software, and to permit persons to whom the
		14	* Software is furnished to do so, subject to the following conditions:
		15	*
		16	* The above copyright notice and this permission notice shall be included
		17	* in all copies or substantial portions of the Software.
		18	*
		19	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
		20	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		21	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		22	* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
		23	* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
		24	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		25	*/
		26
		27	/*
		28	* -------- Regarding NV_vertex_program --------
		29	* Redistribution and use in source and binary forms, with or without
		30	* modification, are permitted provided that the following conditions are met:
		31	*
		32	* o Redistribution of the source code must contain a copyright notice
		33	* and this list of conditions;
		34	*
		35	* o Redistribution in binary and source code form must contain the
		36	* following Notice in the software and any documentation and/or other
		37	* materials provided with the distribution; and
		38	*
		39	* o The name of Nvidia may not be used to promote or endorse software
		40	* derived from the software.
		41	*
		42	* NOTICE: Nvidia hereby grants to each recipient a non-exclusive worldwide
		43	* royalty free patent license under patent claims that are licensable by
		44	* Nvidia and which are necessarily required and for which no commercially
		45	* viable non infringing alternative exists to make, use, sell, offer to sell,
		46	* import and otherwise transfer the vertex extension for the Mesa 3D Graphics
		47	* Library as distributed in source code and object code form. No hardware or
		48	* hardware implementation (including a semiconductor implementation and chips)
		49	* are licensed hereunder. If a recipient makes a patent claim or institutes
		50	* patent litigation against Nvidia or Nvidia's customers for use or sale of
		51	* Nvidia products, then this license grant as to such recipient shall
		52	* immediately terminate and recipient immediately agrees to cease use and
		53	* distribution of the Mesa Program and derivatives thereof.
		54	*
		55	* THE MESA 3D GRAPHICS LIBRARY IS PROVIDED ON AN "AS IS BASIS, WITHOUT
		56	* WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING,
		57	* WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-NFRINGEMENT
		58	* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
		59	*
		60	* NVIDIA SHALL NOT HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
		61	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
		62	* LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
		63	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
		64	* ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE MESA 3D GRAPHICS
		65	* LIBRARY OR EVIDENCE OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDR, EVEN
		66	* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
		67	*
		68	* If you do not comply with this agreement, then Nvidia may cancel the license
		69	* and rights granted herein.
		70	* ---------------------------------------------
		71	*/
		72
		73	/**
		74	* \file vpexec.c
		75	* \brief Code to execute vertex programs.
		76	* \author Brian Paul
		77	*/
		78
		79	#include "glheader.h"
		80	#include "context.h"
		81	#include "imports.h"
		82	#include "macros.h"
		83	#include "mtypes.h"
		84	#include "vpexec.h"
		85	#include "mmath.h"
		86	#include "math/m_matrix.h"
		87
		88
		89	/**
		90	* Load/initialize the vertex program registers.
		91	* This needs to be done per vertex.
		92	*/
		93	void
		94	_mesa_init_vp_registers(GLcontext *ctx)
		95	{
		96	struct vp_machine *machine = &(ctx->VertexProgram.Machine);
		97	GLuint i;
		98
		99	/* Input registers get initialized from the current vertex attribs */
		100	MEMCPY(machine->Registers[VP_INPUT_REG_START],
		101	ctx->Current.Attrib,
		102	16 * 4 * sizeof(GLfloat));
		103
		104	/* Output and temp regs are initialized to [0,0,0,1] */
		105	for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) {
		106	machine->Registers[i][0] = 0.0F;
		107	machine->Registers[i][1] = 0.0F;
		108	machine->Registers[i][2] = 0.0F;
		109	machine->Registers[i][3] = 1.0F;
		110	}
		111	for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) {
		112	machine->Registers[i][0] = 0.0F;
		113	machine->Registers[i][1] = 0.0F;
		114	machine->Registers[i][2] = 0.0F;
		115	machine->Registers[i][3] = 1.0F;
		116	}
		117
		118	/* The program regs aren't touched */
		119	}
		120
		121
		122
		123	/**
		124	* Copy the 16 elements of a matrix into four consecutive program
		125	* registers starting at 'pos'.
		126	*/
		127	static void
		128	load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
		129	{
		130	GLuint i;
		131	pos += VP_PROG_REG_START;
		132	for (i = 0; i < 4; i++) {
		133	registers[pos + i][0] = mat[0 + i];
		134	registers[pos + i][1] = mat[4 + i];
		135	registers[pos + i][2] = mat[8 + i];
		136	registers[pos + i][3] = mat[12 + i];
		137	}
		138	}
		139
		140
		141	/**
		142	* As above, but transpose the matrix.
		143	*/
		144	static void
		145	load_transpose_matrix(GLfloat registers[][4], GLuint pos,
		146	const GLfloat mat[16])
		147	{
		148	pos += VP_PROG_REG_START;
		149	MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
		150	}
		151
		152
		153	/**
		154	* Load all currently tracked matrices into the program registers.
		155	* This needs to be done per glBegin/glEnd.
		156	*/
		157	void
		158	_mesa_init_tracked_matrices(GLcontext *ctx)
		159	{
		160	GLuint i;
		161
		162	for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) {
		163	/* point 'mat' at source matrix */
		164	GLmatrix *mat;
		165	if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
		166	mat = ctx->ModelviewMatrixStack.Top;
		167	}
		168	else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
		169	mat = ctx->ProjectionMatrixStack.Top;
		170	}
		171	else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
		172	mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
		173	}
		174	else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
		175	mat = ctx->ColorMatrixStack.Top;
		176	}
		177	else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
		178	/* XXX verify the combined matrix is up to date */
		179	mat = &ctx->_ModelProjectMatrix;
		180	}
		181	else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
		182	ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
		183	GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
		184	ASSERT(n < MAX_PROGRAM_MATRICES);
		185	mat = ctx->ProgramMatrixStack[n].Top;
		186	}
		187	else {
		188	/* no matrix is tracked, but we leave the register values as-is */
		189	assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
		190	continue;
		191	}
		192
		193	/* load the matrix */
		194	if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
		195	load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
		196	}
		197	else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
		198	_math_matrix_analyse(mat); /* update the inverse */
		199	assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
		200	load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv);
		201	}
		202	else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
		203	load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
		204	}
		205	else {
		206	assert(ctx->VertexProgram.TrackMatrixTransform[i]
		207	== GL_INVERSE_TRANSPOSE_NV);
		208	_math_matrix_analyse(mat); /* update the inverse */
		209	assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
		210	load_transpose_matrix(ctx->VertexProgram.Machine.Registers,
		211	i*4, mat->inv);
		212	}
		213	}
		214	}
		215
		216
		217
		218	/**
		219	* For debugging. Dump the current vertex program machine registers.
		220	*/
		221	void
		222	_mesa_dump_vp_machine( const struct vp_machine *machine )
		223	{
		224	int i;
		225	_mesa_printf("VertexIn:\n");
		226	for (i = 0; i < VP_NUM_INPUT_REGS; i++) {
		227	_mesa_printf("%d: %f %f %f %f ", i,
		228	machine->Registers[i + VP_INPUT_REG_START][0],
		229	machine->Registers[i + VP_INPUT_REG_START][1],
		230	machine->Registers[i + VP_INPUT_REG_START][2],
		231	machine->Registers[i + VP_INPUT_REG_START][3]);
		232	}
		233	_mesa_printf("\n");
		234
		235	_mesa_printf("VertexOut:\n");
		236	for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) {
		237	_mesa_printf("%d: %f %f %f %f ", i,
		238	machine->Registers[i + VP_OUTPUT_REG_START][0],
		239	machine->Registers[i + VP_OUTPUT_REG_START][1],
		240	machine->Registers[i + VP_OUTPUT_REG_START][2],
		241	machine->Registers[i + VP_OUTPUT_REG_START][3]);
		242	}
		243	_mesa_printf("\n");
		244
		245	_mesa_printf("Registers:\n");
		246	for (i = 0; i < VP_NUM_TEMP_REGS; i++) {
		247	_mesa_printf("%d: %f %f %f %f ", i,
		248	machine->Registers[i + VP_TEMP_REG_START][0],
		249	machine->Registers[i + VP_TEMP_REG_START][1],
		250	machine->Registers[i + VP_TEMP_REG_START][2],
		251	machine->Registers[i + VP_TEMP_REG_START][3]);
		252	}
		253	_mesa_printf("\n");
		254
		255	_mesa_printf("Parameters:\n");
		256	for (i = 0; i < VP_NUM_PROG_REGS; i++) {
		257	_mesa_printf("%d: %f %f %f %f ", i,
		258	machine->Registers[i + VP_PROG_REG_START][0],
		259	machine->Registers[i + VP_PROG_REG_START][1],
		260	machine->Registers[i + VP_PROG_REG_START][2],
		261	machine->Registers[i + VP_PROG_REG_START][3]);
		262	}
		263	_mesa_printf("\n");
		264	}
		265
		266
		267	/**
		268	* Fetch a 4-element float vector from the given source register.
		269	* Apply swizzling and negating as needed.
		270	*/
		271	static void
		272	fetch_vector4( const struct vp_src_register *source,
		273	const struct vp_machine *machine,
		274	GLfloat result[4] )
		275	{
		276	static const GLfloat zero[4] = { 0, 0, 0, 0 };
		277	const GLfloat *src;
		278
		279	if (source->RelAddr) {
		280	GLint reg = source->Register + machine->AddressReg;
		281	if (reg < VP_PROG_REG_START \|\| reg > VP_PROG_REG_END)
		282	src = zero;
		283	else
		284	src = machine->Registers[reg];
		285	}
		286	else {
		287	src = machine->Registers[source->Register];
		288	}
		289
		290	if (source->Negate) {
		291	result[0] = -src[source->Swizzle[0]];
		292	result[1] = -src[source->Swizzle[1]];
		293	result[2] = -src[source->Swizzle[2]];
		294	result[3] = -src[source->Swizzle[3]];
		295	}
		296	else {
		297	result[0] = src[source->Swizzle[0]];
		298	result[1] = src[source->Swizzle[1]];
		299	result[2] = src[source->Swizzle[2]];
		300	result[3] = src[source->Swizzle[3]];
		301	}
		302	}
		303
		304
		305	/**
		306	* As above, but only return result[0] element.
		307	*/
		308	static void
		309	fetch_vector1( const struct vp_src_register *source,
		310	const struct vp_machine *machine,
		311	GLfloat result[4] )
		312	{
		313	static const GLfloat zero[4] = { 0, 0, 0, 0 };
		314	const GLfloat *src;
		315
		316	if (source->RelAddr) {
		317	GLint reg = source->Register + machine->AddressReg;
		318	if (reg < VP_PROG_REG_START \|\| reg > VP_PROG_REG_END)
		319	src = zero;
		320	else
		321	src = machine->Registers[reg];
		322	}
		323	else {
		324	src = machine->Registers[source->Register];
		325	}
		326
		327	if (source->Negate) {
		328	result[0] = -src[source->Swizzle[0]];
		329	}
		330	else {
		331	result[0] = src[source->Swizzle[0]];
		332	}
		333	}
		334
		335
		336	/**
		337	* Store 4 floats into a register.
		338	*/
		339	static void
		340	store_vector4( const struct vp_dst_register dest, struct vp_machine machine,
		341	const GLfloat value[4] )
		342	{
		343	GLfloat *dst = machine->Registers[dest->Register];
		344
		345	if (dest->WriteMask[0])
		346	dst[0] = value[0];
		347	if (dest->WriteMask[1])
		348	dst[1] = value[1];
		349	if (dest->WriteMask[2])
		350	dst[2] = value[2];
		351	if (dest->WriteMask[3])
		352	dst[3] = value[3];
		353	}
		354
		355
		356	/**
		357	* Set x to positive or negative infinity.
		358	*/
		359	#ifdef USE_IEEE
		360	#define SET_POS_INFINITY(x) ( ((GLuint ) &x) = 0x7F800000 )
		361	#define SET_NEG_INFINITY(x) ( ((GLuint ) &x) = 0xFF800000 )
		362	#elif defined(VMS)
		363	#define SET_POS_INFINITY(x) x = __MAXFLOAT
		364	#define SET_NEG_INFINITY(x) x = -__MAXFLOAT
		365	#else
		366	#define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL
		367	#define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL
		368	#endif
		369
		370	#define SET_FLOAT_BITS(x, bits) ((fi_type *) &(x))->i = bits
		371
		372
		373	/**
		374	* Execute the given vertex program
		375	*/
		376	void
		377	_mesa_exec_program(GLcontext ctx, const struct vp_program program)
		378	{
		379	struct vp_machine *machine = &ctx->VertexProgram.Machine;
		380	const struct vp_instruction *inst;
		381
		382	/* XXX load vertex fields into input registers */
		383	/* and do other initialization */
		384
		385
		386	for (inst = program->Instructions; inst->Opcode !=END; inst++) {
		387	switch (inst->Opcode) {
		388	case MOV:
		389	{
		390	GLfloat t[4];
		391	fetch_vector4( &inst->SrcReg[0], machine, t );
		392	store_vector4( &inst->DstReg, machine, t );
		393	}
		394	break;
		395	case LIT:
		396	{
		397	const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
		398	GLfloat t[4], lit[4];
		399	fetch_vector4( &inst->SrcReg[0], machine, t );
		400	if (t[3] < -(128.0F - epsilon))
		401	t[3] = - (128.0F - epsilon);
		402	else if (t[3] > 128.0F - epsilon)
		403	t[3] = 128.0F - epsilon;
		404	if (t[0] < 0.0)
		405	t[0] = 0.0;
		406	if (t[1] < 0.0)
		407	t[1] = 0.0;
		408	lit[0] = 1.0;
		409	lit[1] = t[0];
		410	lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
		411	lit[3] = 1.0;
		412	store_vector4( &inst->DstReg, machine, lit );
		413	}
		414	break;
		415	case RCP:
		416	{
		417	GLfloat t[4];
		418	fetch_vector1( &inst->SrcReg[0], machine, t );
		419	if (t[0] != 1.0F)
		420	t[0] = 1.0F / t[0]; /* div by zero is infinity! */
		421	t[1] = t[2] = t[3] = t[0];
		422	store_vector4( &inst->DstReg, machine, t );
		423	}
		424	break;
		425	case RSQ:
		426	{
		427	GLfloat t[4];
		428	fetch_vector1( &inst->SrcReg[0], machine, t );
		429	t[0] = (float) (1.0 / sqrt(fabs(t[0])));
		430	t[1] = t[2] = t[3] = t[0];
		431	store_vector4( &inst->DstReg, machine, t );
		432	}
		433	break;
		434	case EXP:
		435	{
		436	GLfloat t[4], q[4], floor_t0;
		437	fetch_vector1( &inst->SrcReg[0], machine, t );
		438	floor_t0 = (float) floor(t[0]);
		439	if (floor_t0 > FLT_MAX_EXP) {
		440	SET_POS_INFINITY(q[0]);
		441	q[1] = 0.0F;
		442	SET_POS_INFINITY(q[2]);
		443	q[3] = 1.0F;
		444	}
		445	else if (floor_t0 < FLT_MIN_EXP) {
		446	q[0] = 0.0F;
		447	q[1] = 0.0F;
		448	q[2] = 0.0F;
		449	q[3] = 0.0F;
		450	}
		451	else {
		452	#ifdef USE_IEEE
		453	GLint ii = (GLint) floor_t0;
		454	ii = (ii < 23) + 0x3f800000;
		455	SET_FLOAT_BITS(q[0], ii);
		456	q[0] = ((GLfloat ) &ii);
		457	#else
		458	q[0] = (GLfloat) pow(2.0, floor_t0);
		459	#endif
		460	q[1] = t[0] - floor_t0;
		461	q[2] = (GLfloat) (q[0] * LOG2(q[1]));
		462	q[3] = 1.0F;
		463	}
		464	store_vector4( &inst->DstReg, machine, t );
		465	}
		466	break;
		467	case LOG:
		468	{
		469	GLfloat t[4], q[4], abs_t0;
		470	fetch_vector1( &inst->SrcReg[0], machine, t );
		471	abs_t0 = (GLfloat) fabs(t[0]);
		472	if (abs_t0 != 0.0F) {
		473	/* Since we really can't handle infinite values on VMS
		474	* like other OSes we'll use __MAXFLOAT to represent
		475	* infinity. This may need some tweaking.
		476	*/
		477	#ifdef VMS
		478	if (abs_t0 == __MAXFLOAT) {
		479	#else
		480	if (IS_INF_OR_NAN(abs_t0)) {
		481	#endif
		482	SET_POS_INFINITY(q[0]);
		483	q[1] = 1.0F;
		484	SET_POS_INFINITY(q[2]);
		485	}
		486	else {
		487	int exponent;
		488	double mantissa = frexp(t[0], &exponent);
		489	q[0] = (GLfloat) (exponent - 1);
		490	q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
		491	q[2] = (GLfloat) (q[0] + LOG2(q[1]));
		492	}
		493	}
		494	else {
		495	SET_NEG_INFINITY(q[0]);
		496	q[1] = 1.0F;
		497	SET_NEG_INFINITY(q[2]);
		498	}
		499	q[3] = 1.0;
		500	store_vector4( &inst->DstReg, machine, q );
		501	}
		502	break;
		503	case MUL:
		504	{
		505	GLfloat t[4], u[4], prod[4];
		506	fetch_vector4( &inst->SrcReg[0], machine, t );
		507	fetch_vector4( &inst->SrcReg[1], machine, u );
		508	prod[0] = t[0] * u[0];
		509	prod[1] = t[1] * u[1];
		510	prod[2] = t[2] * u[2];
		511	prod[3] = t[3] * u[3];
		512	store_vector4( &inst->DstReg, machine, prod );
		513	}
		514	break;
		515	case ADD:
		516	{
		517	GLfloat t[4], u[4], sum[4];
		518	fetch_vector4( &inst->SrcReg[0], machine, t );
		519	fetch_vector4( &inst->SrcReg[1], machine, u );
		520	sum[0] = t[0] + u[0];
		521	sum[1] = t[1] + u[1];
		522	sum[2] = t[2] + u[2];
		523	sum[3] = t[3] + u[3];
		524	store_vector4( &inst->DstReg, machine, sum );
		525	}
		526	break;
		527	case DP3:
		528	{
		529	GLfloat t[4], u[4], dot[4];
		530	fetch_vector4( &inst->SrcReg[0], machine, t );
		531	fetch_vector4( &inst->SrcReg[1], machine, u );
		532	dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
		533	dot[1] = dot[2] = dot[3] = dot[0];
		534	store_vector4( &inst->DstReg, machine, dot );
		535	}
		536	break;
		537	case DP4:
		538	{
		539	GLfloat t[4], u[4], dot[4];
		540	fetch_vector4( &inst->SrcReg[0], machine, t );
		541	fetch_vector4( &inst->SrcReg[1], machine, u );
		542	dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
		543	dot[1] = dot[2] = dot[3] = dot[0];
		544	store_vector4( &inst->DstReg, machine, dot );
		545	}
		546	break;
		547	case DST:
		548	{
		549	GLfloat t[4], u[4], dst[4];
		550	fetch_vector4( &inst->SrcReg[0], machine, t );
		551	fetch_vector4( &inst->SrcReg[1], machine, u );
		552	dst[0] = 1.0F;
		553	dst[1] = t[1] * u[1];
		554	dst[2] = t[2];
		555	dst[3] = u[3];
		556	store_vector4( &inst->DstReg, machine, dst );
		557	}
		558	break;
		559	case MIN:
		560	{
		561	GLfloat t[4], u[4], min[4];
		562	fetch_vector4( &inst->SrcReg[0], machine, t );
		563	fetch_vector4( &inst->SrcReg[1], machine, u );
		564	min[0] = (t[0] < u[0]) ? t[0] : u[0];
		565	min[1] = (t[1] < u[1]) ? t[1] : u[1];
		566	min[2] = (t[2] < u[2]) ? t[2] : u[2];
		567	min[3] = (t[3] < u[3]) ? t[3] : u[3];
		568	store_vector4( &inst->DstReg, machine, min );
		569	}
		570	break;
		571	case MAX:
		572	{
		573	GLfloat t[4], u[4], max[4];
		574	fetch_vector4( &inst->SrcReg[0], machine, t );
		575	fetch_vector4( &inst->SrcReg[1], machine, u );
		576	max[0] = (t[0] > u[0]) ? t[0] : u[0];
		577	max[1] = (t[1] > u[1]) ? t[1] : u[1];
		578	max[2] = (t[2] > u[2]) ? t[2] : u[2];
		579	max[3] = (t[3] > u[3]) ? t[3] : u[3];
		580	store_vector4( &inst->DstReg, machine, max );
		581	}
		582	break;
		583	case SLT:
		584	{
		585	GLfloat t[4], u[4], slt[4];
		586	fetch_vector4( &inst->SrcReg[0], machine, t );
		587	fetch_vector4( &inst->SrcReg[1], machine, u );
		588	slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
		589	slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
		590	slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
		591	slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
		592	store_vector4( &inst->DstReg, machine, slt );
		593	}
		594	break;
		595	case SGE:
		596	{
		597	GLfloat t[4], u[4], sge[4];
		598	fetch_vector4( &inst->SrcReg[0], machine, t );
		599	fetch_vector4( &inst->SrcReg[1], machine, u );
		600	sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
		601	sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
		602	sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
		603	sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
		604	store_vector4( &inst->DstReg, machine, sge );
		605	}
		606	break;
		607	case MAD:
		608	{
		609	GLfloat t[4], u[4], v[4], sum[4];
		610	fetch_vector4( &inst->SrcReg[0], machine, t );
		611	fetch_vector4( &inst->SrcReg[1], machine, u );
		612	fetch_vector4( &inst->SrcReg[2], machine, v );
		613	sum[0] = t[0] * u[0] + v[0];
		614	sum[1] = t[1] * u[1] + v[1];
		615	sum[2] = t[2] * u[2] + v[2];
		616	sum[3] = t[3] * u[3] + v[3];
		617	store_vector4( &inst->DstReg, machine, sum );
		618	}
		619	break;
		620	case ARL:
		621	{
		622	GLfloat t[4];
		623	fetch_vector4( &inst->SrcReg[0], machine, t );
		624	machine->AddressReg = (GLint) floor(t[0]);
		625	}
		626	break;
		627	case DPH:
		628	{
		629	GLfloat t[4], u[4], dot[4];
		630	fetch_vector4( &inst->SrcReg[0], machine, t );
		631	fetch_vector4( &inst->SrcReg[1], machine, u );
		632	dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
		633	dot[1] = dot[2] = dot[3] = dot[0];
		634	store_vector4( &inst->DstReg, machine, dot );
		635	}
		636	break;
		637	case RCC:
		638	{
		639	GLfloat t[4], u;
		640	fetch_vector1( &inst->SrcReg[0], machine, t );
		641	if (t[0] == 1.0F)
		642	u = 1.0F;
		643	else
		644	u = 1.0F / t[0];
		645	if (u > 0.0F) {
		646	if (u > 1.884467e+019F) {
		647	u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
		648	}
		649	else if (u < 5.42101e-020F) {
		650	u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
		651	}
		652	}
		653	else {
		654	if (u < -1.884467e+019F) {
		655	u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
		656	}
		657	else if (u > -5.42101e-020F) {
		658	u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
		659	}
		660	}
		661	t[0] = t[1] = t[2] = t[3] = u;
		662	store_vector4( &inst->DstReg, machine, t );
		663	}
		664	break;
		665	case SUB:
		666	{
		667	GLfloat t[4], u[4], sum[4];
		668	fetch_vector4( &inst->SrcReg[0], machine, t );
		669	fetch_vector4( &inst->SrcReg[1], machine, u );
		670	sum[0] = t[0] - u[0];
		671	sum[1] = t[1] - u[1];
		672	sum[2] = t[2] - u[2];
		673	sum[3] = t[3] - u[3];
		674	store_vector4( &inst->DstReg, machine, sum );
		675	}
		676	break;
		677	case ABS:
		678	{
		679	GLfloat t[4];
		680	fetch_vector4( &inst->SrcReg[0], machine, t );
		681	if (t[0] < 0.0) t[0] = -t[0];
		682	if (t[1] < 0.0) t[1] = -t[1];
		683	if (t[2] < 0.0) t[2] = -t[2];
		684	if (t[3] < 0.0) t[3] = -t[3];
		685	store_vector4( &inst->DstReg, machine, t );
		686	}
		687	break;
		688
		689	case END:
		690	return;
		691	default:
		692	/* bad instruction opcode */
		693	_mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_program");
		694	return;
		695	}
		696	}
		697	}
		698
		699
		700
		701	/**
		702	Thoughts on vertex program optimization:
		703
		704	The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
		705	assembly code. That will probably be a lot of work.
		706
		707	Another approach might be to replace the vp_instruction->Opcode field with
		708	a pointer to a specialized C function which executes the instruction.
		709	In particular we can write functions which skip swizzling, negating,
		710	masking, relative addressing, etc. when they're not needed.
		711
		712	For example:
		713
		714	void simple_add( struct vp_instruction *inst )
		715	{
		716	GLfloat *sum = machine->Registers[inst->DstReg.Register];
		717	GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
		718	GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
		719	sum[0] = a[0] + b[0];
		720	sum[1] = a[1] + b[1];
		721	sum[2] = a[2] + b[2];
		722	sum[3] = a[3] + b[3];
		723	}
		724
		725	*/
		726
		727	/*
		728
		729	KW:
		730
		731	A first step would be to 'vectorize' the programs in the same way as
		732	the normal transformation code in the tnl module. Thus each opcode
		733	takes zero or more input vectors (registers) and produces one or more
		734	output vectors.
		735
		736	These operations would initially be coded in C, with machine-specific
		737	assembly following, as is currently the case for matrix
		738	transformations in the math/ directory. The preprocessing scheme for
		739	selecting simpler operations Brian describes above would also work
		740	here.
		741
		742	This should give reasonable performance without excessive effort.
		743
		744	*/

Subversion Repositories shark

(root)/shark/branches/xen/ports/mesa/src/vpexec.c - Rev 1618