Subversion Repositories shark

Compare Revisions

Ignore whitespace Rev 135 → Rev 134

/shark/trunk/ports/mesa/src/tmp_texformat.h
File deleted
/shark/trunk/ports/mesa/src/makefile
27,12 → 27,12
./swrast/s_span.o ./swrast/s_stencil.o ./swrast/s_texstore.o ./swrast/s_texture.o\
./swrast/s_triangle.o ./swrast/s_zoom.o ./swrast_setup/ss_context.o\
./swrast_setup/ss_triangle.o ./swrast_setup/ss_vb.o ./tnl/t_array_api.o\
./tnl/t_import_array.o ./tnl/t_context.o ./tnl/t_eval_api.o ./tnl/t_imm_alloc.o\
./tnl/t_array_import.o ./tnl/t_context.o ./tnl/t_eval_api.o ./tnl/t_imm_alloc.o\
./tnl/t_imm_api.o ./tnl/t_imm_debug.o ./tnl/t_imm_dlist.o ./tnl/t_imm_elt.o\
./tnl/t_imm_eval.o ./tnl/t_imm_exec.o ./tnl/t_imm_fixup.o ./tnl/t_pipeline.o\
./tnl/t_vb_fog.o ./tnl/t_vb_light.o\
./tnl/t_vb_normals.o ./tnl/t_vb_points.o ./tnl/t_vb_program.o ./tnl/t_vb_render.o\
./tnl/t_vb_gentex.o ./tnl/t_vb_texmat.o ./tnl/t_vb_vertex.o\
./tnl/t_vb_texgen.o ./tnl/t_vb_texmat.o ./tnl/t_vb_vertex.o\
./math/m_clip_debug.o ./math/m_norm_debug.o\
./math/m_xform_debug.o ./math/m_eval.o ./math/m_matrix.o ./math/m_translate.o\
./math/m_vector.o ./math/m_xform.o\
/shark/trunk/ports/mesa/src/x86/sse.h
0,0 → 1,39
/* $Id: sse.h,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* PentiumIII-SIMD (SSE) optimizations contributed by
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
*/
 
#ifndef __SSE_H__
#define __SSE_H__
 
#include "math/m_xform.h"
 
/*
 * Entry point of the SSE transform code; takes no arguments and returns
 * nothing.
 * NOTE(review): presumably installs the SSE-optimized point transform
 * routines -- confirm against the implementation file.
 */
void _mesa_init_sse_transform_asm( void );
 
#endif /* __SSE_H__ */
/shark/trunk/ports/mesa/src/x86/3dnow_xform1.s
0,0 → 1,423
/* $Id: 3dnow_xform1.s,v 1.1 2003-03-13 12:11:47 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "matypes.h"
#include "xform_args.h"
 
SEG_TEXT
 
/*
 * NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip the single register (ESI, 4 bytes) that every routine in this
 * file pushes before it reads its arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 4
 
 
/*
 * _mesa_3dnow_transform_points1_general
 *
 * Reads only the first float (x0) of every source vertex and writes four
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + m30      r1 = x0*m01 + m31
 *   r2 = x0*m02 + m32      r3 = x0*m03 + m33
 *
 * m00..m03 are the matrix floats at byte offsets 0..12, m30..m33 those at
 * byte offsets 48..60.
 *
 * On the destination vector V4F_SIZE is set to 4, VEC_SIZE_4 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
GLNAME( _mesa_3dnow_transform_points1_general ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

/* Symbolic V4F_START (not a literal 4) so this load follows matypes.h,
 * exactly like the source start load below. */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_3 ) ) /* count == 0 -> nothing to do */

MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */

MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */

ALIGNTEXT16
LLBL( G3TPGR_2 ):

MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */

MOVQ ( MM4, MM5 ) /* x0 | x0 */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */

PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */

PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */

MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */
ADD_L ( EDI, EAX ) /* next vertex */

ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPGR_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points1_identity
 *
 * Copies the first float (x0) of every source vertex to r0 of the
 * corresponding destination vertex; no other destination float is written.
 * The matrix pointer is loaded but never dereferenced.
 *
 * On the destination vector V4F_SIZE is set to 1, VEC_SIZE_1 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
GLNAME( _mesa_3dnow_transform_points1_identity ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(1), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 1 */
OR_B ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

/* Symbolic V4F_START (not a literal 4) so this load follows matypes.h,
 * exactly like the source start load below. */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix (unused here) */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_4) ) /* count == 0 -> nothing to do */

ALIGNTEXT16
LLBL( G3TPIR_3 ):

MOVD ( REGIND(EAX), MM0 ) /* 0 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */

MOVD ( MM0, REGIND(EDX) ) /* write r0 */
ADD_L ( CONST(16), EDX ) /* next r */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPIR_4 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points1_3d_no_rot
 *
 * Reads only the first float (x0) of every source vertex and writes three
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + m30      r1 = m31      r2 = m32
 *
 * m00 is the matrix float at byte offset 0; m30, m31, m32 are those at
 * byte offsets 48, 52 and 56.
 *
 * On the destination vector V4F_SIZE is set to 3, VEC_SIZE_3 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

/* Symbolic V4F_START (not a literal 4) so this load follows matypes.h,
 * exactly like the source start load below. */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_3 ) ) /* count == 0 -> nothing to do */

MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */

MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */

ALIGNTEXT16
LLBL( G3TP3NRR_2 ):

MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */

PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */

MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 (= m32) */
ADD_L ( EDI, EAX ) /* next vertex */

ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3NRR_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points1_perspective
 *
 * Reads only the first float (x0) of every source vertex and writes four
 * floats per destination vertex:
 *
 *   r0 = x0*m00      r1 = 0      r2 = m32      r3 = 0
 *
 * m00 is the matrix float at byte offset 0, m32 the one at byte offset 56.
 * The zeros come from the upper register halves that MOVD clears.
 *
 * On the destination vector V4F_SIZE is set to 4, VEC_SIZE_4 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
GLNAME( _mesa_3dnow_transform_points1_perspective ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

/* Symbolic V4F_START (not a literal 4) so this load follows matypes.h,
 * exactly like the source start load below. */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_3 ) ) /* count == 0 -> nothing to do */

MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */

ALIGNTEXT16
LLBL( G3TPPR_2 ):

MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */

MOVQ ( MM4, REGIND(EDX) ) /* write r1 (=0), r0 */
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */

ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPPR_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points1_2d
 *
 * Reads only the first float (x0) of every source vertex and writes two
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + m30      r1 = x0*m01 + m31
 *
 * m00, m01 are the matrix floats at byte offsets 0 and 4; m30, m31 those
 * at byte offsets 48 and 52.
 *
 * On the destination vector V4F_SIZE is set to 2, VEC_SIZE_2 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
GLNAME( _mesa_3dnow_transform_points1_2d ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

/* Symbolic V4F_START (not a literal 4) so this load follows matypes.h,
 * exactly like the source start load below. */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3 ) ) /* count == 0 -> nothing to do */

MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2R_2 ):

MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */

PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */

MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( EDI, EAX ) /* next vertex */

ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2R_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points1_2d_no_rot
 *
 * Reads only the first float (x0) of every source vertex and writes two
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + m30      r1 = m31
 *
 * m00 is the matrix float at byte offset 0; m30, m31 are those at byte
 * offsets 48 and 52.
 *
 * On the destination vector V4F_SIZE is set to 2, VEC_SIZE_2 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

/* Symbolic V4F_START (not a literal 4) so this load follows matypes.h,
 * exactly like the source start load below. */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 -> nothing to do */

MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):

MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */

PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */

MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( CONST(16), EDX ) /* next r */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2NRR_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points1_3d
 *
 * Reads only the first float (x0) of every source vertex and writes three
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + m30      r1 = x0*m01 + m31      r2 = x0*m02 + m32
 *
 * m00..m02 are the matrix floats at byte offsets 0..8, m30..m32 those at
 * byte offsets 48..56.
 *
 * On the destination vector V4F_SIZE is set to 3, VEC_SIZE_3 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
GLNAME( _mesa_3dnow_transform_points1_3d ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

/* Symbolic V4F_START (not a literal 4) so this load follows matypes.h,
 * exactly like the source start load below. */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_3 ) ) /* count == 0 -> nothing to do */

MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVD ( REGOFF(8, ECX), MM1 ) /* 0 | m02 */

MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */

ALIGNTEXT16
LLBL( G3TP3R_2 ):

MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */

MOVQ ( MM4, MM5 ) /* x0 | x0 */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */

PFMUL ( MM1, MM5 ) /* x0*0 | x0*m02 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */

PFADD ( MM3, MM5 ) /* (unused) | x0*m02+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */

MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 (low half only) */
ADD_L ( EDI, EAX ) /* next vertex */

ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3R_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
/shark/trunk/ports/mesa/src/x86/3dnow_xform2.s
0,0 → 1,464
/* $Id: 3dnow_xform2.s,v 1.1 2003-03-13 12:11:48 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "matypes.h"
#include "xform_args.h"
 
SEG_TEXT
 
/*
 * NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip the single register (ESI, 4 bytes) that every routine in this
 * file pushes before it reads its arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 4
 
 
/*
 * _mesa_3dnow_transform_points2_general
 *
 * Reads two floats (x0, x1) from every source vertex and writes four
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + x1*m10 + m30      r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32      r3 = x0*m03 + x1*m13 + m33
 *
 * m0j, m1j and m3j are the matrix floats at byte offsets 4*j, 16+4*j and
 * 48+4*j respectively.
 *
 * On the destination vector V4F_SIZE is set to 4, VEC_SIZE_4 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_general )
GLNAME( _mesa_3dnow_transform_points2_general ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_3 ) ) /* count == 0 -> nothing to do */

/* Pair each row-0 element with the row-1 element of the same column so a
 * single PFMUL by (x1 | x0) yields both products for one result. */
MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */

MOVD ( REGOFF(4, ECX), MM1 ) /* 0 | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */

MOVD ( REGOFF(8, ECX), MM2 ) /* 0 | m02 */
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */

MOVD ( REGOFF(12, ECX), MM3 ) /* 0 | m03 */
PUNPCKLDQ ( REGOFF(28, ECX), MM3 ) /* m13 | m03 */

MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */
MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */

ALIGNTEXT16
LLBL( G3TPGR_2 ):

MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
MOVQ ( MM6, MM7 ) /* x1 | x0 */

PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */

PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */

MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (reloaded for r2, r3) */

MOVQ ( MM6, MM7 ) /* x1 | x0 */
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */

PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */
ADD_L ( EDI, EAX ) /* next vertex */

PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */
PFADD ( MM5, MM6 ) /* x0*...*m13+m33 | x0*...*m12+m32 */

MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */
ADD_L ( CONST(16), EDX ) /* next r */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPGR_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Reads two floats (x0, x1) from every source vertex and writes four
 * floats per destination vertex:
 *
 *   r0 = x0*m00      r1 = x1*m11      r2 = m32      r3 = 0
 *
 * m00, m11 and m32 are the matrix floats at byte offsets 0, 20 and 56.
 * r3 is the upper register half that MOVD clears when loading m32.
 *
 * On the destination vector V4F_SIZE is set to 4, VEC_SIZE_4 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
GLNAME( _mesa_3dnow_transform_points2_perspective ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_3 ) ) /* count == 0 -> nothing to do */

MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */

ALIGNTEXT16
LLBL( G3TPPR_2 ):

MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */

MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */

ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPPR_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * Reads two floats (x0, x1) from every source vertex and writes three
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32
 *
 * m0j, m1j and m3j are the matrix floats at byte offsets 4*j, 16+4*j and
 * 48+4*j respectively.
 *
 * On the destination vector V4F_SIZE is set to 3, VEC_SIZE_3 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
GLNAME( _mesa_3dnow_transform_points2_3d ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_3 ) ) /* count == 0 -> nothing to do */

/* Pair each row-0 element with the row-1 element of the same column so a
 * single PFMUL by (x1 | x0) yields both products for one result. */
MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */

MOVD ( REGOFF(4, ECX), MM1 ) /* 0 | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */

MOVD ( REGOFF(8, ECX), MM2 ) /* 0 | m02 */
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */

MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM5 ) /* 0 | m32 */

ALIGNTEXT16
LLBL( G3TP3R_2 ):

MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
MOVQ ( MM6, MM7 ) /* x1 | x0 */

PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */

PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */

MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (reloaded for r2) */

MOVQ ( MM6, MM7 ) /* x1 | x0 */
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */

/* Only the low half matters from here on: the high half becomes the sum
 * of MM7's halves (x0+x1), which is never stored. */
PFACC ( MM7, MM6 ) /* ***trash*** | x0*m02+x1*m12 */
PFADD ( MM5, MM6 ) /* ***trash*** | x0*...*m12+m32 */

MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */
ADD_L ( EDI, EAX ) /* next vertex */

ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3R_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Reads two floats (x0, x1) from every source vertex and writes three
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + m30      r1 = x1*m11 + m31      r2 = m32
 *
 * m00 and m11 are the matrix floats at byte offsets 0 and 20; m30, m31,
 * m32 are those at byte offsets 48, 52 and 56.
 *
 * On the destination vector V4F_SIZE is set to 3, VEC_SIZE_3 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_3 ) ) /* count == 0 -> nothing to do */

MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */

ALIGNTEXT16
LLBL( G3TP3NRR_2 ):

MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */

PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */

MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 (= m32) */
ADD_L ( EDI, EAX ) /* next vertex */

ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3NRR_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * Reads two floats (x0, x1) from every source vertex and writes two
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + x1*m10 + m30      r1 = x0*m01 + x1*m11 + m31
 *
 * m00, m01 are the matrix floats at byte offsets 0 and 4; m10, m11 those
 * at 16 and 20; m30, m31 those at 48 and 52.
 *
 * On the destination vector V4F_SIZE is set to 2, VEC_SIZE_2 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
GLNAME( _mesa_3dnow_transform_points2_2d ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3 ) ) /* count == 0 -> nothing to do */

MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */

MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2R_2 ):

MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
MOVD ( REGOFF(4, EAX), MM5 ) /* 0 | x1 */

PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */

PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */

PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */

PFADD ( MM5, MM4 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */

ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2R_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Reads two floats (x0, x1) from every source vertex and writes two
 * floats per destination vertex:
 *
 *   r0 = x0*m00 + m30      r1 = x1*m11 + m31
 *
 * m00 and m11 are the matrix floats at byte offsets 0 and 20; m30, m31
 * are those at byte offsets 48 and 52.
 *
 * On the destination vector V4F_SIZE is set to 2, VEC_SIZE_2 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 -> nothing to do */

MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):

MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */

PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */

MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( CONST(16), EDX ) /* next r */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2NRR_3 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Copies the first two floats (x0, x1) of every source vertex to r0, r1
 * of the corresponding destination vertex; no other destination float is
 * written.  The matrix pointer is loaded but never dereferenced.
 *
 * On the destination vector V4F_SIZE is set to 2, VEC_SIZE_2 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
GLNAME( _mesa_3dnow_transform_points2_identity ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix (unused here) */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
/* Must branch to the exit label.  Jumping to the loop head with a zero
 * count would let DEC wrap the counter and copy ~2^32 vertices. */
JZ ( LLBL( G3TPIR_4 ) ) /* count == 0 -> nothing to do */

ALIGNTEXT16
LLBL( G3TPIR_3 ):

MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */

MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */
ADD_L ( CONST(16), EDX ) /* next r */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPIR_4 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
/shark/trunk/ports/mesa/src/x86/3dnow_xform3.s
0,0 → 1,548
/* $Id: 3dnow_xform3.s,v 1.1 2003-03-13 12:11:48 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "matypes.h"
#include "xform_args.h"
 
SEG_TEXT
 
/*
 * NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip the single register (ESI, 4 bytes) that every routine in this
 * file pushes before it reads its arguments -- confirm in xform_args.h.
 */
#define FRAME_OFFSET 4
 
 
/*
 * _mesa_3dnow_transform_points3_general
 *
 * Reads three floats (x0, x1, x2) from every source vertex and writes
 * four floats per destination vertex.  With mN denoting the matrix float
 * at byte offset 4*N:
 *
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *   r3 = x0*m3 + x1*m7 + x2*m11 + m15
 *
 * Matrix operands are read from memory inside the loop rather than being
 * cached in registers.
 *
 * On the destination vector V4F_SIZE is set to 4, VEC_SIZE_4 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
GLNAME( _mesa_3dnow_transform_points3_general ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_2 ) ) /* count == 0 -> nothing to do */

PREFETCHW ( REGIND(EDX) ) /* first output vertex, will be written */

ALIGNTEXT16
LLBL( G3TPGR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM2 ) /* 0 | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next input vertex, read only */

MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */

PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
MOVQ ( MM2, MM5 ) /* x2 | x2 */

PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */

MOVQ ( MM0, MM3 ) /* x0 | x0 */
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */

MOVQ ( MM1, MM4 ) /* x1 | x1 */
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */

PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */

PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */

PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */
PFADD ( MM1, MM2 ) /* r1 | r0 */

PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */
ADD_L ( CONST(16), EDX ) /* next output vertex */

/* EDX already points at the next vertex, hence the negative offsets. */
PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */

PFADD ( MM4, MM5 ) /* r3 | r2 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPGR_2 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Reads three floats (x0, x1, x2) from every source vertex and writes
 * four floats per destination vertex:
 *
 *   r0 = x0*m00 + x2*m20      r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + m32         r3 = -x2
 *
 * m00, m11 are the matrix floats at byte offsets 0 and 20; m20, m21, m22
 * those at 32, 36 and 40; m32 the one at 56.
 *
 * On the destination vector V4F_SIZE is set to 4, VEC_SIZE_4 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
GLNAME( _mesa_3dnow_transform_points3_perspective ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_2 ) ) /* count == 0 -> nothing to do */

PREFETCH ( REGIND(EAX) ) /* first input vertex, read only */
PREFETCHW ( REGIND(EDX) ) /* first output vertex, will be written */

MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */
MOVD ( REGOFF(40, ECX), MM2 ) /* 0 | m22 */

MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */

ALIGNTEXT16
LLBL( G3TPPR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVD ( REGOFF(8, EAX), MM5 ) /* 0 | x2 */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */

ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next input vertex, read only */

PXOR ( MM7, MM7 ) /* 0 | 0 */
MOVQ ( MM5, MM6 ) /* 0 | x2 */

PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFSUB ( MM5, MM7 ) /* 0 | -x2 (computed as 0 - x2) */

PFMUL ( MM2, MM6 ) /* 0 | x2*m22 */
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */

ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */

PFADD ( MM3, MM6 ) /* 0 | x2*m22+m32 */
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */

/* EDX already points at the next vertex, hence the negative offsets. */
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */

MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 (= -x2) */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPPR_2 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Reads three floats (x0, x1, x2) from every source vertex and writes
 * three floats per destination vertex.  With mN denoting the matrix float
 * at byte offset 4*N:
 *
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *
 * On the destination vector V4F_SIZE is set to 3, VEC_SIZE_3 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes),
 * MM7 = (m6 | m2) kept across iterations.
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
GLNAME( _mesa_3dnow_transform_points3_3d ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_2 ) ) /* count == 0 -> nothing to do */

PREFETCH ( REGIND(EAX) ) /* first input vertex, read only */
/* The destination is about to be written, so prime it with PREFETCHW
 * like the in-loop prefetch and the sibling routines do. */
PREFETCHW ( REGIND(EDX) )

MOVD ( REGOFF(8, ECX), MM7 ) /* 0 | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */


ALIGNTEXT16
LLBL( G3TP3R_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* 0 | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next input vertex, read only */

MOVQ ( MM0, MM2 ) /* x1 | x0 */
ADD_L ( CONST(16), EDX ) /* next r */

PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */
MOVQ ( MM0, MM3 ) /* x1 | x0 */

PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */
PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */

MOVQ ( MM1, MM4 ) /* 0 | x2 */
PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */

PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */

PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+...+m13 | x0*m0+x1*m4+m12 */

PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */
PFADD ( MM4, MM3 ) /* r1 | r0 */

PFMUL ( REGOFF(40, ECX), MM1 ) /* 0*m11 | x2*m10 */
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */

/* Two horizontal adds fold the four r2 terms into the low half. */
PFACC ( MM0, MM1 ) /* x0*m2+x1*m6 | x2*m10+m14 */

/* EDX already points at the next vertex, hence the negative offsets. */
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
PFACC ( MM1, MM1 ) /* r2 | r2 */

MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3R_2 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Reads three floats (x0, x1, x2) from every source vertex and writes
 * three floats per destination vertex:
 *
 *   r0 = x0*m00 + m30      r1 = x1*m11 + m31      r2 = x2*m22 + m32
 *
 * m00, m11, m22 are the matrix floats at byte offsets 0, 20 and 40;
 * m30, m31, m32 those at byte offsets 48, 52 and 56.
 *
 * On the destination vector V4F_SIZE is set to 3, VEC_SIZE_3 is ORed into
 * V4F_FLAGS and V4F_COUNT is copied from the source.  A source count of
 * zero skips the loop.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * ESI and EDI are saved and restored; FEMMS runs on every exit path.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_2 ) ) /* count == 0 -> nothing to do */

PREFETCH ( REGIND(EAX) ) /* first input vertex, read only */
PREFETCHW ( REGIND(EDX) ) /* first output vertex, will be written */

MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVD ( REGOFF(40, ECX), MM2 ) /* 0 | m22 */
PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */

MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */

PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */


ALIGNTEXT16
LLBL( G3TP3NRR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* 0 | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
/* The source is only read, so use plain PREFETCH: PREFETCHW would mark
 * the line as modified for no benefit. */
PREFETCH ( REGIND(EAX) )
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */

PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
PFMUL ( MM2, MM5 ) /* 0*m22 | x2*m22 */

PFADD ( MM3, MM5 ) /* (unused) | x2*m22+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */

ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */

/* MOVD leaves EFLAGS alone, so JNZ still tests the DEC result. */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 */
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3NRR_2 ):

FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points3_2d
 *
 * For every 3-component source point computes
 *    r0 = x0*m00 + x1*m10 + m30
 *    r1 = x0*m01 + x1*m11 + m31
 *    r2 = x2                       (copied unchanged)
 * using the matrix floats at byte offsets 0, 4, 16, 20 and 48..55.
 * Sets the dest size field to 3, ORs VEC_SIZE_3 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
GLNAME( _mesa_3dnow_transform_points3_2d ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3) ) /* nothing to do for count == 0 */

PREFETCH ( REGIND(EAX) )
PREFETCHW ( REGIND(EDX) )

/* Loop-invariant matrix terms, kept in MM0..MM2. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */

MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */

MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2R_2 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) )

MOVQ ( MM3, MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */

ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */

/* PFACC puts the sum of MM3's halves in the low half and the sum of MM4's halves in the high half. */
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */

PFADD ( MM2, MM3 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2R_3 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * For every 3-component source point computes
 *    r0 = x0*m00 + m30
 *    r1 = x1*m11 + m31
 *    r2 = x2                       (copied unchanged)
 * using the matrix floats at byte offsets 0, 20 and 48..55.
 * Sets the dest size field to 3, ORs VEC_SIZE_3 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_2 ) ) /* nothing to do for count == 0 */

PREFETCH ( REGIND(EAX) )
PREFETCHW ( REGIND(EDX) )

/* Loop-invariant matrix terms, kept in MM0 and MM1. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */


ALIGNTEXT16
LLBL( G3TP2NRR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) )

PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
ADD_L ( CONST(16), EDX ) /* next r */

PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */

MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2NRR_2 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Copies x0, x1, x2 of every source point to dest unchanged.  The matrix
 * argument is loaded into ECX but never dereferenced.
 * Sets the dest size field to 3, ORs VEC_SIZE_3 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
GLNAME( _mesa_3dnow_transform_points3_identity ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix (unused below) */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_2 ) ) /* nothing to do for count == 0 */

PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPIR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */

/* The MMX stores below do not touch EFLAGS, so ZF from DEC_L reaches JNZ. */
DEC_L ( ESI ) /* decrement vertex counter */
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */

MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPIR_2 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
/shark/trunk/ports/mesa/src/x86/3dnow_xform4.s
0,0 → 1,557
/* $Id: 3dnow_xform4.s,v 1.1 2003-03-13 12:11:48 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "matypes.h"
#include "xform_args.h"
 
SEG_TEXT
 
#define FRAME_OFFSET 4
 
 
/*
 * _mesa_3dnow_transform_points4_general
 *
 * Full 4x4 transform of 4-component points.  With m0..m15 the sixteen
 * consecutive matrix floats:
 *    r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *    r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *    r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *    r3 = x0*m3 + x1*m7 + x2*m11 + x3*m15
 * Sets the dest size field to 4, ORs VEC_SIZE_4 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 * All eight MMX registers are used; the matrix is read from memory on
 * every iteration.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
GLNAME( _mesa_3dnow_transform_points4_general ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_2 ) ) /* nothing to do for count == 0 */

PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPGR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) )
MOVQ ( MM0, MM2 ) /* x1 | x0 */
MOVQ ( MM4, MM6 ) /* x3 | x2 */

PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */

MOVQ ( MM0, MM1 ) /* x0 | x0 */
ADD_L ( CONST(16), EDX ) /* next r */

PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
MOVQ ( MM2, MM3 ) /* x1 | x1 */

PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */

PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */
MOVQ ( MM4, MM5 ) /* x2 | x2 */

PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */

PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
MOVQ ( MM6, MM7 ) /* x3 | x3 */

PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
PFADD ( MM0, MM2 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */

PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */
PFADD ( MM1, MM3 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */

PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */
PFADD ( MM4, MM6 ) /* x2*m9+x3*m13 | x2*m8+x3*m12 */

PFADD ( MM5, MM7 ) /* x2*m11+x3*m15 | x2*m10+x3*m14 */
PFADD ( MM2, MM6 ) /* r1 | r0 */

PFADD ( MM3, MM7 ) /* r3 | r2 */
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */

MOVQ ( MM7, REGOFF(-8, EDX) ) /* write r2, r3 */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPGR_2 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * For every 4-component source point computes
 *    r0 = x0*m00 + x2*m20
 *    r1 = x1*m11 + x2*m21
 *    r2 = x2*m22 + x3*m32
 *    r3 = -x2
 * using the matrix floats at byte offsets 0, 20, 32..39, 40 and 56.
 * Sets the dest size field to 4, ORs VEC_SIZE_4 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 * MM7 holds 0.0 for the whole loop and is used to negate x2.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
GLNAME( _mesa_3dnow_transform_points4_perspective ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_2 ) ) /* nothing to do for count == 0 */

PREFETCH ( REGIND(EAX) )
PREFETCHW ( REGIND(EDX) )

/* Loop-invariant matrix terms in MM0..MM2, zero in MM7. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */

MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */
PXOR ( MM7, MM7 ) /* 0 | 0 */

ALIGNTEXT16
LLBL( G3TPPR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
/* Prefetches 32 bytes beyond the vertex EAX now points at. */
PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */

MOVQ ( MM5, MM6 ) /* x3 | x2 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */

PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
ADD_L ( CONST(16), EDX ) /* next r */

PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */
/* Reverse subtract: MM3 = MM7 - MM3 = 0 - x2. */
PFSUBR ( MM7, MM3 ) /* | -x2 */

PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */

/* PFACC: low half = sum of MM6's halves, high half = sum of MM3's halves. */
PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */

MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */
DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPPR_2 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * For every 4-component source point computes (m0..m15 = consecutive
 * matrix floats):
 *    r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *    r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *    r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *    r3 = x3                       (copied unchanged)
 * Sets the dest size field to 4, ORs VEC_SIZE_4 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining vertices, EDI = source stride.  Unlike the sibling
 * routines, EAX is advanced near the end of the loop body, after the last
 * read of the current vertex.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
GLNAME( _mesa_3dnow_transform_points4_3d ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_2 ) ) /* nothing to do for count == 0 */

/* Third matrix column terms are loop-invariant and kept in MM6/MM7. */
MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */

MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */

ALIGNTEXT16
LLBL( G3TP3R_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */

MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */

MOVQ ( MM2, MM0 ) /* x1 | x0 */
MOVQ ( MM3, MM4 ) /* x3 | x2 */

MOVQ ( MM0, MM1 ) /* x1 | x0 */
MOVQ ( MM4, MM5 ) /* x3 | x2 */

PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */

PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */

PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */

PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */

PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */
ADD_L ( CONST(16), EDX ) /* next r */

PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */
PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */

PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */
PFADD ( MM3, MM4 ) /* r1 | r0 */

PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */
/* EAX has not been advanced yet, so this is x3 of the current vertex. */
MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */

ADD_L ( EDI, EAX ) /* next vertex */
/* MM0's high half is 0, so the high result is x3 + 0 = x3. */
PFACC ( MM0, MM5 ) /* r3 | r2 */

MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3R_2 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * For every 4-component source point computes
 *    r0 = x0*m00 + x3*m30
 *    r1 = x1*m11 + x3*m31
 *    r2 = x2*m22 + x3*m32
 *    r3 = x3                       (copied unchanged)
 * using the matrix floats at byte offsets 0, 20, 40 and 48..59.
 * Sets the dest size field to 4, ORs VEC_SIZE_4 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):

PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_2 ) ) /* nothing to do for count == 0 */

/* Loop-invariant matrix terms, kept in MM0..MM2. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */

MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP3NRR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */

ADD_L ( EDI, EAX ) /* next vertex */
/* Prefetches 32 bytes beyond the vertex EAX now points at. */
PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */

MOVQ ( MM5, MM6 ) /* x3 | x2 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */

PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */

PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
/* MM7's high half is 0, so the high result is x3 + 0 = x3. */
PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */

PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
ADD_L ( CONST(16), EDX ) /* next r */

MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP3NRR_2 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points4_2d
 *
 * For every 4-component source point computes
 *    r0 = x0*m00 + x1*m10 + x3*m30
 *    r1 = x0*m01 + x1*m11 + x3*m31
 *    r2 = x2,  r3 = x3             (copied unchanged)
 * using the matrix floats at byte offsets 0, 4, 16, 20 and 48..55.
 * Sets the dest size field to 4, ORs VEC_SIZE_4 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
GLNAME( _mesa_3dnow_transform_points4_2d ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_2 ) ) /* nothing to do for count == 0 */

/* Loop-invariant matrix terms, kept in MM0..MM2. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */

MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */

MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2R_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) )

MOVQ ( MM3, MM4 ) /* x1 | x0 */
MOVQ ( MM5, MM6 ) /* x3 | x2 */

PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */

PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
ADD_L ( CONST(16), EDX ) /* next r */

PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */

PFADD ( MM6, MM3 ) /* r1 | r0 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */

MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2R_2 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * For every 4-component source point computes
 *    r0 = x0*m00 + x3*m30
 *    r1 = x1*m11 + x3*m31
 *    r2 = x2,  r3 = x3             (copied unchanged)
 * using the matrix floats at byte offsets 0, 20 and 48..55.
 * Sets the dest size field to 4, ORs VEC_SIZE_4 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* nothing to do for count == 0 */

/* Loop-invariant matrix terms, kept in MM0 and MM1. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */

MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */

MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) )

PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
MOVQ ( MM5, MM6 ) /* x3 | x2 */

ADD_L ( CONST(16), EDX ) /* next r */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */

PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */

MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */

DEC_L ( ESI ) /* decrement vertex counter */

JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TP2NRR_3 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
 
 
 
 
/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies x0..x3 of every source point to dest unchanged.  The matrix
 * argument is loaded into ECX but never dereferenced.
 * Sets the dest size field to 4, ORs VEC_SIZE_4 into the dest flags, copies
 * the source count to dest and writes results 16 bytes apart.
 *
 * Loop registers: EAX = source cursor, EDX = dest cursor,
 * ESI = remaining vertices, EDI = source stride (added to EAX per vertex).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
GLNAME( _mesa_3dnow_transform_points4_identity ):

PUSH_L ( ESI )

MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */

/* All ARG_* loads are done before this second push. */
PUSH_L ( EDI )

MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix (unused below) */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source->start */

TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_2 ) ) /* nothing to do for count == 0 */

ALIGNTEXT16
LLBL( G3TPIR_1 ):

PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */

ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) )

ADD_L ( CONST(16), EDX ) /* next r */
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */

MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */

DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */

LLBL( G3TPIR_2 ):

FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
/shark/trunk/ports/mesa/src/x86/3dnow_normal.s
0,0 → 1,836
/* $Id: 3dnow_normal.s,v 1.1 2003-03-13 12:11:47 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 4.1
*
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* 3Dnow assembly code by Holger Waechtler
*/
 
#include "matypes.h"
#include "norm_args.h"
 
SEG_TEXT
 
#define M(i) REGOFF(i * 4, ECX)
#define STRIDE REGOFF(12, ESI)
 
 
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Pass 1 multiplies every input normal by nine floats of mat->inv
 * (m0..m15 = consecutive floats) and stores the result to dest, 16 bytes
 * per normal:
 *    r0 = x0*m0 + x1*m1 + x2*m2
 *    r1 = x0*m4 + x1*m5 + x2*m6
 *    r2 = x0*m8 + x1*m9 + x2*m10
 * Pass 2 walks dest again and rescales each normal in place, either by the
 * caller-supplied per-normal factor read from ARG_LENGTHS or, when that
 * pointer is NULL, by a computed 1/sqrt(r0*r0 + r1*r1 + r2*r2).
 * When ARG_LENGTHS is NULL the matrix terms are pre-multiplied by ARG_SCALE.
 * NOTE(review): scale is applied only on the NULL-lengths path; confirm this
 * matches the C reference implementation.
 *
 * Registers: EDX = input cursor, EAX = dest cursor, ECX = mat->inv,
 * ESI = input vector (for STRIDE), EDI = lengths cursor, EBP = counter.
 *
 * Fixes relative to the previous revision:
 *  - G3TN_norm now loads the normal at the top of the loop.  It used to
 *    normalize whatever pass 1 left in MM0/MM1 on its first iteration and
 *    to read one normal past the end of dest on its last.
 *  - Loops end with JNZ instead of JA.  DEC does not write CF, so JA tested
 *    the carry of an earlier ADD.
 *  - m10 is loaded with MOVD so the unused high half of MM7 is zero.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )

MOV_L ( ARG_LENGTHS, EDI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) )
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3TN_end) )

MOV_L ( REGOFF (V3F_COUNT, ESI), EBP )
FEMMS

PUSH_L ( EBP )
PUSH_L ( EAX )
PUSH_L ( EDX ) /* save counter & pointer for */
/* the normalize pass */
#undef FRAME_OFFSET
#define FRAME_OFFSET 24

MOVQ ( M(0), MM3 ) /* m1 | m0 */
MOVQ ( M(4), MM4 ) /* m5 | m4 */

MOVD ( M(2), MM5 ) /* | m2 */
PUNPCKLDQ ( M(6), MM5 ) /* m6 | m2 */

MOVQ ( M(8), MM6 ) /* m9 | m8 */
MOVD ( M(10), MM7 ) /* | m10 */

CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JNE ( LLBL (G3TN_scale_end ) )

MOVD ( ARG_SCALE, MM0 ) /* | scale */
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */

PFMUL ( MM0, MM3 ) /* scale * m1 | scale * m0 */
PFMUL ( MM0, MM4 ) /* scale * m5 | scale * m4 */
PFMUL ( MM0, MM5 ) /* scale * m6 | scale * m2 */
PFMUL ( MM0, MM6 ) /* scale * m9 | scale * m8 */
PFMUL ( MM0, MM7 ) /* | scale * m10 */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */

MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */

PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */

PREFETCHW ( REGIND(EAX) )

PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */

PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* r1 | r0 */

MOVQ ( REGIND (EDX), MM1 ) /* x1 | x0 */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */

PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */

PFMUL ( MM7, MM2 ) /* | x2*m10 */
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */

PFADD ( MM2, MM1 ) /* *not used* | r2 */
ADD_L ( STRIDE, EDX ) /* next normal */

PREFETCH ( REGIND(EDX) )

MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
DEC_L ( EBP ) /* decrement normal counter */
JNZ ( LLBL (G3TN_transform) ) /* cnt > 0 ? -> next normal */


POP_L ( EDX ) /* end of transform --- */
POP_L ( EAX ) /* now normalizing ... */
POP_L ( EBP )

CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JE ( LLBL (G3TN_norm ) ) /* calculate lengths */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):

PREFETCHW ( REGOFF(12,EAX) )

MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */

MOVD ( REGIND (EDI), MM3 ) /* | length (x) */
PFMUL ( MM3, MM1 ) /* | x2 (normalized) */

PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */

ADD_L ( STRIDE, EDX ) /* next normal (EDX is not read in this pass) */
ADD_L ( CONST(4), EDI ) /* next length */

PREFETCH ( REGIND(EDI) )

MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */

ADD_L ( CONST(16), EAX ) /* next r */
DEC_L ( EBP ) /* decrement normal counter */

JNZ ( LLBL (G3TN_norm_w_lengths) ) /* cnt > 0 ? -> next normal */
JMP ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):

PREFETCHW ( REGIND(EAX) )

/* Load the normal to process at the top of each iteration. */
MOVQ ( REGIND (EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */

MOVQ ( MM0, MM3 ) /* x1 | x0 */
MOVQ ( MM1, MM4 ) /* | x2 */

PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
ADD_L ( CONST(16), EAX ) /* next r */

PFMUL ( MM1, MM4 ) /* | x2*x2 */
PFADD ( MM4, MM3 ) /* x1*x1 | x0*x0+x2*x2 */

PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1+x2*x2 */
PFRSQRT ( MM3, MM5 ) /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */

MOVQ ( MM5, MM4 ) /* keep the first estimate */
PUNPCKLDQ ( MM3, MM3 ) /* len^2 | len^2 */

/* The MMX/3DNow! ops and stores below do not touch EFLAGS, so ZF from DEC_L reaches JNZ. */
DEC_L ( EBP ) /* decrement normal counter */
PFMUL ( MM5, MM5 ) /* estimate squared */

PFRSQIT1 ( MM3, MM5 ) /* reciprocal sqrt refinement, step 1 */
PFRCPIT2 ( MM4, MM5 ) /* refinement, step 2 -> 1/len */

PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */

MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
PFMUL ( MM5, MM1 ) /* | x2 (normalized) */

MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
JNZ ( LLBL (G3TN_norm) ) /* cnt > 0 ? -> next normal */

LLBL (G3TN_exit_3dnow):
FEMMS

LLBL (G3TN_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
 
 
 
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Scales each input normal by three floats of mat->inv (m0, m5, m10) and
 * normalizes it in the same loop, writing 16 bytes per normal to dest:
 *    t0 = x0*m0,  t1 = x1*m5,  t2 = x2*m10
 * If ARG_LENGTHS is non-NULL the result is t * lengths[i]; otherwise the
 * matrix terms are first multiplied by ARG_SCALE and the result is
 * t / sqrt(t0*t0 + t1*t1 + t2*t2).
 *
 * Registers: EDX = input cursor, EAX = dest cursor, ECX = mat->inv,
 * ESI = input vector (for STRIDE), EDI = lengths cursor, EBP = counter.
 *
 * Fixes relative to the previous revision:
 *  - Loops end with JNZ instead of JA.  DEC does not write CF, so JA tested
 *    the carry of an earlier ADD.
 *  - The per-normal length is loaded at the top of the loop instead of being
 *    preloaded for the next iteration, which read one element past the end
 *    of the lengths array on the final iteration.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )

MOV_L ( ARG_LENGTHS, EDI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */

CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3TNNR_end) )

FEMMS

MOVD ( M(0), MM0 ) /* | m0 */
PUNPCKLDQ ( M(5), MM0 ) /* m5 | m0 */

MOVD ( M(10), MM2 ) /* | m10 */
PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */

CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JNE ( LLBL (G3TNNR_scale_end ) )

MOVD ( ARG_SCALE, MM7 ) /* | scale */
PUNPCKLDQ ( MM7, MM7 ) /* scale | scale */

PFMUL ( MM7, MM0 ) /* scale * m5 | scale * m0 */
PFMUL ( MM7, MM2 ) /* scale * m10 | scale * m10 */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JE ( LLBL (G3TNNR_norm) ) /* need to calculate lengths */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */

PREFETCHW ( REGIND(EAX) )

MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */

/* Load this normal's length here so the array is never read past count. */
MOVD ( REGIND(EDI), MM3 ) /* | length (x) */

PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX ) /* next normal */

PREFETCH ( REGIND(EDX) )

PFMUL ( MM2, MM7 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */

PFMUL ( MM3, MM7 ) /* | x2 (normalized) */
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */

ADD_L ( CONST(4), EDI ) /* next length */
PFMUL ( MM3, MM6 ) /* x1 (normalized) | x0 (normalized) */

/* The MMX stores below do not touch EFLAGS, so ZF from DEC_L reaches JNZ. */
DEC_L ( EBP ) /* decrement normal counter */
MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */

MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */

JNZ ( LLBL (G3TNNR_norm_w_lengths) ) /* cnt > 0 ? -> next normal */
JMP ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm): /* need to calculate lengths */

PREFETCHW ( REGIND(EAX) )

MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */

PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */

PFMUL ( MM2, MM7 ) /* | x2*m10 */
MOVQ ( MM6, MM3 ) /* x1 (transformed)| x0 (transformed) */

MOVQ ( MM7, MM4 ) /* | x2 (transformed) */
PFMUL ( MM6, MM3 ) /* x1*x1 | x0*x0 */


PFMUL ( MM7, MM4 ) /* | x2*x2 */
PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1 */

PFADD ( MM4, MM3 ) /* | x0*x0+x1*x1+x2*x2*/
ADD_L ( STRIDE, EDX ) /* next normal */

PREFETCH ( REGIND(EDX) )

PFRSQRT ( MM3, MM5 ) /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
MOVQ ( MM5, MM4 ) /* keep the first estimate */

PUNPCKLDQ ( MM3, MM3 ) /* len^2 | len^2 */
PFMUL ( MM5, MM5 ) /* estimate squared */

PFRSQIT1 ( MM3, MM5 ) /* reciprocal sqrt refinement, step 1 */
/* The 3DNow! ops and stores below do not touch EFLAGS, so ZF from DEC_L reaches JNZ. */
DEC_L ( EBP ) /* decrement normal counter */

PFRCPIT2 ( MM4, MM5 ) /* refinement, step 2 -> 1/len */
PFMUL ( MM5, MM6 ) /* x1 (normalized) | x0 (normalized) */

MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM5, MM7 ) /* | x2 (normalized) */

MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TNNR_norm) ) /* cnt > 0 ? -> next normal */


LLBL (G3TNNR_exit_3dnow):
FEMMS

LLBL (G3TNNR_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
 
 
 
 
 
 
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For every input normal writes, 16 bytes apart in dest:
 *    r0 = x0 * (scale*m0)
 *    r1 = x1 * (scale*m5)
 *    r2 = x2 * (scale*m10)
 * where m0, m5, m10 are the floats at byte offsets 0, 20 and 40 of mat->inv
 * and scale is ARG_SCALE.  Copies in->count to dest->count.
 *
 * Registers: EDX = input cursor, EAX = dest cursor, ECX = mat->inv,
 * ESI = input vector (for STRIDE), EBP = counter.
 *
 * Fix relative to the previous revision: the loop ends with JNZ instead of
 * JA.  DEC does not write CF, so JA tested the carry left by the earlier
 * pointer ADD.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )

MOV_L ( ARG_IN, EAX )
MOV_L ( ARG_DEST, EDX )
MOV_L ( REGOFF(V3F_COUNT, EAX), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V3F_COUNT, EDX) )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
MOV_L ( REGOFF(V3F_START, EDX), EAX ) /* dest->start */
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */

CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3TRNR_end) )

FEMMS

MOVD ( ARG_SCALE, MM6 ) /* | scale */
PUNPCKLDQ ( MM6, MM6 ) /* scale | scale */

MOVD ( REGIND(ECX), MM0 ) /* | m0 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */

PFMUL ( MM6, MM0 ) /* scale*m5 | scale*m0 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */

PFMUL ( MM6, MM2 ) /* | scale*m10 */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX ) /* next normal */

PREFETCH ( REGIND(EDX) )

PFMUL ( MM2, MM5 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */

/* The MMX stores below do not touch EFLAGS, so ZF from DEC_L reaches JNZ. */
DEC_L ( EBP ) /* decrement normal counter */
MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */

MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal */

FEMMS

LLBL (G3TRNR_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
 
 
 
 
 
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For every input normal writes, 16 bytes apart in dest (m0..m15 =
 * consecutive floats of mat->inv, each pre-multiplied by ARG_SCALE):
 *    r0 = x0*m0 + x1*m1 + x2*m2
 *    r1 = x0*m4 + x1*m5 + x2*m6
 *    r2 = x0*m8 + x1*m9 + x2*m10
 * Copies in->count to dest->count.
 *
 * Registers: EDX = input cursor, EAX = dest cursor, ECX = mat->inv,
 * ESI = input vector (for STRIDE), EDI = counter.
 *
 * Fix relative to the previous revision: the loop ends with JNZ instead of
 * JA.  DEC does not write CF, so JA tested the carry left by the earlier
 * pointer ADD.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

PUSH_L ( EDI )
PUSH_L ( ESI )

MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) )
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

CMP_L ( CONST(0), EDI ) /* count > 0 ?? */
JE ( LLBL (G3TR_end) )

FEMMS

/* Load the nine matrix terms and fold the scale factor into each of them. */
MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */

MOVQ ( REGOFF(16,ECX), MM4 ) /* m5 | m4 */
MOVD ( ARG_SCALE, MM0 ) /* | scale */

MOVD ( REGOFF(8,ECX), MM5 ) /* | m2 */
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */

PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
PFMUL ( MM0, MM3 ) /* scale*m1 | scale*m0 */

MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */
PFMUL ( MM0, MM4 ) /* scale*m5 | scale*m4 */

MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
PFMUL ( MM0, MM5 ) /* scale*m6 | scale*m2 */

PFMUL ( MM0, MM6 ) /* scale*m9 | scale*m8 */

PFMUL ( MM0, MM7 ) /* | scale*m10 */

ALIGNTEXT32
LLBL (G3TR_rescale):

PREFETCHW ( REGIND(EAX) )

MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */

MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */

PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */

PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */

MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */

PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* r1 | r0 */

MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
ADD_L ( STRIDE, EDX ) /* next normal */

PREFETCH ( REGIND(EDX) )

MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */

PFMUL ( MM7, MM2 ) /* | x2*m10 */
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */

PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */

DEC_L ( EDI ) /* decrement normal counter */
JNZ ( LLBL (G3TR_rescale) ) /* cnt > 0 ? -> process next normal */

FEMMS

LLBL (G3TR_end):
POP_L ( ESI )
POP_L ( EDI )
RET
 
 
 
 
 
 
 
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For every input normal writes, 16 bytes apart in dest:
 *    r0 = x0*m0,  r1 = x1*m5,  r2 = x2*m10
 * where m0, m5, m10 are the floats at byte offsets 0, 20 and 40 of mat->inv.
 * Copies in->count to dest->count.
 *
 * Registers: EDX = input cursor, EAX = dest cursor, ECX = mat->inv,
 * ESI = input vector (for STRIDE), EDI = counter.
 *
 * Fix relative to the previous revision: the loop ends with JNZ instead of
 * JA.  DEC does not write CF, so JA tested the carry left by the earlier
 * pointer ADD.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

PUSH_L ( EDI )
PUSH_L ( ESI )

MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) )
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

CMP_L ( CONST(0), EDI ) /* count > 0 ?? */
JE ( LLBL (G3TNR_end) )

FEMMS

MOVD ( REGIND(ECX), MM0 ) /* | m0 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */

MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */

ALIGNTEXT32
LLBL (G3TNR_transform):

PREFETCHW ( REGIND(EAX) )

MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */

PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX) /* next normal */

PREFETCH ( REGIND(EDX) )

PFMUL ( MM2, MM5 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */

/* The MMX stores below do not touch EFLAGS, so ZF from DEC_L reaches JNZ. */
DEC_L ( EDI ) /* decrement normal counter */
MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */

MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TNR_transform) ) /* cnt > 0 ? -> process next normal */

FEMMS

LLBL (G3TNR_end):
POP_L ( ESI )
POP_L ( EDI )
RET
 
 
 
 
 
 
 
 
/*
 * _mesa_3dnow_transform_normals
 *
 * Transforms every input normal by the upper-left 3x3 part of mat->inv:
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * dest->count is set to in->count; a zero count returns immediately.
 *
 * Register use inside the loop:
 *     EDX = current input normal (advanced by STRIDE per iteration)
 *     EAX = current output slot  (advanced by 16 bytes; 12 bytes are written)
 *     EDI = remaining normal count
 *     MM3 = m1|m0, MM4 = m5|m4, MM5 = m6|m2, MM6 = m9|m8, MM7 = -|m10
 *
 * ARG_IN, ARG_DEST, ARG_MAT, STRIDE and the V3F_ and MATRIX_ offsets are defined
 * outside this block.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

/* Two registers are pushed below, i.e. 8 bytes of stack. */
#undef FRAME_OFFSET
#define FRAME_OFFSET 8

PUSH_L ( EDI )
PUSH_L ( ESI )

MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) )
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */

CMP_L ( CONST(0), EDI ) /* count > 0 ?? */
JE ( LLBL (G3T_end) )

FEMMS

/* Load the nine matrix elements once; they stay in MM3..MM7 for the loop. */
MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
MOVQ ( REGOFF(16, ECX), MM4 ) /* m5 | m4 */

MOVD ( REGOFF(8, ECX), MM5 ) /* | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */

MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */

ALIGNTEXT32
LLBL (G3T_transform):

PREFETCHW ( REGIND(EAX) )

MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */

MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */

PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */

PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */

PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */

/* The input is re-read for the third row because MM0..MM2 were consumed. */
MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */

PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */

PFMUL ( MM7, MM2 ) /* | x2*m10 */
ADD_L ( STRIDE, EDX ) /* next normal */

PREFETCH ( REGIND(EDX) )

PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */

MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
/* DEC leaves CF unchanged; the JA sees CF as left by the ADD_L above. */
DEC_L ( EDI ) /* decrement normal counter */

JA ( LLBL (G3T_transform) )

FEMMS

LLBL (G3T_end):
POP_L ( ESI )
POP_L ( EDI )
RET
 
 
 
 
 
 
/*
 * _mesa_3dnow_normalize_normals
 *
 * Writes a normalized copy of every input normal to dest and sets
 * dest->count to in->count.  A zero count returns immediately.
 *
 * Two loops are provided:
 *   G3N_norm1 - ARG_LENGTHS is non-NULL: each normal is multiplied by the
 *               matching entry of the lengths array (the entries are
 *               multiplied in, so they are expected to hold 1/|n| -
 *               confirm against the caller).
 *   G3N_norm2 - ARG_LENGTHS is NULL: 1/sqrt(x0*x0+x1*x1+x2*x2) is computed
 *               with PFRSQRT and refined with PFRSQIT1/PFRCPIT2.
 *
 * Register use inside the loops:
 *     ECX = current input normal (advanced by STRIDE per iteration)
 *     EAX = current output slot  (advanced by 16 bytes; 12 bytes are written)
 *     EDX = current lengths entry (norm1 only, advanced by 4 bytes)
 *     EBP = remaining normal count
 *
 * ARG_IN, ARG_DEST, ARG_LENGTHS, STRIDE and the V3F_ offsets are defined
 * outside this block.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

/* Three registers are pushed below, i.e. 12 bytes of stack. */
#undef FRAME_OFFSET
#define FRAME_OFFSET 12

PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )

MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) )
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V3F_START, ESI), ECX ) /* in->start */
MOV_L ( ARG_LENGTHS, EDX )

CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3N_end) )

FEMMS

CMP_L ( CONST(0), EDX ) /* lengths == 0 ? */
JE ( LLBL (G3N_norm2) ) /* calculate lengths */

ALIGNTEXT32
LLBL (G3N_norm1): /* use precalculated lengths */

PREFETCH ( REGIND(EAX) )

MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */

MOVD ( REGIND(EDX), MM3 ) /* | length (x) */
PFMUL ( MM3, MM1 ) /* | x2 (normalized) */

PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
ADD_L ( STRIDE, ECX ) /* next normal */

PREFETCH ( REGIND(ECX) )

PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */

MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
ADD_L ( CONST(16), EAX ) /* next r */

ADD_L ( CONST(4), EDX ) /* next length */
/* DEC leaves CF unchanged; the JA sees CF as left by the ADD_L above. */
DEC_L ( EBP ) /* decrement normal counter */

JA ( LLBL (G3N_norm1) )

JMP ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2): /* need to calculate lengths */

PREFETCHW ( REGIND(EAX) )

/*
 * Load the current normal BEFORE copying it and BEFORE advancing ECX.
 * Otherwise MM3 would hold the previous iteration's (or, on the first
 * pass, undefined) data, input 0 would be skipped and the last pass
 * would read one element beyond the input.
 */
MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */

MOVQ ( MM0, MM3 ) /* x1 | x0 */
ADD_L ( STRIDE, ECX ) /* next normal */

PREFETCH ( REGIND(ECX) )

PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
MOVQ ( MM1, MM4 ) /* | x2 */

ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM1, MM4 ) /* | x2*x2 */

PFADD ( MM4, MM3 ) /* | x0*x0+x2*x2 */
PFACC ( MM3, MM3 ) /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/

/* Reciprocal square root: initial estimate plus one refinement step. */
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
MOVQ ( MM5, MM4 )

PUNPCKLDQ ( MM3, MM3 )
PFMUL ( MM5, MM5 )

PFRSQIT1 ( MM3, MM5 )
/*
 * DEC leaves CF unchanged and the 3DNow!/MMX instructions that follow do
 * not touch the flags, so the JA at the bottom sees ZF from this DEC and
 * CF from the ADD_L above.
 */
DEC_L ( EBP ) /* decrement normal counter */

PFRCPIT2 ( MM4, MM5 )

PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */

PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */

JA ( LLBL (G3N_norm2) )

/* Both loops leave MMX state through this FEMMS. */
LLBL (G3N_end1):
FEMMS

/* The zero-count path lands here directly; it never issued FEMMS. */
LLBL (G3N_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
 
 
 
 
 
 
/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies all three components of every input normal by the scalar
 * ARG_SCALE and stores the result in dest.  dest->count is set to
 * in->count; a zero count returns immediately.
 *
 * Register use inside the loop:
 *     ECX = current input normal (advanced by STRIDE per iteration)
 *     EAX = current output slot  (advanced by 16 bytes; 12 bytes are written)
 *     EDX = remaining normal count
 *     MM0 = scale | scale
 *
 * ARG_IN, ARG_DEST, ARG_SCALE, STRIDE and the V3F_ offsets are defined
 * outside this block.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

/* Two registers are pushed below, i.e. 8 bytes of stack. */
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )

MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V3F_COUNT, ESI), EDX ) /* dest->count = in->count */
MOV_L ( EDX, REGOFF(V3F_COUNT, EAX) )
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V3F_START, ESI), ECX ) /* in->start */

/* Nothing to do for an empty vector; FEMMS has not been issued yet. */
CMP_L ( CONST(0), EDX )
JE ( LLBL (G3R_end) )

FEMMS

/* Broadcast the scale factor into both halves of MM0. */
MOVD ( ARG_SCALE, MM0 ) /* scale */
PUNPCKLDQ ( MM0, MM0 )

ALIGNTEXT32
LLBL (G3R_rescale):

PREFETCHW ( REGIND(EAX) )

MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */

PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */
ADD_L ( STRIDE, ECX ) /* next normal */

PREFETCH ( REGIND(ECX) )

PFMUL ( MM0, MM2 ) /* | x2*scale */
ADD_L ( CONST(16), EAX ) /* next r */

MOVQ ( MM1, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM2, REGOFF(-8, EAX) ) /* write r2 */

/* DEC leaves CF unchanged; the JA sees CF as left by the ADD_L above. */
DEC_L ( EDX ) /* decrement normal counter */
JA ( LLBL (G3R_rescale) )

FEMMS

LLBL (G3R_end):
POP_L ( ESI )
POP_L ( EDI )
RET
/shark/trunk/ports/mesa/src/x86/3dnow.c
0,0 → 1,89
/* $Id: 3dnow.c,v 1.1 2003-03-13 12:11:47 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 4.1
*
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* 3DNow! optimizations contributed by
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*/
 
#include "glheader.h"
#include "context.h"
#include "math/m_xform.h"
#include "tnl/t_context.h"
 
#include "3dnow.h"
#include "common_x86_macros.h"
 
#ifdef DEBUG
#include "math/m_debug.h"
#endif
 
 
/*
 * Declarations that are only needed when the 3DNow! assembly is built in.
 * DECLARE_XFORM_GROUP / DECLARE_NORM_GROUP are macros defined outside this
 * file (presumably in common_x86_macros.h - confirm); they are expanded
 * here for point sizes 2, 3 and 4 and once for the normal routines.
 */
#ifdef USE_3DNOW_ASM
DECLARE_XFORM_GROUP( 3dnow, 2 )
DECLARE_XFORM_GROUP( 3dnow, 3 )
DECLARE_XFORM_GROUP( 3dnow, 4 )

DECLARE_NORM_GROUP( 3dnow )


/*
 * Prototypes of routines defined elsewhere (marked _ASMAPI).  None of them
 * is referenced again in this file.
 */
extern void _ASMAPI
_mesa_v16_3dnow_general_xform( GLfloat *first_vert,
const GLfloat *m,
const GLfloat *src,
GLuint src_stride,
GLuint count );

extern void _ASMAPI
_mesa_3dnow_project_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride );

extern void _ASMAPI
_mesa_3dnow_project_clipped_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride,
const GLubyte *clipmask );
#endif
 
 
/**
 * Set up the 3DNow! transform and normal routines.
 *
 * With USE_3DNOW_ASM defined this expands ASSIGN_XFORM_GROUP for point
 * sizes 2, 3 and 4 and ASSIGN_NORM_GROUP once (both macros are defined
 * outside this file; presumably they store the routines declared above
 * into the transform dispatch tables - confirm).  If DEBUG is also
 * defined, the transform and normal-transform self tests are run with the
 * label "3DNow!".
 *
 * Without USE_3DNOW_ASM the function body is empty.
 */
void _mesa_init_3dnow_transform_asm( void )
{
#ifdef USE_3DNOW_ASM
ASSIGN_XFORM_GROUP( 3dnow, 2 );
ASSIGN_XFORM_GROUP( 3dnow, 3 );
ASSIGN_XFORM_GROUP( 3dnow, 4 );

ASSIGN_NORM_GROUP( 3dnow );

#ifdef DEBUG
_math_test_all_transform_functions( "3DNow!" );
_math_test_all_normal_transform_functions( "3DNow!" );
#endif
#endif
}
/shark/trunk/ports/mesa/src/x86/3dnow.h
0,0 → 1,39
/* $Id: 3dnow.h,v 1.1 2003-03-13 12:11:47 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* 3DNow! optimizations contributed by
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*/
 
#ifndef __3DNOW_H__
#define __3DNOW_H__
 
#include "math/m_xform.h"
 
/* Initialization entry point for the 3DNow! transform code; takes no arguments and returns nothing. */
void _mesa_init_3dnow_transform_asm( void );
 
#endif
/shark/trunk/ports/mesa/src/x86/sse_xform1.s
0,0 → 1,433
/* $Id: sse_xform1.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/** TODO:
* - insert PREFETCH instructions to avoid cache misses
* - some further optimizations are possible
* - for 40-50% more performance in the SSE functions, the
* data (transformation matrix, src_vert, dst_vert) needs to be 16-byte aligned
*/
 
#include "matypes.h"
#include "xform_args.h"
 
SEG_TEXT

/*
 * Float-indexed operands: element i (4 bytes each) of the current source
 * vertex (ESI), the current destination vertex (EDI) and the matrix (EDX).
 * The argument is not parenthesized; every use in this file passes a plain
 * integer literal.
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
 
 
/*
 * _mesa_sse_transform_points1_general
 *
 * For every source vertex only the x component (ox) is read; the four
 * floats  ox*(m0..m3) + (m12..m15)  are written to the destination.
 * Sets dest->count to the source count, dest->size to 4 and ORs VEC_SIZE_4
 * into dest->flags.  Returns without touching dest when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 * ARG_MATRIX, OFFSET_SOURCE, OFFSET_DEST and the V4F_ offsets come from the
 * included headers.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_general)
GLNAME( _mesa_sse_transform_points1_general ):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

CMP_L( CONST(0), ECX ) /* count == 0 ? */
JE( LLBL(K_GTP1GR_finish) ) /* yes -> nothing to do. */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */


/* MOVAPS faults on unaligned addresses: the matrix must be 16-byte aligned. */
ALIGNTEXT32
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP1GR_top):
MOVSS( S(0), XMM2 ) /* ox */
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
ADDPS( XMM1, XMM2 ) /* + | + | + | + */
/* Unaligned store: the destination is not required to be 16-byte aligned. */
MOVUPS( XMM2, D(0) )

/* No jump to this label appears within this function. */
LLBL(K_GTP1GR_skip):
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP1GR_top) )

LLBL(K_GTP1GR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points1_identity
 *
 * Copies the x component of every source vertex to the destination
 * unchanged (plain 32-bit integer moves, no SSE instructions).
 * Sets dest->count to the source count, dest->size to 1 and ORs VEC_SIZE_1
 * into dest->flags.  Returns without touching dest when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = scratch,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_identity)
GLNAME( _mesa_sse_transform_points1_identity ):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP1IR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */

/*
 * Source and destination data start at the same address: skip the copy.
 * The dest count/size/flags have already been updated above.
 */
CMP_L( ESI, EDI )
JE( LLBL(K_GTP1IR_finish) )


ALIGNTEXT32
LLBL(K_GTP1IR_top):
MOV_L( S(0), EDX )
MOV_L( EDX, D(0) )

/* No jump to this label appears within this function. */
LLBL(K_GTP1IR_skip):
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP1IR_top) )

LLBL(K_GTP1IR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * For every source vertex only the x component (ox) is read; writes
 *     D(0) = ox*m0 + m12,  D(1) = m13,  D(2) = m14
 * D(3) is not written.  Sets dest->count to the source count, dest->size
 * to 3 and ORs VEC_SIZE_3 into dest->flags.  Returns without touching dest
 * when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
GLNAME(_mesa_sse_transform_points1_3d_no_rot):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13DNRR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */


/* Loop-invariant matrix elements, loaded once. */
ALIGNTEXT32
MOVSS( M(0), XMM0 ) /* m0 */
MOVSS( M(12), XMM1 ) /* m12 */
MOVSS( M(13), XMM2 ) /* m13 */
MOVSS( M(14), XMM3 ) /* m14 */

ALIGNTEXT32
LLBL(K_GTP13DNRR_top):
MOVSS( S(0), XMM4 ) /* ox */
MULSS( XMM0, XMM4 ) /* ox*m0 */
ADDSS( XMM1, XMM4 ) /* ox*m0+m12 */
MOVSS( XMM4, D(0) )

/* y and z do not depend on the input: store the constants m13 and m14. */
MOVSS( XMM2, D(1) )
MOVSS( XMM3, D(2) )

/* No jump to this label appears within this function. */
LLBL(K_GTP13DNRR_skip):
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP13DNRR_top) )

LLBL(K_GTP13DNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points1_perspective
 *
 * For every source vertex only the x component (ox) is read; writes
 *     D(0) = ox*m0,  D(1) = 0,  D(2) = m14,  D(3) = 0
 * Sets dest->count to the source count, dest->size to 4 and ORs VEC_SIZE_4
 * into dest->flags.  Returns without touching dest when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
GLNAME(_mesa_sse_transform_points1_perspective):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13PR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */


/* XMM0 is kept at zero for the whole loop and used as the 0.0 constant. */
ALIGNTEXT32
XORPS( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */
MOVSS( M(0), XMM1 ) /* m0 */
MOVSS( M(14), XMM2 ) /* m14 */

ALIGNTEXT32
LLBL(K_GTP13PR_top):
MOVSS( S(0), XMM3 ) /* ox */
MULSS( XMM1, XMM3 ) /* ox*m0 */
MOVSS( XMM3, D(0) ) /* ox*m0->D(0) */
MOVSS( XMM2, D(2) ) /* m14->D(2) */

MOVSS( XMM0, D(1) ) /* 0->D(1) */
MOVSS( XMM0, D(3) ) /* 0->D(3) */

/* No jump to this label appears within this function. */
LLBL(K_GTP13PR_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP13PR_top) )

LLBL(K_GTP13PR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
/*
 * _mesa_sse_transform_points1_2d
 *
 * For every source vertex only the x component (ox) is read; writes
 *     D(0) = ox*m0 + m12,  D(1) = ox*m1 + m13
 * D(2) and D(3) are not written.  Sets dest->count to the source count,
 * dest->size to 2 and ORs VEC_SIZE_2 into dest->flags.  Returns without
 * touching dest when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d)
GLNAME(_mesa_sse_transform_points1_2d):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13P2DR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */

/*
 * MOVLPS fills only the low two lanes; the high lanes of XMM0/XMM1 keep
 * whatever they held, which is harmless because only the low two lanes of
 * the result are stored.
 */
ALIGNTEXT32
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
MOVLPS( M(12), XMM1 ) /* m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP13P2DR_top):
MOVSS( S(0), XMM2 ) /* ox */
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM2 ) /* - | - | ox*m1 | ox*m0 */
ADDPS( XMM1, XMM2 ) /* - | - | ox*m1+m13 | ox*m0+m12 */
MOVLPS( XMM2, D(0) )

/* No jump to this label appears within this function. */
LLBL(K_GTP13P2DR_skip):
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP13P2DR_top) )

LLBL(K_GTP13P2DR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * For every source vertex only the x component (ox) is read; writes
 *     D(0) = ox*m0 + m12,  D(1) = m13
 * D(2) and D(3) are not written.  Sets dest->count to the source count,
 * dest->size to 2 and ORs VEC_SIZE_2 into dest->flags.  Returns without
 * touching dest when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
GLNAME(_mesa_sse_transform_points1_2d_no_rot):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13P2DNRR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */

/* Loop-invariant matrix elements, loaded once. */
ALIGNTEXT32
MOVSS( M(0), XMM0 ) /* m0 */
MOVSS( M(12), XMM1 ) /* m12 */
MOVSS( M(13), XMM2 ) /* m13 */

ALIGNTEXT32
LLBL(K_GTP13P2DNRR_top):
MOVSS( S(0), XMM3 ) /* ox */
MULSS( XMM0, XMM3 ) /* ox*m0 */
ADDSS( XMM1, XMM3 ) /* ox*m0+m12 */
MOVSS( XMM3, D(0) )
MOVSS( XMM2, D(1) ) /* m13->D(1), independent of the input */

/* No jump to this label appears within this function. */
LLBL(K_GTP13P2DNRR_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP13P2DNRR_top) )

LLBL(K_GTP13P2DNRR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points1_3d
 *
 * For every source vertex only the x component (ox) is read; writes
 *     D(0) = ox*m0 + m12,  D(1) = ox*m1 + m13,  D(2) = ox*m2 + m14
 * D(3) is not written.  Sets dest->count to the source count, dest->size
 * to 3 and ORs VEC_SIZE_3 into dest->flags.  Returns without touching dest
 * when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d)
GLNAME(_mesa_sse_transform_points1_3d):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13P3DR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */


/* MOVAPS faults on unaligned addresses: the matrix must be 16-byte aligned. */
ALIGNTEXT32
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP13P3DR_top):
MOVSS( S(0), XMM2 ) /* ox */
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
ADDPS( XMM1, XMM2 ) /* +m15 | +m14 | +m13 | +m12 */
MOVLPS( XMM2, D(0) ) /* - | - | ->D(1)| ->D(0)*/
/* Bring lane 2 down to lane 0 so it can be stored with MOVSS. */
UNPCKHPS( XMM2, XMM2 ) /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */
MOVSS( XMM2, D(2) )

/* No jump to this label appears within this function. */
LLBL(K_GTP13P3DR_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP13P3DR_top) )

LLBL(K_GTP13P3DR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/shark/trunk/ports/mesa/src/x86/sse_xform2.s
0,0 → 1,452
/* $Id: sse_xform2.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/** TODO:
* - insert PREFETCH instructions to avoid cache misses
* - some further optimizations are possible
* - for 40-50% more performance in the SSE functions, the
* data (transformation matrix, src_vert, dst_vert) needs to be 16-byte aligned
*/
 
#include "matypes.h"
#include "xform_args.h"
 
SEG_TEXT

/*
 * Float-indexed operands: element i (4 bytes each) of the current source
 * vertex (ESI), the current destination vertex (EDI) and the matrix (EDX).
 * The argument is not parenthesized; every use in this file passes a plain
 * integer literal.
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
 
 
/*
 * _mesa_sse_transform_points2_general
 *
 * For every source vertex the x and y components (ox, oy) are read; the
 * four floats  ox*(m0..m3) + oy*(m4..m7) + (m12..m15)  are written to the
 * destination.  Sets dest->count to the source count, dest->size to 4 and
 * ORs VEC_SIZE_4 into dest->flags.  Returns without touching dest when the
 * count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_general)
GLNAME( _mesa_sse_transform_points2_general ):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX )
JZ( LLBL(K_GTP2GR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */

/* MOVAPS faults on unaligned addresses: the matrix must be 16-byte aligned. */
ALIGNTEXT32
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */
MOVAPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP2GR_top):
MOVSS( S(0), XMM3 ) /* ox */
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM3 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
MOVSS( S(1), XMM4 ) /* oy */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy | oy */
MULPS( XMM1, XMM4 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */

ADDPS( XMM4, XMM3 ) /* ox*m(0..3) + oy*m(4..7) */
ADDPS( XMM2, XMM3 ) /* + m(12..15) */
/* Aligned store: every destination vertex must be 16-byte aligned. */
MOVAPS( XMM3, D(0) )

/* No jump to this label appears within this function. */
LLBL(K_GTP2GR_skip):
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP2GR_top) )

LLBL(K_GTP2GR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies the x and y components of every source vertex to the destination
 * unchanged (plain 32-bit integer moves, no SSE instructions).
 * Sets dest->count to the source count, dest->size to 2 and ORs VEC_SIZE_2
 * into dest->flags.  Returns without touching dest when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = scratch,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_identity)
GLNAME( _mesa_sse_transform_points2_identity ):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP2IR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */

/*
 * Source and destination data start at the same address: skip the copy.
 * The dest count/size/flags have already been updated above.
 */
CMP_L( ESI, EDI )
JE( LLBL(K_GTP2IR_finish) )


ALIGNTEXT32
LLBL(K_GTP2IR_top):
MOV_L ( S(0), EDX )
MOV_L ( EDX, D(0) )
MOV_L ( S(1), EDX )
MOV_L ( EDX, D(1) )

/* No jump to this label appears within this function. */
LLBL(K_GTP2IR_skip):
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP2IR_top) )

LLBL(K_GTP2IR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * For every source vertex the x and y components (ox, oy) are read; writes
 *     D(0) = ox*m0 + m12,  D(1) = oy*m5 + m13,  D(2) = m14
 * D(3) is not written.  Sets dest->count to the source count, dest->size
 * to 3 and ORs VEC_SIZE_3 into dest->flags.  Returns without touching dest
 * when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)
GLNAME(_mesa_sse_transform_points2_3d_no_rot):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23DNRR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */


/* XMM2 is first a scratch register for m5, then reloaded with m13 | m12. */
ALIGNTEXT32
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */
MOVSS ( M(14), XMM3 ) /* - | - | - | m14 */

/*
 * MOVLPS only replaces the low two lanes of XMM0; the high lanes take part
 * in MULPS/ADDPS but their results are never stored.
 */
ALIGNTEXT32
LLBL(K_GTP23DNRR_top):
MOVLPS ( S(0), XMM0 ) /* - | - | oy | ox */
MULPS ( XMM1, XMM0 ) /* - | - | oy*m5 | ox*m0 */
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */

MOVSS ( XMM3, D(2) ) /* -> D(2) */

/* No jump to this label appears within this function. */
LLBL(K_GTP23DNRR_skip):
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP23DNRR_top) )

LLBL(K_GTP23DNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
/*
 * _mesa_sse_transform_points2_perspective
 *
 * For every source vertex the x and y components (ox, oy) are read; writes
 *     D(0) = ox*m0,  D(1) = oy*m5,  D(2) = m14,  D(3) = 0
 * Sets dest->count to the source count, dest->size to 4 and ORs VEC_SIZE_4
 * into dest->flags.  Returns without touching dest when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_perspective)
GLNAME(_mesa_sse_transform_points2_perspective):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23PR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */

/* XMM0 is kept at zero for the whole loop and used as the 0.0 constant. */
ALIGNTEXT32
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
MOVSS ( M(14), XMM3 ) /* m14 */
XORPS ( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */

ALIGNTEXT32
LLBL(K_GTP23PR_top):
MOVLPS( S(0), XMM4 ) /* oy | ox */
MULPS( XMM1, XMM4 ) /* oy*m5 | ox*m0 */
MOVLPS( XMM4, D(0) ) /* ->D(1) | ->D(0) */
MOVSS( XMM3, D(2) ) /* ->D(2) */
MOVSS( XMM0, D(3) ) /* ->D(3) */

/* No jump to this label appears within this function. */
LLBL(K_GTP23PR_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP23PR_top) )

LLBL(K_GTP23PR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points2_2d
 *
 * For every source vertex the x and y components (ox, oy) are read; writes
 *     D(0) = ox*m0 + oy*m4 + m12,  D(1) = ox*m1 + oy*m5 + m13
 * D(2) and D(3) are not written.  Sets dest->count to the source count,
 * dest->size to 2 and ORs VEC_SIZE_2 into dest->flags.  Returns without
 * touching dest when the count is 0.
 *
 * Register use: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 * EAX = source stride in bytes, ECX = dest start + count*16 (loop end).
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d)
GLNAME(_mesa_sse_transform_points2_2d):

/* Two registers are pushed below; the +8 in the argument loads matches. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23P2DR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr */

/*
 * MOVLPS fills only the low two lanes; the high lanes of XMM0..XMM2 keep
 * whatever they held, which is harmless because only the low two lanes of
 * the result are stored.
 */
ALIGNTEXT32
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
MOVLPS( M(4), XMM1 ) /* m5 | m4 */
MOVLPS( M(12), XMM2 ) /* m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DR_top):
MOVSS( S(0), XMM3 ) /* ox */
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox */
MULPS( XMM0, XMM3 ) /* ox*m1 | ox*m0 */

MOVSS( S(1), XMM4 ) /* oy */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy */
MULPS( XMM1, XMM4 ) /* oy*m5 | oy*m4 */

ADDPS( XMM4, XMM3 ) /* ox*m1+oy*m5 | ox*m0+oy*m4 */
ADDPS( XMM2, XMM3 ) /* +m13 | +m12 */
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */

/* No jump to this label appears within this function. */
LLBL(K_GTP23P2DR_skip):
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP23P2DR_top) )

LLBL(K_GTP23P2DR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * Scale-and-translate transform of 2-component vertices (ox, oy); only
 * the matrix diagonal and translation are read:
 *   D(0) = ox*m0 + m12
 *   D(1) = oy*m5 + m13
 * The destination vector gets count = source count, size = 2 and the
 * VEC_SIZE_2 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
GLNAME(_mesa_sse_transform_points2_2d_no_rot):

#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23P2DNRR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant constants: XMM1 = scale pair, XMM2 = translation pair. */
ALIGNTEXT32
MOVSS ( M(0), XMM1 ) /* m0 */
MOVSS ( M(5), XMM2 ) /* m5 */
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DNRR_top):
MOVLPS( S(0), XMM0 ) /* oy | ox */
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */

LLBL(K_GTP23P2DNRR_skip):
ADD_L( CONST(16), EDI ) /* next dest vertex */
ADD_L( EAX, ESI ) /* next source vertex */
CMP_L( ECX, EDI ) /* reached the end address? */
JNE( LLBL(K_GTP23P2DNRR_top) )

LLBL(K_GTP23P2DNRR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points2_3d
 *
 * Transforms 2-component source vertices (ox, oy) into 3-component
 * destination vertices:
 *   D(0) = ox*m0 + oy*m4 + m12
 *   D(1) = ox*m1 + oy*m5 + m13
 *   D(2) = ox*m2 + oy*m6 + m14
 * The destination vector gets count = source count, size = 3 and the
 * VEC_SIZE_3 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * The matrix columns are fetched with MOVAPS, so the matrix storage must
 * be 16-byte aligned (MOVAPS faults on an unaligned memory operand).
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d)
GLNAME(_mesa_sse_transform_points2_3d):

#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23P3DR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant matrix columns (the top lane of each is loaded but unused). */
ALIGNTEXT32
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */
MOVAPS( M(12), XMM2 ) /* m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P3DR_top):
MOVSS( S(0), XMM3 ) /* ox */
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox */
MULPS( XMM0, XMM3 ) /* ox*m2 | ox*m1 | ox*m0 */

MOVSS( S(1), XMM4 ) /* oy */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy */
MULPS( XMM1, XMM4 ) /* oy*m6 | oy*m5 | oy*m4 */

ADDPS( XMM4, XMM3 ) /* sum of the ox and oy products */
ADDPS( XMM2, XMM3 ) /* +m14 | +m13 | +m12 */

MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */
UNPCKHPS( XMM3, XMM3 ) /* bring lane 2 (the z result) down to lane 0 */
MOVSS( XMM3, D(2) ) /* ->D(2) */

LLBL(K_GTP23P3DR_skip):
ADD_L( CONST(16), EDI ) /* next dest vertex */
ADD_L( EAX, ESI ) /* next source vertex */
CMP_L( ECX, EDI ) /* reached the end address? */
JNE( LLBL(K_GTP23P3DR_top) )

LLBL(K_GTP23P3DR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/shark/trunk/ports/mesa/src/x86/sse_xform3.s
0,0 → 1,498
/* $Id: sse_xform3.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/** TODO:
* - insert PREFETCH instructions to avoid cache-misses !
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE-functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
*/
 
#include "matypes.h"
#include "xform_args.h"
 
SEG_TEXT
 
/*
 * Operand shorthands: the i-th 32-bit float (byte offset i*4) relative to
 * the source vertex (ESI), destination vertex (EDI) and matrix (EDX)
 * pointers.  The argument is not parenthesized in the expansion, so pass
 * plain constants only.
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
 
 
/*
 * _mesa_sse_transform_points3_general
 *
 * Transforms 3-component source vertices (ox, oy, oz) by the full matrix,
 * treating the missing w as 1, and writes four floats per dest vertex:
 *   D(j) = ox*m[j] + oy*m[4+j] + oz*m[8+j] + m[12+j]      for j = 0..3
 * The destination vector gets count = source count, size = 4 and the
 * VEC_SIZE_4 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * Both the matrix loads and the per-vertex store use MOVAPS, so the matrix
 * and every destination vertex must be 16-byte aligned (MOVAPS faults on
 * an unaligned memory operand).
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_general)
GLNAME( _mesa_sse_transform_points3_general ):

#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

CMP_L ( CONST(0), ECX ) /* count == 0 ? */
JE ( LLBL(K_GTPGR_finish) ) /* yes -> nothing to do. */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */


/* Loop-invariant matrix columns (comments list lanes high | ... | low). */
ALIGNTEXT32
MOVAPS ( REGOFF(0, EDX), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS ( REGOFF(16, EDX), XMM1 ) /* m7 | m6 | m5 | m4 */
MOVAPS ( REGOFF(32, EDX), XMM2 ) /* m11 | m10 | m9 | m8 */
MOVAPS ( REGOFF(48, EDX), XMM3 ) /* m15 | m14 | m13 | m12 */


ALIGNTEXT32
LLBL(K_GTPGR_top):
MOVSS ( REGOFF(0, ESI), XMM4 ) /* | | | ox */
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */
MOVSS ( REGOFF(4, ESI), XMM5 ) /* | | | oy */
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */
MOVSS ( REGOFF(8, ESI), XMM6 ) /* | | | oz */
SHUFPS ( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */

MULPS ( XMM0, XMM4 ) /* m3*ox | m2*ox | m1*ox | m0*ox */
MULPS ( XMM1, XMM5 ) /* m7*oy | m6*oy | m5*oy | m4*oy */
MULPS ( XMM2, XMM6 ) /* m11*oz | m10*oz | m9*oz | m8*oz */

ADDPS ( XMM5, XMM4 ) /* x-terms + y-terms */
ADDPS ( XMM6, XMM4 ) /* + z-terms */
ADDPS ( XMM3, XMM4 ) /* + translation column (m15 | m14 | m13 | m12) */

MOVAPS ( XMM4, REGOFF(0, EDI) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */

LLBL(K_GTPGR_skip):
ADD_L ( CONST(16), EDI ) /* next dest vertex */
ADD_L ( EAX, ESI ) /* next source vertex */
CMP_L ( ECX, EDI ) /* reached the end address? */
JNE ( LLBL(K_GTPGR_top) )

LLBL(K_GTPGR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
/*
 * _mesa_sse_transform_points3_identity
 *
 * Identity transform: copies the first three floats of every source
 * vertex to the destination (D(0..2) = S(0..2)).  The matrix argument is
 * never read.
 *
 * The destination vector gets count = source count, size = 3 and the
 * VEC_SIZE_3 flag bits.  If the source and destination start pointers are
 * equal the copy loop is skipped, but count/size/flags have already been
 * updated at that point.  A zero source count returns immediately without
 * touching the destination.
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_identity)
GLNAME( _mesa_sse_transform_points3_identity ):

#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTPIR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* In-place call (source start == dest start): nothing to copy. */
CMP_L( ESI, EDI )
JE( LLBL(K_GTPIR_finish) )


ALIGNTEXT32
LLBL(K_GTPIR_top):
MOVLPS ( S(0), XMM0 ) /* oy | ox */
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */
MOVSS ( S(2), XMM0 ) /* oz */
MOVSS ( XMM0, D(2) ) /* ->D(2) */

LLBL(K_GTPIR_skip):
ADD_L ( CONST(16), EDI ) /* next dest vertex */
ADD_L ( EAX, ESI ) /* next source vertex */
CMP_L ( ECX, EDI ) /* reached the end address? */
JNE ( LLBL(K_GTPIR_top) )

LLBL(K_GTPIR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
 
/*
 * _mesa_sse_transform_points3_3d_no_rot
 *
 * Scale-and-translate transform of 3-component vertices (s0, s1, s2);
 * only the matrix diagonal and translation are read:
 *   D(0) = s0*m0  + m12
 *   D(1) = s1*m5  + m13
 *   D(2) = s2*m10 + m14
 * The destination vector gets count = source count, size = 3 and the
 * VEC_SIZE_3 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot)
GLNAME(_mesa_sse_transform_points3_3d_no_rot):

#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */


MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3DNRR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */


/* Loop-invariant constants: XMM1 = x/y scale, XMM2 = x/y translation,
 * XMM3 = z scale, XMM4 = z translation. */
ALIGNTEXT32
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */
MOVSS ( M(10), XMM3 ) /* - | - | - | m10 */
MOVSS ( M(14), XMM4 ) /* - | - | - | m14 */

ALIGNTEXT32
LLBL(K_GTP3DNRR_top):

MOVLPS ( S(0), XMM0 ) /* - | - | s1 | s0 */
MULPS ( XMM1, XMM0 ) /* - | - | s1*m5 | s0*m0 */
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */

MOVSS ( S(2), XMM0 ) /* sz */
MULSS ( XMM3, XMM0 ) /* sz*m10 */
ADDSS ( XMM4, XMM0 ) /* +m14 */
MOVSS ( XMM0, D(2) ) /* -> D(2) */

LLBL(K_GTP3DNRR_skip):
ADD_L ( CONST(16), EDI ) /* next dest vertex */
ADD_L ( EAX, ESI ) /* next source vertex */
CMP_L ( ECX, EDI ) /* reached the end address? */
JNE ( LLBL(K_GTP3DNRR_top) )

LLBL(K_GTP3DNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points3_perspective
 *
 * Transforms 3-component vertices (ox, oy, oz) by a perspective-shaped
 * matrix, reading only m0, m5, m8, m9, m10 and m14:
 *   D(0) = ox*m0 + oz*m8
 *   D(1) = oy*m5 + oz*m9
 *   D(2) = oz*m10 + m14
 *   D(3) = -oz
 * The destination vector gets count = source count, size = 4 and the
 * VEC_SIZE_4 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_perspective)
GLNAME(_mesa_sse_transform_points3_perspective):

#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3PR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant constants. */
ALIGNTEXT32
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
MOVLPS ( M(8), XMM2 ) /* - | - | m9 | m8 */
MOVSS ( M(10), XMM3 ) /* m10 */
MOVSS ( M(14), XMM4 ) /* m14 */
XORPS ( XMM6, XMM6 ) /* 0 */

ALIGNTEXT32
LLBL(K_GTP3PR_top):
MOVLPS ( S(0), XMM0 ) /* oy | ox */
MULPS ( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
MOVSS ( S(2), XMM5 ) /* oz */
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oz | oz */
MULPS ( XMM2, XMM5 ) /* oz*m9 | oz*m8 */
ADDPS ( XMM5, XMM0 ) /* oy*m5+oz*m9 | ox*m0+oz*m8 */
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */

MOVSS ( S(2), XMM0 ) /* oz */
MULSS ( XMM3, XMM0 ) /* oz*m10 */
ADDSS ( XMM4, XMM0 ) /* +m14 */
MOVSS ( XMM0, D(2) ) /* ->D(2) */

/* D(3) = 0 - oz.  The subtract is packed (SUBPS), so the upper lanes of
 * XMM5 are also computed from leftover data; only the low lane is stored. */
MOVSS ( S(2), XMM0 ) /* oz */
MOVSS ( XMM6, XMM5 ) /* low lane = 0 */
SUBPS ( XMM0, XMM5 ) /* low lane = -oz */
MOVSS ( XMM5, D(3) ) /* ->D(3) */

LLBL(K_GTP3PR_skip):
ADD_L( CONST(16), EDI ) /* next dest vertex */
ADD_L( EAX, ESI ) /* next source vertex */
CMP_L( ECX, EDI ) /* reached the end address? */
JNE( LLBL(K_GTP3PR_top) )

LLBL(K_GTP3PR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points3_2d
 *
 * Applies the 2D affine part of the matrix to x and y of each 3-component
 * vertex (ox, oy, oz) and passes z through unchanged:
 *   D(0) = ox*m0 + oy*m4 + m12
 *   D(1) = ox*m1 + oy*m5 + m13
 *   D(2) = oz
 * The destination vector gets count = source count, size = 3 and the
 * VEC_SIZE_3 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_2d)
GLNAME(_mesa_sse_transform_points3_2d):

#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3P2DR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant matrix columns; only the low two lanes are loaded. */
ALIGNTEXT32
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
MOVLPS( M(4), XMM1 ) /* m5 | m4 */
MOVLPS( M(12), XMM2 ) /* m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP3P2DR_top):
MOVSS ( S(0), XMM3 ) /* ox */
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ox | ox */
MULPS ( XMM0, XMM3 ) /* ox*m1 | ox*m0 */
MOVSS ( S(1), XMM4 ) /* oy */
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* oy | oy */
MULPS ( XMM1, XMM4 ) /* oy*m5 | oy*m4 */

ADDPS ( XMM4, XMM3 ) /* ox*m1+oy*m5 | ox*m0+oy*m4 */
ADDPS ( XMM2, XMM3 ) /* +m13 | +m12 */
MOVLPS ( XMM3, D(0) ) /* ->D(1) | ->D(0) */

MOVSS ( S(2), XMM3 ) /* oz, copied through untransformed */
MOVSS ( XMM3, D(2) ) /* ->D(2) */

LLBL(K_GTP3P2DR_skip):
ADD_L ( CONST(16), EDI ) /* next dest vertex */
ADD_L ( EAX, ESI ) /* next source vertex */
CMP_L ( ECX, EDI ) /* reached the end address? */
JNE ( LLBL(K_GTP3P2DR_top) )

LLBL(K_GTP3P2DR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_points3_2d_no_rot
 *
 * Scales and translates x and y of each 3-component vertex (ox, oy, oz)
 * and passes z through unchanged:
 *   D(0) = ox*m0 + m12
 *   D(1) = oy*m5 + m13
 *   D(2) = oz
 * The destination vector gets count = source count, size = 3 and the
 * VEC_SIZE_3 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot)
GLNAME(_mesa_sse_transform_points3_2d_no_rot):

#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3P2DNRR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant constants: XMM1 = scale pair, XMM2 = translation pair. */
ALIGNTEXT32
MOVSS ( M(0), XMM1 ) /* m0 */
MOVSS ( M(5), XMM2 ) /* m5 */
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP3P2DNRR_top):
MOVLPS( S(0), XMM0 ) /* oy | ox */
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */

MOVSS( S(2), XMM0 ) /* oz, copied through untransformed */
MOVSS( XMM0, D(2) ) /* ->D(2) */

LLBL(K_GTP3P2DNRR_skip):
ADD_L( CONST(16), EDI ) /* next dest vertex */
ADD_L( EAX, ESI ) /* next source vertex */
CMP_L( ECX, EDI ) /* reached the end address? */
JNE( LLBL(K_GTP3P2DNRR_top) )

LLBL(K_GTP3P2DNRR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
 
 
 
 
/*
 * _mesa_sse_transform_points3_3d
 *
 * Transforms 3-component vertices (ox, oy, oz) by the 3D affine part of
 * the matrix and writes three floats per destination vertex:
 *   D(j) = ox*m[j] + oy*m[4+j] + oz*m[8+j] + m[12+j]      for j = 0..2
 * The destination vector gets count = source count, size = 3 and the
 * VEC_SIZE_3 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * The matrix columns are fetched with MOVAPS, so the matrix storage must
 * be 16-byte aligned (MOVAPS faults on an unaligned memory operand).
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_3d)
GLNAME(_mesa_sse_transform_points3_3d):

#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )

/* The "+8" steps over the two registers pushed above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */


MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3P3DR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */


/* Loop-invariant matrix columns (the top lane of each is loaded but unused). */
ALIGNTEXT32
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */
MOVAPS( M(8), XMM2 ) /* m10 | m9 | m8 */
MOVAPS( M(12), XMM3 ) /* m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP3P3DR_top):
MOVSS( S(0), XMM4 ) /* ox */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox */
MULPS( XMM0, XMM4 ) /* ox*m2 | ox*m1 | ox*m0 */

MOVSS( S(1), XMM5 ) /* oy */
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy */
MULPS( XMM1, XMM5 ) /* oy*m6 | oy*m5 | oy*m4 */

MOVSS( S(2), XMM6 ) /* oz */
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz */
MULPS( XMM2, XMM6 ) /* oz*m10 | oz*m9 | oz*m8 */

ADDPS( XMM5, XMM4 ) /* x-terms + y-terms */
ADDPS( XMM6, XMM4 ) /* + z-terms */
ADDPS( XMM3, XMM4 ) /* +m14 | +m13 | +m12 */

MOVLPS( XMM4, D(0) ) /* => D(1) | => D(0) */
UNPCKHPS( XMM4, XMM4 ) /* bring lane 2 (the z result) down to lane 0 */
MOVSS( XMM4, D(2) ) /* => D(2) */

LLBL(K_GTP3P3DR_skip):
ADD_L( CONST(16), EDI ) /* next dest vertex */
ADD_L( EAX, ESI ) /* next source vertex */
CMP_L( ECX, EDI ) /* reached the end address? */
JNE( LLBL(K_GTP3P3DR_top) )

LLBL(K_GTP3P3DR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/shark/trunk/ports/mesa/src/x86/sse_xform4.s
0,0 → 1,226
/* $Id: sse_xform4.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "matypes.h"
#include "xform_args.h"
 
SEG_TEXT
 
/* Every routine in this file pushes ESI and EDI (8 bytes) on entry.
 * NOTE(review): presumably the ARG_* macros from xform_args.h use
 * FRAME_OFFSET to skip those pushes - confirm against that header. */
#define FRAME_OFFSET 8

/* Operand shorthands: the i-th 32-bit float (byte offset i*4) relative to
 * the source vertex (ESI), destination vertex (EDI) and matrix (EDX)
 * pointers.  The argument is not parenthesized, so pass plain constants. */
#define SRC(i) REGOFF(i * 4, ESI)
#define DST(i) REGOFF(i * 4, EDI)
#define MAT(i) REGOFF(i * 4, EDX)

/* Packs four 2-bit selectors into one immediate, r0 in the top two bits
 * and r3 in the bottom two.  NOTE(review): looks like a SHUFPS immediate
 * builder; not used by the routines visible in this part of the file. */
#define SELECT(r0, r1, r2, r3) CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 )
 
 
/*
 * _mesa_sse_transform_points4_general
 *
 * Full 4x4 transform of 4-component vertices (ox, oy, oz, ow):
 *   D(j) = ox*m[j] + oy*m[4+j] + oz*m[8+j] + ow*m[12+j]   for j = 0..3
 * The destination vector gets count = source count, size = 4 and the
 * VEC_SIZE_4 flag bits.  A zero source count returns immediately without
 * touching the destination.
 *
 * Both the matrix loads and the per-vertex store use MOVAPS, so the matrix
 * and every destination vertex must be 16-byte aligned (MOVAPS faults on
 * an unaligned memory operand).
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = vertices remaining,
 * EDX = matrix.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_sse_transform_points4_general )
GLNAME( _mesa_sse_transform_points4_general ):

PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX ) /* verify non-zero count */
JE( LLBL( sse_general_done ) )

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */

MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */

PREFETCHT0( REGIND(ESI) ) /* start fetching the first source vertex */

/* Loop-invariant matrix columns. */
MOVAPS( MAT(0), XMM4 ) /* m3 | m2 | m1 | m0 */
MOVAPS( MAT(4), XMM5 ) /* m7 | m6 | m5 | m4 */
MOVAPS( MAT(8), XMM6 ) /* m11 | m10 | m9 | m8 */
MOVAPS( MAT(12), XMM7 ) /* m15 | m14 | m13 | m12 */

ALIGNTEXT16
LLBL( sse_general_loop ):

MOVSS( SRC(0), XMM0 ) /* ox */
SHUFPS( CONST(0x0), XMM0, XMM0 ) /* ox | ox | ox | ox */
MULPS( XMM4, XMM0 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */

MOVSS( SRC(1), XMM1 ) /* oy */
SHUFPS( CONST(0x0), XMM1, XMM1 ) /* oy | oy | oy | oy */
MULPS( XMM5, XMM1 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */

MOVSS( SRC(2), XMM2 ) /* oz */
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* oz | oz | oz | oz */
MULPS( XMM6, XMM2 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */

MOVSS( SRC(3), XMM3 ) /* ow */
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ow | ow | ow | ow */
MULPS( XMM7, XMM3 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */

ADDPS( XMM1, XMM0 ) /* ox*m3+oy*m7 | ... */
ADDPS( XMM2, XMM0 ) /* ox*m3+oy*m7+oz*m11 | ... */
ADDPS( XMM3, XMM0 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
MOVAPS( XMM0, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */

ADD_L( CONST(16), EDI ) /* next dest vertex */
ADD_L( EAX, ESI ) /* next source vertex */

DEC_L( ECX ) /* one vertex fewer to go */
JNZ( LLBL( sse_general_loop ) )

LLBL( sse_general_done ):

POP_L( EDI )
POP_L( ESI )
RET
 
 
 
 
/*
 * _mesa_sse_transform_points4_3d
 *
 * Transforms 4-component vertices (ox, oy, oz, ow).  A full 4x4 product
 * is computed and stored, then D(3) is overwritten with the source w, so
 * the net result is:
 *   D(j) = ox*m[j] + oy*m[4+j] + oz*m[8+j] + ow*m[12+j]   for j = 0..2
 *   D(3) = ow
 * A zero source count returns immediately without touching the
 * destination.
 *
 * NOTE(review): four components are written per vertex, yet the
 * destination is tagged size 3 / VEC_SIZE_3 below.  Confirm against the
 * generic C transform whether size 4 / VEC_SIZE_4 is intended.
 *
 * Both the matrix loads and the per-vertex store use MOVAPS, so the matrix
 * and every destination vertex must be 16-byte aligned (MOVAPS faults on
 * an unaligned memory operand).
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = dest end address, EDX = matrix.
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_sse_transform_points4_3d )
GLNAME( _mesa_sse_transform_points4_3d ):

PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX)
JZ( LLBL(K_GTP43P3DR_finish) ) /* count was zero; go to finish */

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI per vertex */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )/* set dest size */

SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant matrix columns. */
MOVAPS( MAT(0), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS( MAT(4), XMM1 ) /* m7 | m6 | m5 | m4 */
MOVAPS( MAT(8), XMM2 ) /* m11 | m10 | m9 | m8 */
MOVAPS( MAT(12), XMM3 ) /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL( K_GTP43P3DR_top ):
MOVSS( SRC(0), XMM4 ) /* ox */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */

MOVSS( SRC(1), XMM5 ) /* oy */
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */
MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */

MOVSS( SRC(2), XMM6 ) /* oz */
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */
MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */

MOVSS( SRC(3), XMM7 ) /* ow */
SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */
MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */

ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */
ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */
ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
MOVAPS( XMM4, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */

/* Replace the computed w with the untransformed source w. */
MOVSS( SRC(3), XMM4 ) /* ow */
MOVSS( XMM4, DST(3) ) /* ->D(3) */

LLBL( K_GTP43P3DR_skip ):
ADD_L( CONST(16), EDI ) /* next dest vertex */
ADD_L( EAX, ESI ) /* next source vertex */
CMP_L( ECX, EDI ) /* reached the end address? */
JNE( LLBL(K_GTP43P3DR_top) )

LLBL( K_GTP43P3DR_finish ):
POP_L( EDI )
POP_L( ESI )
RET
 
 
/*
 * _mesa_sse_transform_points4_identity
 *
 * Identity transform: copies all four floats (16 bytes) of every source
 * vertex to the destination.  The destination vector gets count = source
 * count, size = 4 and the VEC_SIZE_4 flag bits.  A zero source count
 * returns immediately without touching the destination.
 *
 * Each vertex is moved with a MOVAPS load and a MOVAPS store, so every
 * source and destination vertex address must be 16-byte aligned (MOVAPS
 * faults on an unaligned memory operand).
 *
 * Loop registers: ESI = source vertex (step EAX = source stride),
 * EDI = dest vertex (step 16 bytes), ECX = vertices remaining.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_sse_transform_points4_identity )
GLNAME( _mesa_sse_transform_points4_identity ):

PUSH_L( ESI )
PUSH_L( EDI )

MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */

MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix; EDX is never read in this routine */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */

TEST_L( ECX, ECX ) /* verify non-zero count */
JE( LLBL( sse_identity_done ) )

MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */

MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */

MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */

ALIGNTEXT16
LLBL( sse_identity_loop ):

PREFETCHNTA( REGOFF(32, ESI) ) /* prefetch 32 bytes ahead of the current source vertex */

MOVAPS( REGIND(ESI), XMM0 ) /* load the whole source vertex */
ADD_L( EAX, ESI ) /* next source vertex */

MOVAPS( XMM0, REGIND(EDI) ) /* store it to the dest vertex */
ADD_L( CONST(16), EDI ) /* next dest vertex */

DEC_L( ECX ) /* one vertex fewer to go */
JNZ( LLBL( sse_identity_loop ) )

LLBL( sse_identity_done ):

POP_L( EDI )
POP_L( ESI )
RET
/shark/trunk/ports/mesa/src/x86/sse_normal.s
0,0 → 1,252
/* $Id: sse_normal.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/** TODO:
* - insert PREFETCH instructions to avoid cache-misses !
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE-functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
*/
 
#include "matypes.h"
#include "norm_args.h"
 
SEG_TEXT
 
/*
 * Operand shorthands: the i-th 32-bit float (byte offset i*4) relative to
 * the matrix (EDX), source normal (ESI) and destination normal (EDI)
 * pointers.  The argument is not parenthesized, so pass plain constants.
 */
#define M(i) REGOFF(i * 4, EDX)
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
/* Read as the source vector's stride while ESI still points at the vector
 * struct.  NOTE(review): the byte offset 12 is hard-coded rather than taken
 * from a symbol in matypes.h - confirm it matches the struct layout. */
#define STRIDE REGOFF(12, ESI)
 
 
/*
 * _mesa_sse_transform_rescale_normals_no_rot
 *
 * Multiplies each source normal (ux, uy, uz) by the diagonal of the
 * matrix's inverse (the array loaded from MATRIX_INV) and by the scale
 * argument:
 *   D(0) = ux * m0  * scale
 *   D(1) = uy * m5  * scale
 *   D(2) = uz * m10 * scale
 * The scale factors are folded into the matrix terms once, before the
 * loop.  The destination count is set to the source count.  A zero source
 * count returns immediately without touching the destination.
 *
 * Loop registers: ESI = source normal (step EAX = source stride),
 * EDI = dest normal (step 16 bytes), ECX = dest end address,
 * EDX = inverse matrix.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot)
GLNAME(_mesa_sse_transform_rescale_normals_no_rot):

#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */

MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */

MOV_L ( REGOFF(V3F_COUNT, ESI), ECX ) /* source count */

TEST_L ( ECX, ECX )
JZ( LLBL(K_G3TRNNRR_finish) ) /* count was zero; go to finish */

MOV_L ( STRIDE, EAX ) /* source stride, added to ESI per normal */
MOV_L ( ECX, REGOFF(V3F_COUNT, EDI) ) /* set dest-count */

IMUL_L( CONST(16), ECX ) /* count *= 16 (bytes per dest normal) */
MOV_L( REGOFF(V3F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V3F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant constants: XMM1 = (m5*scale | m0*scale), XMM0 = m10*scale. */
ALIGNTEXT32
MOVSS ( M(0), XMM1 ) /* m0 */
MOVSS ( M(5), XMM2 ) /* m5 */
UNPCKLPS( XMM2, XMM1 ) /* m5 | m0 */
MOVSS ( ARG_SCALE, XMM0 ) /* scale */
SHUFPS ( CONST(0x0), XMM0, XMM0 ) /* scale | scale */
MULPS ( XMM0, XMM1 ) /* m5*scale | m0*scale */
MULSS ( M(10), XMM0 ) /* m10*scale */

ALIGNTEXT32
LLBL(K_G3TRNNRR_top):
MOVLPS ( S(0), XMM2 ) /* uy | ux */
MULPS ( XMM1, XMM2 ) /* uy*m5*scale | ux*m0*scale */
MOVLPS ( XMM2, D(0) ) /* ->D(1) | D(0) */

MOVSS ( S(2), XMM2 ) /* uz */
MULSS ( XMM0, XMM2 ) /* uz*m10*scale */
MOVSS ( XMM2, D(2) ) /* ->D(2) */

LLBL(K_G3TRNNRR_skip):
ADD_L ( CONST(16), EDI ) /* next dest normal */
ADD_L ( EAX, ESI ) /* next source normal */
CMP_L ( ECX, EDI ) /* reached the end address? */
JNE ( LLBL(K_G3TRNNRR_top) )

LLBL(K_G3TRNNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
 
/*
 * _mesa_sse_transform_rescale_normals
 *
 * Transforms each source normal (ux, uy, uz) by the upper-left 3x3 of the
 * matrix's inverse (the array loaded from MATRIX_INV), with every matrix
 * term multiplied by the scale argument:
 *   D(0) = scale * (ux*m0 + uy*m1 + uz*m2)
 *   D(1) = scale * (ux*m4 + uy*m5 + uz*m6)
 *   D(2) = scale * (ux*m8 + uy*m9 + uz*m10)
 * The destination count is set to the source count.  A zero source count
 * returns immediately without touching the destination.
 *
 * Loop registers: ESI = source normal (step EAX = source stride),
 * EDI = dest normal (step 16 bytes), ECX = dest end address,
 * EDX = inverse matrix.
 * XMM0-XMM2, XMM6 and XMM7 hold pre-scaled matrix terms across the loop;
 * XMM3-XMM5 are per-iteration temporaries.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_rescale_normals)
GLNAME(_mesa_sse_transform_rescale_normals):

#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */

MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */

MOV_L ( REGOFF(V3F_COUNT, ESI), ECX ) /* source count */

TEST_L ( ECX, ECX )
JZ( LLBL(K_G3TRNR_finish) ) /* count was zero; go to finish */

MOV_L ( STRIDE, EAX ) /* source stride, added to ESI per normal */
MOV_L ( ECX, REGOFF(V3F_COUNT, EDI) ) /* set dest-count */

IMUL_L( CONST(16), ECX ) /* count *= 16 (bytes per dest normal) */
MOV_L( REGOFF(V3F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V3F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant, pre-scaled matrix terms for the x and y outputs. */
ALIGNTEXT32
MOVSS ( M(0), XMM0 ) /* m0 */
MOVSS ( M(4), XMM1 ) /* m4 */
UNPCKLPS( XMM1, XMM0 ) /* m4 | m0 */

MOVSS ( ARG_SCALE, XMM4 ) /* scale */
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* scale | scale */

MULPS ( XMM4, XMM0 ) /* m4*scale | m0*scale */
MOVSS ( M(1), XMM1 ) /* m1 */
MOVSS ( M(5), XMM2 ) /* m5 */
UNPCKLPS( XMM2, XMM1 ) /* m5 | m1 */
MULPS ( XMM4, XMM1 ) /* m5*scale | m1*scale */
MOVSS ( M(2), XMM2 ) /* m2 */
MOVSS ( M(6), XMM3 ) /* m6 */
UNPCKLPS( XMM3, XMM2 ) /* m6 | m2 */
MULPS ( XMM4, XMM2 ) /* m6*scale | m2*scale */

/* Pre-scaled terms for the z output (m10*scale is formed inside the loop). */
MOVSS ( M(8), XMM6 ) /* m8 */
MULSS ( ARG_SCALE, XMM6 ) /* m8*scale */
MOVSS ( M(9), XMM7 ) /* m9 */
MULSS ( ARG_SCALE, XMM7 ) /* m9*scale */

ALIGNTEXT32
LLBL(K_G3TRNR_top):
MOVSS ( S(0), XMM3 ) /* ux */
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ux | ux */
MULPS ( XMM0, XMM3 ) /* ux*m4*scale | ux*m0*scale */
MOVSS ( S(1), XMM4 ) /* uy */
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* uy | uy */
MULPS ( XMM1, XMM4 ) /* uy*m5*scale | uy*m1*scale */
MOVSS ( S(2), XMM5 ) /* uz */
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* uz | uz */
MULPS ( XMM2, XMM5 ) /* uz*m6*scale | uz*m2*scale */

ADDPS ( XMM4, XMM3 ) /* x-terms + y-terms */
ADDPS ( XMM5, XMM3 ) /* + z-terms */
MOVLPS ( XMM3, D(0) ) /* ->D(1) | ->D(0) */

/* z output.  m10*scale is recomputed on every iteration; the other
 * seven XMM registers are all occupied at this point. */
MOVSS ( M(10), XMM3 ) /* m10 */
MULSS ( ARG_SCALE, XMM3 ) /* m10*scale */
MULSS ( S(2), XMM3 ) /* m10*scale*uz */
MOVSS ( S(1), XMM4 ) /* uy */
MULSS ( XMM7, XMM4 ) /* uy*m9*scale */
MOVSS ( S(0), XMM5 ) /* ux */
MULSS ( XMM6, XMM5 ) /* ux*m8*scale */

ADDSS ( XMM4, XMM3 ) /* + uy*m9*scale */
ADDSS ( XMM5, XMM3 ) /* + ux*m8*scale */
MOVSS ( XMM3, D(2) ) /* ->D(2) */

LLBL(K_G3TRNR_skip):
ADD_L ( CONST(16), EDI ) /* next dest normal */
ADD_L ( EAX, ESI ) /* next source normal */
CMP_L ( ECX, EDI ) /* reached the end address? */
JNE ( LLBL(K_G3TRNR_top) )

LLBL(K_G3TRNR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
 
 
/*
 * _mesa_sse_transform_normals_no_rot
 *
 * Multiplies each source normal (ux, uy, uz) by the diagonal of the
 * matrix's inverse (the array loaded from MATRIX_INV):
 *   D(0) = ux * m0
 *   D(1) = uy * m5
 *   D(2) = uz * m10
 * The scale argument is not read by this routine.  The destination count
 * is set to the source count.  A zero source count returns immediately
 * without touching the destination.
 *
 * Loop registers: ESI = source normal (step EAX = source stride),
 * EDI = dest normal (step 16 bytes), ECX = dest end address,
 * EDX = inverse matrix.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_normals_no_rot)
GLNAME(_mesa_sse_transform_normals_no_rot):

#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )

MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */

MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */

MOV_L ( REGOFF(V3F_COUNT, ESI), ECX ) /* source count */

TEST_L ( ECX, ECX )
JZ( LLBL(K_G3TNNRR_finish) ) /* count was zero; go to finish */

MOV_L ( STRIDE, EAX ) /* source stride, added to ESI per normal */
MOV_L ( ECX, REGOFF(V3F_COUNT, EDI) ) /* set dest-count */

IMUL_L( CONST(16), ECX ) /* count *= 16 (bytes per dest normal) */
MOV_L( REGOFF(V3F_START, ESI), ESI ) /* ptr to first source vertex */

MOV_L( REGOFF(V3F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest ptr + count*16 = loop end address */

/* Loop-invariant constants: XMM0 = (m5 | m0), XMM1 = m10. */
ALIGNTEXT32
MOVSS( M(0), XMM0 ) /* m0 */
MOVSS( M(5), XMM1 ) /* m5 */
UNPCKLPS( XMM1, XMM0 ) /* m5 | m0 */
MOVSS( M(10), XMM1 ) /* m10 */

ALIGNTEXT32
LLBL(K_G3TNNRR_top):
MOVLPS( S(0), XMM2 ) /* uy | ux */
MULPS( XMM0, XMM2 ) /* uy*m5 | ux*m0 */
MOVLPS( XMM2, D(0) ) /* ->D(1) | ->D(0) */

MOVSS( S(2), XMM2 ) /* uz */
MULSS( XMM1, XMM2 ) /* uz*m10 */
MOVSS( XMM2, D(2) ) /* ->D(2) */

LLBL(K_G3TNNRR_skip):
ADD_L ( CONST(16), EDI ) /* next dest normal */
ADD_L ( EAX, ESI ) /* next source normal */
CMP_L ( ECX, EDI ) /* reached the end address? */
JNE ( LLBL(K_G3TNNRR_top) )

LLBL(K_G3TNNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/shark/trunk/ports/mesa/src/x86/sse.c
0,0 → 1,119
/* $Id: sse.c,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* PentiumIII-SIMD (SSE) optimizations contributed by
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
*/
 
#include "glheader.h"
#include "context.h"
#include "math/m_xform.h"
#include "tnl/t_context.h"
 
#include "sse.h"
#include "common_x86_macros.h"
 
#ifdef DEBUG
#include "math/m_debug.h"
#endif
 
 
/*
 * Prototypes for the SSE routines that _mesa_init_sse_transform_asm()
 * installs into the transform/normal tables.  Everything in this section
 * is compiled only when USE_SSE_ASM is defined.
 */
#ifdef USE_SSE_ASM
/* NOTE(review): presumably these declare the SSE point-transform routines
 * for 2- and 3-component input; the macro is defined outside this file --
 * confirm.
 */
DECLARE_XFORM_GROUP( sse, 2 )
DECLARE_XFORM_GROUP( sse, 3 )

#if 1
/* Some functions are not written in SSE-assembly, because the fpu ones are
 * faster, so the SSE routines that are used get declared one by one here.
 * The disabled #else branch would declare the whole normal group instead.
 */
extern void _mesa_sse_transform_normals_no_rot( NORM_ARGS );
extern void _mesa_sse_transform_rescale_normals( NORM_ARGS );
extern void _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS );

extern void _mesa_sse_transform_points4_general( XFORM_ARGS );
extern void _mesa_sse_transform_points4_3d( XFORM_ARGS );
extern void _mesa_sse_transform_points4_identity( XFORM_ARGS );
#else
DECLARE_NORM_GROUP( sse )
#endif


/* The remaining prototypes are declared here but are not referenced by
 * _mesa_init_sse_transform_asm() in this file.
 */
extern void _ASMAPI
_mesa_v16_sse_general_xform( GLfloat *first_vert,
const GLfloat *m,
const GLfloat *src,
GLuint src_stride,
GLuint count );

extern void _ASMAPI
_mesa_sse_project_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride );

extern void _ASMAPI
_mesa_sse_project_clipped_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride,
const GLubyte *clipmask );
#endif
 
 
/*
 * Install the SSE implementations into Mesa's transform dispatch tables.
 *
 * When USE_SSE_ASM is not defined the body is empty and the tables are left
 * untouched.  Otherwise the 2- and 3-component point transform groups are
 * assigned through ASSIGN_XFORM_GROUP, and a hand-picked subset of the
 * 4-component point transforms and the normal transforms is written
 * directly into _mesa_transform_tab / _mesa_normal_tab.  In DEBUG builds the
 * math self-tests are run afterwards with the label "SSE".
 */
void _mesa_init_sse_transform_asm( void )
{
#ifdef USE_SSE_ASM
ASSIGN_XFORM_GROUP( sse, 2 );
ASSIGN_XFORM_GROUP( sse, 3 );

#if 1
/* TODO: Finish these off.
*/
/* Only three 4-component matrix types are overridden here; every other
 * entry of _mesa_transform_tab[4] keeps whatever was installed before.
 */
_mesa_transform_tab[4][MATRIX_GENERAL] =
_mesa_sse_transform_points4_general;
_mesa_transform_tab[4][MATRIX_3D] =
_mesa_sse_transform_points4_3d;
_mesa_transform_tab[4][MATRIX_IDENTITY] =
_mesa_sse_transform_points4_identity;

/* Likewise only three normal-transform variants are overridden. */
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
_mesa_sse_transform_normals_no_rot;
_mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
_mesa_sse_transform_rescale_normals;
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
_mesa_sse_transform_rescale_normals_no_rot;
#else
/* Disabled alternative: assign the complete 4-component and normal groups. */
ASSIGN_XFORM_GROUP( sse, 4 );

ASSIGN_NORM_GROUP( sse );
#endif

#ifdef DEBUG
_math_test_all_transform_functions( "SSE" );
_math_test_all_normal_transform_functions( "SSE" );
#endif
#endif
}
 
/shark/trunk/ports/mesa/src/tnl/t_vb_gentex.c
File deleted
/shark/trunk/ports/mesa/src/tnl/t_import_array.h
File deleted
/shark/trunk/ports/mesa/src/tnl/t_import_array.c
File deleted
/shark/trunk/ports/mesa/src/tnl/t_array_import.c
0,0 → 1,432
/* $Id: t_array_import.c,v 1.1 2003-02-28 11:48:06 pj Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 4.1
*
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "glheader.h"
#include "context.h"
#include "macros.h"
#include "imports.h"
#include "mmath.h"
#include "state.h"
#include "mtypes.h"
 
#include "array_cache/acache.h"
#include "math/m_translate.h"
 
#include "t_array_import.h"
#include "t_context.h"
#include "t_imm_debug.h"
 
 
/*
 * Fetch the vertex position array from the array cache as GL_FLOAT data and
 * describe it in the TNL context's array_inputs.Obj vector.
 *
 * writeable - forwarded to _ac_import_vertex() as the "want writeable" request.
 * stride    - when true, a stride of 4*sizeof(GLfloat) is requested from the
 *             array cache; otherwise 0 is passed.
 *
 * On return Obj.flags has VEC_BAD_STRIDE set iff the stride actually
 * delivered is not 4*sizeof(GLfloat), and VEC_NOT_WRITEABLE set iff the
 * array cache did not report the data as writeable.
 */
static void _tnl_import_vertex( GLcontext *ctx,
                                GLboolean writeable,
                                GLboolean stride )
{
   GLvector4f *obj = &TNL_CONTEXT(ctx)->array_inputs.Obj;
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_vertex(ctx, GL_FLOAT,
                             stride ? 4*sizeof(GLfloat) : 0,
                             0, writeable, &got_writeable);

   obj->data = (GLfloat (*)[4]) array->Ptr;
   obj->start = (GLfloat *) array->Ptr;
   obj->stride = array->StrideB;
   obj->size = array->Size;

   if (obj->stride == 4*sizeof(GLfloat))
      obj->flags &= ~VEC_BAD_STRIDE;
   else
      obj->flags |= VEC_BAD_STRIDE;

   if (got_writeable)
      obj->flags &= ~VEC_NOT_WRITEABLE;
   else
      obj->flags |= VEC_NOT_WRITEABLE;
}
 
/*
 * Fetch the normal array from the array cache as GL_FLOAT data and describe
 * it in array_inputs.Normal.
 *
 * writeable - forwarded to _ac_import_normal().
 * stride    - when true, a stride of 3*sizeof(GLfloat) is requested;
 *             otherwise 0 is passed.
 *
 * VEC_BAD_STRIDE ends up set iff the delivered stride is not
 * 3*sizeof(GLfloat); VEC_NOT_WRITEABLE iff the data was not reported
 * writeable.  The vector's size field is not touched here.
 */
static void _tnl_import_normal( GLcontext *ctx,
                                GLboolean writeable,
                                GLboolean stride )
{
   GLvector4f *nrm = &TNL_CONTEXT(ctx)->array_inputs.Normal;
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_normal(ctx, GL_FLOAT,
                             stride ? 3*sizeof(GLfloat) : 0,
                             writeable, &got_writeable);

   nrm->data = (GLfloat (*)[4]) array->Ptr;
   nrm->start = (GLfloat *) array->Ptr;
   nrm->stride = array->StrideB;

   if (nrm->stride == 3*sizeof(GLfloat))
      nrm->flags &= ~VEC_BAD_STRIDE;
   else
      nrm->flags |= VEC_BAD_STRIDE;

   if (got_writeable)
      nrm->flags &= ~VEC_NOT_WRITEABLE;
   else
      nrm->flags |= VEC_NOT_WRITEABLE;
}
 
 
/*
 * Fetch the primary color array (4 components, in the requested datatype)
 * from the array cache and copy the returned gl_client_array descriptor
 * into array_inputs.Color.
 *
 * type      - datatype passed on to _ac_import_color().
 * writeable - forwarded to the array cache.
 * stride    - when true, a stride of 4*sizeof(GLfloat) is requested;
 *             otherwise 0 is passed.
 *
 * The writeable status reported by the array cache is not used further.
 */
static void _tnl_import_color( GLcontext *ctx,
                               GLenum type,
                               GLboolean writeable,
                               GLboolean stride )
{
   struct vertex_arrays *arrays = &TNL_CONTEXT(ctx)->array_inputs;
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_color(ctx, type,
                            stride ? 4*sizeof(GLfloat) : 0,
                            4, writeable, &got_writeable);

   arrays->Color = *array;
}
 
 
/*
 * Fetch the secondary color array (4 components, in the requested datatype)
 * from the array cache and copy the returned gl_client_array descriptor
 * into array_inputs.SecondaryColor.
 *
 * type      - datatype passed on to _ac_import_secondarycolor().
 * writeable - forwarded to the array cache.
 * stride    - when true, a stride of 4*sizeof(GLfloat) is requested;
 *             otherwise 0 is passed.
 *
 * The writeable status reported by the array cache is not used further.
 */
static void _tnl_import_secondarycolor( GLcontext *ctx,
                                        GLenum type,
                                        GLboolean writeable,
                                        GLboolean stride )
{
   struct vertex_arrays *arrays = &TNL_CONTEXT(ctx)->array_inputs;
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_secondarycolor(ctx, type,
                                     stride ? 4*sizeof(GLfloat) : 0,
                                     4, writeable, &got_writeable);

   arrays->SecondaryColor = *array;
}
 
/*
 * Fetch the fog coordinate array from the array cache as GL_FLOAT data and
 * describe it in array_inputs.FogCoord.
 *
 * writeable - forwarded to _ac_import_fogcoord().
 * stride    - when true, a stride of sizeof(GLfloat) is requested;
 *             otherwise 0 is passed.
 *
 * VEC_BAD_STRIDE ends up set iff the delivered stride is not
 * sizeof(GLfloat); VEC_NOT_WRITEABLE iff the data was not reported
 * writeable.
 */
static void _tnl_import_fogcoord( GLcontext *ctx,
                                  GLboolean writeable,
                                  GLboolean stride )
{
   struct vertex_arrays *arrays = &TNL_CONTEXT(ctx)->array_inputs;
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_fogcoord(ctx, GL_FLOAT,
                               stride ? sizeof(GLfloat) : 0,
                               writeable, &got_writeable);

   arrays->FogCoord.data = (GLfloat (*)[4]) array->Ptr;
   arrays->FogCoord.start = (GLfloat *) array->Ptr;
   arrays->FogCoord.stride = array->StrideB;

   if (arrays->FogCoord.stride == sizeof(GLfloat))
      arrays->FogCoord.flags &= ~VEC_BAD_STRIDE;
   else
      arrays->FogCoord.flags |= VEC_BAD_STRIDE;

   if (got_writeable)
      arrays->FogCoord.flags &= ~VEC_NOT_WRITEABLE;
   else
      arrays->FogCoord.flags |= VEC_NOT_WRITEABLE;
}
 
/*
 * Fetch the color-index array from the array cache as GL_UNSIGNED_INT data
 * and describe it in array_inputs.Index.
 *
 * writeable - forwarded to _ac_import_index().
 * stride    - when true, a stride of sizeof(GLuint) is requested;
 *             otherwise 0 is passed.
 *
 * VEC_BAD_STRIDE ends up set iff the delivered stride is not
 * sizeof(GLuint); VEC_NOT_WRITEABLE iff the data was not reported
 * writeable.
 */
static void _tnl_import_index( GLcontext *ctx,
                               GLboolean writeable,
                               GLboolean stride )
{
   struct vertex_arrays *arrays = &TNL_CONTEXT(ctx)->array_inputs;
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_index(ctx, GL_UNSIGNED_INT,
                            stride ? sizeof(GLuint) : 0,
                            writeable, &got_writeable);

   arrays->Index.data = (GLuint *) array->Ptr;
   arrays->Index.start = (GLuint *) array->Ptr;
   arrays->Index.stride = array->StrideB;

   if (arrays->Index.stride == sizeof(GLuint))
      arrays->Index.flags &= ~VEC_BAD_STRIDE;
   else
      arrays->Index.flags |= VEC_BAD_STRIDE;

   if (got_writeable)
      arrays->Index.flags &= ~VEC_NOT_WRITEABLE;
   else
      arrays->Index.flags |= VEC_NOT_WRITEABLE;
}
 
 
/*
 * Fetch the texture coordinate array of one texture unit from the array
 * cache as GL_FLOAT data and describe it in array_inputs.TexCoord[unit].
 *
 * unit      - texture unit whose array is imported.
 * writeable - forwarded to _ac_import_texcoord().
 * stride    - when true, a stride of 4*sizeof(GLfloat) is requested;
 *             otherwise 0 is passed.
 *
 * VEC_BAD_STRIDE ends up set iff the delivered stride is not
 * 4*sizeof(GLfloat); VEC_NOT_WRITEABLE iff the data was not reported
 * writeable.
 */
static void _tnl_import_texcoord( GLcontext *ctx,
                                  GLuint unit,
                                  GLboolean writeable,
                                  GLboolean stride )
{
   GLvector4f *tc = &TNL_CONTEXT(ctx)->array_inputs.TexCoord[unit];
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_texcoord(ctx, unit, GL_FLOAT,
                               stride ? 4 * sizeof(GLfloat) : 0,
                               0, writeable, &got_writeable);

   tc->data = (GLfloat (*)[4]) array->Ptr;
   tc->start = (GLfloat *) array->Ptr;
   tc->stride = array->StrideB;
   tc->size = array->Size;

   if (tc->stride == 4*sizeof(GLfloat))
      tc->flags &= ~VEC_BAD_STRIDE;
   else
      tc->flags |= VEC_BAD_STRIDE;

   if (got_writeable)
      tc->flags &= ~VEC_NOT_WRITEABLE;
   else
      tc->flags |= VEC_NOT_WRITEABLE;
}
 
 
/*
 * Fetch the edge flag array from the array cache as GL_UNSIGNED_BYTE data
 * and describe it in array_inputs.EdgeFlag.
 *
 * writeable - accepted for symmetry with the other importers.
 *             NOTE(review): it is not forwarded; a literal 0 is passed to
 *             _ac_import_edgeflag() in its place -- confirm this is intended.
 * stride    - when true, a stride of sizeof(GLubyte) is requested;
 *             otherwise 0 is passed.
 *
 * VEC_BAD_STRIDE ends up set iff the delivered stride is not
 * sizeof(GLubyte); VEC_NOT_WRITEABLE iff the data was not reported
 * writeable.
 */
static void _tnl_import_edgeflag( GLcontext *ctx,
                                  GLboolean writeable,
                                  GLboolean stride )
{
   struct vertex_arrays *arrays = &TNL_CONTEXT(ctx)->array_inputs;
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_edgeflag(ctx, GL_UNSIGNED_BYTE,
                               stride ? sizeof(GLubyte) : 0,
                               0, &got_writeable);

   arrays->EdgeFlag.data = (GLubyte *) array->Ptr;
   arrays->EdgeFlag.start = (GLubyte *) array->Ptr;
   arrays->EdgeFlag.stride = array->StrideB;

   if (arrays->EdgeFlag.stride == sizeof(GLubyte))
      arrays->EdgeFlag.flags &= ~VEC_BAD_STRIDE;
   else
      arrays->EdgeFlag.flags |= VEC_BAD_STRIDE;

   if (got_writeable)
      arrays->EdgeFlag.flags &= ~VEC_NOT_WRITEABLE;
   else
      arrays->EdgeFlag.flags |= VEC_NOT_WRITEABLE;
}
 
 
 
/*
 * Fetch one generic vertex attribute array from the array cache as
 * 4-component GL_FLOAT data and describe it in array_inputs.Attribs[index].
 *
 * index     - attribute slot to import.
 * writeable - forwarded to _ac_import_attrib().
 * stride    - when true, a stride of 4*sizeof(GLfloat) is requested;
 *             otherwise 0 is passed.
 *
 * VEC_BAD_STRIDE ends up set iff the delivered stride is not
 * 4*sizeof(GLfloat); VEC_NOT_WRITEABLE iff the data was not reported
 * writeable.
 */
static void _tnl_import_attrib( GLcontext *ctx,
                                GLuint index,
                                GLboolean writeable,
                                GLboolean stride )
{
   struct vertex_arrays *arrays = &TNL_CONTEXT(ctx)->array_inputs;
   GLboolean got_writeable = 0;
   struct gl_client_array *array;

   array = _ac_import_attrib(ctx, index, GL_FLOAT,
                             stride ? 4 * sizeof(GLfloat) : 0,
                             4,  /* want GLfloat[4] */
                             writeable, &got_writeable);

   arrays->Attribs[index].data = (GLfloat (*)[4]) array->Ptr;
   arrays->Attribs[index].start = (GLfloat *) array->Ptr;
   arrays->Attribs[index].stride = array->StrideB;
   arrays->Attribs[index].size = array->Size;

   if (arrays->Attribs[index].stride == 4 * sizeof(GLfloat))
      arrays->Attribs[index].flags &= ~VEC_BAD_STRIDE;
   else
      arrays->Attribs[index].flags |= VEC_BAD_STRIDE;

   if (got_writeable)
      arrays->Attribs[index].flags &= ~VEC_NOT_WRITEABLE;
   else
      arrays->Attribs[index].flags |= VEC_NOT_WRITEABLE;
}
 
 
 
/**
 * Callback for VB stages that need to improve the quality of arrays
 * bound to the VB.  This is only necessary for client arrays which
 * have not been transformed at any point in the pipeline.
 *
 * For every requested array whose current flags intersect \p flags, the
 * array is re-imported with the matching writeable/stride requests and its
 * bit is removed from VB->importable_data.
 *
 * \param required - bitmask of VERT_*_BIT flags
 * \param flags - bitmask of VEC_* flags (ex: VEC_NOT_WRITEABLE)
 */
static void _tnl_upgrade_client_data( GLcontext *ctx,
                                      GLuint required,
                                      GLuint flags )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct vertex_arrays *arrays = &TNL_CONTEXT(ctx)->array_inputs;
   const GLboolean writeable = (flags & VEC_NOT_WRITEABLE) != 0;
   const GLboolean stride = (flags & VEC_BAD_STRIDE) != 0;
   /* Color arrays are gl_client_arrays and carry CA_* flags instead. */
   const GLuint ca_flags = (writeable || stride) ? CA_CLIENT_DATA : 0;
   GLuint unit;
   (void) arrays;   /* only referenced from ASSERT() */

   /* Clip coordinates that alias the object coordinates are upgraded by
    * upgrading the object coordinates.
    */
   if ((required & VERT_BIT_CLIP) && VB->ClipPtr == VB->ObjPtr) {
      required |= VERT_BIT_POS;
   }

   /* _tnl_print_vert_flags("_tnl_upgrade_client_data", required); */

   if (required & VERT_BIT_POS) {
      if (VB->ObjPtr->flags & flags) {
         ASSERT(VB->ObjPtr == &arrays->Obj);
         _tnl_import_vertex( ctx, writeable, stride );
         VB->importable_data &= ~(VERT_BIT_POS|VERT_BIT_CLIP);
      }
   }

   if (required & VERT_BIT_NORMAL) {
      if (VB->NormalPtr->flags & flags) {
         ASSERT(VB->NormalPtr == &arrays->Normal);
         _tnl_import_normal( ctx, writeable, stride );
         VB->importable_data &= ~VERT_BIT_NORMAL;
      }
   }

   if (required & VERT_BIT_COLOR0) {
      if (VB->ColorPtr[0]->Flags & ca_flags) {
         ASSERT(VB->ColorPtr[0] == &arrays->Color);
         _tnl_import_color( ctx, GL_FLOAT, writeable, stride );
         VB->importable_data &= ~VERT_BIT_COLOR0;
      }
   }

   if (required & VERT_BIT_COLOR1) {
      if (VB->SecondaryColorPtr[0]->Flags & ca_flags) {
         ASSERT(VB->SecondaryColorPtr[0] == &arrays->SecondaryColor);
         _tnl_import_secondarycolor( ctx, GL_FLOAT, writeable, stride );
         VB->importable_data &= ~VERT_BIT_COLOR1;
      }
   }

   if (required & VERT_BIT_FOG) {
      if (VB->FogCoordPtr->flags & flags) {
         ASSERT(VB->FogCoordPtr == &arrays->FogCoord);
         _tnl_import_fogcoord( ctx, writeable, stride );
         VB->importable_data &= ~VERT_BIT_FOG;
      }
   }

   if (required & VERT_BIT_INDEX) {
      if (VB->IndexPtr[0]->flags & flags) {
         ASSERT(VB->IndexPtr[0] == &arrays->Index);
         _tnl_import_index( ctx, writeable, stride );
         VB->importable_data &= ~VERT_BIT_INDEX;
      }
   }

   if (required & VERT_BITS_TEX_ANY) {
      for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
         if (!(required & VERT_BIT_TEX(unit)))
            continue;
         if (!(VB->TexCoordPtr[unit]->flags & flags))
            continue;

         ASSERT(VB->TexCoordPtr[unit] == &arrays->TexCoord[unit]);
         _tnl_import_texcoord( ctx, unit, writeable, stride );
         VB->importable_data &= ~VERT_BIT_TEX(unit);
      }
   }

   /* XXX not sure what to do here for vertex program arrays */
}
 
 
 
/*
 * Bind the client vertex arrays for the range [start, count) to the TNL
 * vertex buffer.
 *
 * The VB bookkeeping fields are reset, the array cache is told which range
 * is wanted, and then every array named in the pipeline's input mask is
 * imported (without writeable/stride requests, except for edge flags) and
 * hooked up to the corresponding VB pointer.  When a vertex program is
 * enabled all generic attribute arrays are imported as well.
 */
void _tnl_vb_bind_arrays( GLcontext *ctx, GLint start, GLsizei count )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   struct vertex_arrays *arrays = &tnl->array_inputs;
   const GLuint inputs = tnl->pipeline.inputs;

/*     _mesa_debug(ctx, "%s %d..%d // %d..%d\n", __FUNCTION__, */
/*                 start, count, ctx->Array.LockFirst, ctx->Array.LockCount);  */
/*     _tnl_print_vert_flags("    inputs", inputs);  */
/*     _tnl_print_vert_flags("    _Enabled", ctx->Array._Enabled); */
/*     _tnl_print_vert_flags("    importable", inputs & VERT_BITS_FIXUP); */

   /* Reset the per-draw state of the vertex buffer. */
   VB->Count = count - start;
   VB->FirstClipped = VB->Count;
   VB->Elts = NULL;
   VB->MaterialMask = NULL;
   VB->Material = NULL;
   VB->Flag = NULL;
   VB->Primitive = tnl->tmp_primitive;
   VB->PrimitiveLength = tnl->tmp_primitive_length;
   VB->import_data = _tnl_upgrade_client_data;
   VB->importable_data = inputs & VERT_BITS_FIXUP;

   /* With locked arrays the caller must ask for exactly the locked range. */
   if (ctx->Array.LockCount) {
      ASSERT(start == (GLint) ctx->Array.LockFirst);
      ASSERT(count == (GLint) ctx->Array.LockCount);
   }

   _ac_import_range( ctx, start, count );

   if (inputs & VERT_BIT_POS) {
      _tnl_import_vertex( ctx, 0, 0 );
      arrays->Obj.count = VB->Count;
      VB->ObjPtr = &arrays->Obj;
   }

   if (inputs & VERT_BIT_NORMAL) {
      _tnl_import_normal( ctx, 0, 0 );
      arrays->Normal.count = VB->Count;
      VB->NormalPtr = &arrays->Normal;
   }

   if (inputs & VERT_BIT_COLOR0) {
      _tnl_import_color( ctx, 0, 0, 0 );
      VB->ColorPtr[0] = &arrays->Color;
      VB->ColorPtr[1] = 0;
   }

   if (inputs & VERT_BITS_TEX_ANY) {
      GLuint u;
      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
         if (!(inputs & VERT_BIT_TEX(u)))
            continue;

         _tnl_import_texcoord( ctx, u, GL_FALSE, GL_FALSE );
         arrays->TexCoord[u].count = VB->Count;
         VB->TexCoordPtr[u] = &arrays->TexCoord[u];
      }
   }

   /* The less common arrays are grouped behind a single mask test. */
   if (inputs & (VERT_BIT_INDEX | VERT_BIT_FOG |
                 VERT_BIT_EDGEFLAG | VERT_BIT_COLOR1)) {

      if (inputs & VERT_BIT_INDEX) {
         _tnl_import_index( ctx, 0, 0 );
         arrays->Index.count = VB->Count;
         VB->IndexPtr[0] = &arrays->Index;
         VB->IndexPtr[1] = 0;
      }

      if (inputs & VERT_BIT_FOG) {
         _tnl_import_fogcoord( ctx, 0, 0 );
         arrays->FogCoord.count = VB->Count;
         VB->FogCoordPtr = &arrays->FogCoord;
      }

      if (inputs & VERT_BIT_EDGEFLAG) {
         _tnl_import_edgeflag( ctx, GL_TRUE, sizeof(GLboolean) );
         VB->EdgeFlag = (GLboolean *) arrays->EdgeFlag.data;
      }

      if (inputs & VERT_BIT_COLOR1) {
         _tnl_import_secondarycolor( ctx, 0, 0, 0 );
         VB->SecondaryColorPtr[0] = &arrays->SecondaryColor;
         VB->SecondaryColorPtr[1] = 0;
      }
   }

   /* XXX not 100% sure this is finished.  Keith should probably inspect. */
   if (ctx->VertexProgram.Enabled) {
      GLuint attr;
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
         /* XXX check program->InputsRead to reduce work here */
         _tnl_import_attrib( ctx, attr, GL_FALSE, GL_TRUE );
         VB->AttribPtr[attr] = &arrays->Attribs[attr];
      }
   }
}
/shark/trunk/ports/mesa/src/tnl/t_array_import.h
0,0 → 1,37
/* $Id: t_array_import.h,v 1.1 2003-02-28 11:48:06 pj Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/* Public interface of the TNL client-array import code. */
#ifndef _T_ARRAY_IMPORT_H
#define _T_ARRAY_IMPORT_H

#include "mtypes.h"
#include "t_context.h"

/* Bind the client arrays for the vertex range [start, count) to the TNL
 * vertex buffer of ctx.
 */
extern void _tnl_vb_bind_arrays( GLcontext *ctx, GLint start, GLsizei count );

/* NOTE(review): no definition of this function appears in the accompanying
 * t_array_import.c -- confirm it is defined elsewhere or still needed.
 */
extern void _tnl_array_import_init( GLcontext *ctx );

#endif
/shark/trunk/ports/mesa/src/tnl/t_vb_texgen.c
0,0 → 1,692
/* $Id: t_vb_texgen.c,v 1.1 2003-02-28 11:48:08 pj Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Brian Paul
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "glheader.h"
#include "colormac.h"
#include "context.h"
#include "macros.h"
#include "mmath.h"
#include "imports.h"
#include "mtypes.h"
 
#include "math/m_xform.h"
 
#include "t_context.h"
#include "t_pipeline.h"
 
 
/***********************************************************************
* Automatic texture coordinate generation (texgen) code.
*/
 
 
/* Forward declaration so the function-pointer type below can refer to it. */
struct texgen_stage_data;

/* Signature of a per-texture-unit texgen routine: generates the texture
 * coordinates of texture unit `unit` into the stage's private storage.
 */
typedef void (*texgen_func)( GLcontext *ctx,
struct texgen_stage_data *store,
GLuint unit);
 
 
/* Private data of the texgen pipeline stage (hung off stage->privatePtr). */
struct texgen_stage_data {

/* Per-texunit derived state.
*/
GLuint TexgenSize[MAX_TEXTURE_UNITS];   /* texcoord size implied by the enabled texgen bits */
GLuint TexgenHoles[MAX_TEXTURE_UNITS];  /* components within that size that are not generated */
texgen_func TexgenFunc[MAX_TEXTURE_UNITS]; /* routine selected for the unit at validation time */

/* Temporary values used in texgen.
*/
GLfloat (*tmp_f)[3];  /* per-vertex vectors written by the build_m/build_f helpers */
GLfloat *tmp_m;       /* per-vertex scale factors written by the build_m helpers */

/* Buffered outputs of the stage.
*/
GLvector4f texcoord[MAX_TEXTURE_UNITS];
};


/* Fetch the stage's private data, cast to its real type. */
#define TEXGEN_STAGE_DATA(stage) ((struct texgen_stage_data *)stage->privatePtr)
 
 
 
/* Indexed by vector size (0..4): the VEC_SIZE_n flag value for that size,
 * with 0 for size 0.
 */
static GLuint all_bits[5] = {
0,
VEC_SIZE_1,
VEC_SIZE_2,
VEC_SIZE_3,
VEC_SIZE_4,
};

/* Union of all VEC_SIZE_n flags; used to mask the size bits out of a
 * vector's flags.
 */
#define VEC_SIZE_FLAGS (VEC_SIZE_1|VEC_SIZE_2|VEC_SIZE_3|VEC_SIZE_4)

/* Texgen modes for which the general texgen path calls the build_m helpers
 * (which fill both tmp_f and tmp_m) ...
 */
#define TEXGEN_NEED_M (TEXGEN_SPHERE_MAP)
/* ... and those for which it needs at least the tmp_f vectors. */
#define TEXGEN_NEED_F (TEXGEN_SPHERE_MAP | \
TEXGEN_REFLECTION_MAP_NV)
 
 
 
/*
 * For every eye-space vertex (3 components read) compute the reflection of
 * the normalized eye vector u about the normal n, f = u - 2(n.u)n, and the
 * sphere-map scale factor m = 0.5 / sqrt(fx^2 + fy^2 + (fz+1)^2).
 *
 * f      - output, one 3-vector per vertex.
 * m      - output, one scale factor per vertex; left at 0 when the squared
 *          length is exactly 0.
 * normal - per-vertex normals, walked with normal->stride.
 * eye    - eye coordinates; eye->count vertices are processed.
 */
static void build_m3( GLfloat f[][3], GLfloat m[],
                      const GLvector4f *normal,
                      const GLvector4f *eye )
{
   const GLuint eye_stride = eye->stride;
   const GLuint nr = eye->count;
   GLfloat *pos = (GLfloat *)eye->start;
   const GLfloat *norm = normal->start;
   GLuint i = 0;

   while (i < nr) {
      GLfloat u[3], two_nu, fx, fy, fz;

      COPY_3V( u, pos );
      NORMALIZE_3FV( u );
      two_nu = 2.0F * DOT3(norm,u);

      f[i][0] = u[0] - norm[0] * two_nu;
      f[i][1] = u[1] - norm[1] * two_nu;
      f[i][2] = u[2] - norm[2] * two_nu;
      fx = f[i][0];
      fy = f[i][1];
      fz = f[i][2];

      m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
      if (m[i] != 0.0F) {
         m[i] = 0.5F / (GLfloat) GL_SQRT(m[i]);
      }

      i++;
      STRIDE_F(pos, eye_stride);
      STRIDE_F(norm, normal->stride);
   }
}
 
 
 
/*
 * Same computation as the 3-component variant, but only x and y are read
 * from each eye coordinate; z is taken as 0 before normalization.
 *
 * f      - output, one reflection 3-vector per vertex.
 * m      - output, 0.5 / sqrt(fx^2 + fy^2 + (fz+1)^2) per vertex; left at 0
 *          when the squared length is exactly 0.
 * normal - per-vertex normals, walked with normal->stride.
 * eye    - eye coordinates; eye->count vertices are processed.
 */
static void build_m2( GLfloat f[][3], GLfloat m[],
                      const GLvector4f *normal,
                      const GLvector4f *eye )
{
   const GLuint eye_stride = eye->stride;
   const GLuint nr = eye->count;
   GLfloat *pos = eye->start;
   GLfloat *norm = normal->start;
   GLuint i = 0;

   while (i < nr) {
      GLfloat u[3], two_nu, fx, fy, fz;

      COPY_2V( u, pos );
      u[2] = 0;
      NORMALIZE_3FV( u );
      two_nu = 2.0F * DOT3(norm,u);

      f[i][0] = u[0] - norm[0] * two_nu;
      f[i][1] = u[1] - norm[1] * two_nu;
      f[i][2] = u[2] - norm[2] * two_nu;
      fx = f[i][0];
      fy = f[i][1];
      fz = f[i][2];

      m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
      if (m[i] != 0.0F) {
         m[i] = 0.5F / (GLfloat) GL_SQRT(m[i]);
      }

      i++;
      STRIDE_F(pos, eye_stride);
      STRIDE_F(norm, normal->stride);
   }
}
 
 
 
/* Signature shared by build_m2 and build_m3. */
typedef void (*build_m_func)( GLfloat f[][3],
GLfloat m[],
const GLvector4f *normal,
const GLvector4f *eye );


/* Dispatch table indexed by coordinate size (0..4).  Sizes 0 and 1 have no
 * implementation (null entries, must not be called); size 4 reuses the
 * 3-component routine.
 */
static build_m_func build_m_tab[5] = {
0,
0,
build_m2,
build_m3,
build_m3
};
 
 
/* Compute, for every eye-space vertex (3 components read), the reflection
 * of the normalized eye vector about the normal: f = u - 2(n.u)n.
 *
 * This is unusual in that we respect the stride of the output vector
 * (f).  This allows us to pass in either a texcoord vector4f, or a
 * temporary vector3f.  fstride is applied with STRIDE_F, exactly like the
 * input strides.
 */
static void build_f3( GLfloat *f,
                      GLuint fstride,
                      const GLvector4f *normal,
                      const GLvector4f *eye )
{
   const GLuint eye_stride = eye->stride;
   const GLuint nr = eye->count;
   GLfloat *pos = eye->start;
   GLfloat *norm = normal->start;
   GLuint i;

   for (i = 0; i < nr;
        i++, STRIDE_F(pos, eye_stride), STRIDE_F(f, fstride),
        STRIDE_F(norm, normal->stride)) {
      GLfloat u[3], two_nu;

      COPY_3V( u, pos );
      NORMALIZE_3FV( u );
      two_nu = 2.0F * DOT3(norm,u);

      f[0] = u[0] - norm[0] * two_nu;
      f[1] = u[1] - norm[1] * two_nu;
      f[2] = u[2] - norm[2] * two_nu;
   }
}
 
 
/* Two-component variant of the reflection-vector builder: only x and y are
 * read from each eye coordinate and z is taken as 0 before normalization.
 * The output pointer is advanced by fstride via STRIDE_F after each vertex.
 */
static void build_f2( GLfloat *f,
                      GLuint fstride,
                      const GLvector4f *normal,
                      const GLvector4f *eye )
{
   const GLuint eye_stride = eye->stride;
   const GLuint nr = eye->count;
   GLfloat *pos = eye->start;
   GLfloat *norm = normal->start;
   GLuint i;

   for (i = 0; i < nr;
        i++, STRIDE_F(pos, eye_stride), STRIDE_F(f, fstride),
        STRIDE_F(norm, normal->stride)) {
      GLfloat u[3], two_nu;

      COPY_2V( u, pos );
      u[2] = 0;
      NORMALIZE_3FV( u );
      two_nu = 2.0F * DOT3(norm,u);

      f[0] = u[0] - norm[0] * two_nu;
      f[1] = u[1] - norm[1] * two_nu;
      f[2] = u[2] - norm[2] * two_nu;
   }
}
 
/* Signature shared by build_f2 and build_f3. */
typedef void (*build_f_func)( GLfloat *f,
GLuint fstride,
const GLvector4f *normal_vec,
const GLvector4f *eye );



/* Dispatch table indexed by coordinate size (0..4).  Sizes 0 and 1 have no
 * implementation (null entries, must not be called).
 * Just treat 4-vectors as 3-vectors.
*/
static build_f_func build_f_tab[5] = {
0,
0,
build_f2,
build_f3,
build_f3
};
 
 
/* Special case texgen functions.
 */

/*
 * Fast path used when S, T and R are all generated with
 * GL_REFLECTION_MAP_NV: the reflection vectors are written straight into
 * the unit's output texcoord vector.
 *
 * When an incoming texcoord array exists its size flags and count are
 * merged into the output, and a 4th component, if present, is copied
 * through.  When there is no incoming array the output is a plain
 * 3-component vector whose count comes from the vertex buffer.
 */
static void texgen_reflection_map_nv( GLcontext *ctx,
                                      struct texgen_stage_data *store,
                                      GLuint unit )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   GLvector4f *in = VB->TexCoordPtr[unit];
   GLvector4f *out = &store->texcoord[unit];

   build_f_tab[VB->EyePtr->size]( out->start,
                                  out->stride,
                                  VB->NormalPtr,
                                  VB->EyePtr );

   if (in) {
      out->flags |= (in->flags & VEC_SIZE_FLAGS) | VEC_SIZE_3;
      out->count = in->count;
      out->size = MAX2(in->size, 3);
      if (in->size == 4)
         _mesa_copy_tab[0x8]( out, in );
   }
   else {
      out->flags |= VEC_SIZE_3;
      out->size = 3;
      /* `in` is NULL on this path, so the count must not be read from it. */
      out->count = VB->Count;
   }
}
 
 
 
/*
 * Fast path used when S, T and R are all generated with GL_NORMAL_MAP_NV:
 * the first three components of every output texcoord are a copy of the
 * vertex normal.
 *
 * When an incoming texcoord array exists its size flags and count are
 * merged into the output, and a 4th component, if present, is copied
 * through.  When there is no incoming array the output is a plain
 * 3-component vector whose count comes from the vertex buffer.
 */
static void texgen_normal_map_nv( GLcontext *ctx,
                                  struct texgen_stage_data *store,
                                  GLuint unit )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   GLvector4f *in = VB->TexCoordPtr[unit];
   GLvector4f *out = &store->texcoord[unit];
   GLvector4f *normal = VB->NormalPtr;
   GLfloat (*texcoord)[4] = (GLfloat (*)[4])out->start;
   GLuint count = VB->Count;
   GLuint i;
   const GLfloat *norm = normal->start;

   for (i=0;i<count;i++, STRIDE_F(norm, normal->stride)) {
      texcoord[i][0] = norm[0];
      texcoord[i][1] = norm[1];
      texcoord[i][2] = norm[2];
   }

   if (in) {
      out->flags |= (in->flags & VEC_SIZE_FLAGS) | VEC_SIZE_3;
      out->count = in->count;
      out->size = MAX2(in->size, 3);
      if (in->size == 4)
         _mesa_copy_tab[0x8]( out, in );
   }
   else {
      out->flags |= VEC_SIZE_3;
      out->size = 3;
      /* `in` is NULL on this path, so the count must not be read from it. */
      out->count = count;
   }
}
 
 
/*
 * Fast path used when exactly S and T are generated with GL_SPHERE_MAP:
 * s = fx * m + 0.5, t = fy * m + 0.5, with f and m produced by the build_m
 * helper selected by the eye coordinate size.
 *
 * When an incoming texcoord array exists its size flags and count are
 * merged into the output and any components beyond the first two are
 * copied through.  When there is no incoming array the output is a plain
 * 2-component vector whose count comes from the vertex buffer.
 */
static void texgen_sphere_map( GLcontext *ctx,
                               struct texgen_stage_data *store,
                               GLuint unit )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   GLvector4f *in = VB->TexCoordPtr[unit];
   GLvector4f *out = &store->texcoord[unit];
   GLfloat (*texcoord)[4] = (GLfloat (*)[4]) out->start;
   GLuint count = VB->Count;
   GLuint i;
   GLfloat (*f)[3] = store->tmp_f;
   GLfloat *m = store->tmp_m;

/*     _mesa_debug(NULL, "%s normstride %d eyestride %d\n",  */
/*                 __FUNCTION__, VB->NormalPtr->stride, */
/*                 VB->EyePtr->stride); */

   (build_m_tab[VB->EyePtr->size])( store->tmp_f,
                                    store->tmp_m,
                                    VB->NormalPtr,
                                    VB->EyePtr );

   for (i=0;i<count;i++) {
      texcoord[i][0] = f[i][0] * m[i] + 0.5F;
      texcoord[i][1] = f[i][1] * m[i] + 0.5F;
   }

   if (in) {
      out->size = MAX2(in->size,2);
      out->count = in->count;
      out->flags |= (in->flags & VEC_SIZE_FLAGS) | VEC_SIZE_2;
      /* Pass R and/or Q from the application's texcoords through. */
      if (in->size > 2)
         _mesa_copy_tab[all_bits[in->size] & ~0x3]( out, in );
   } else {
      out->size = 2;
      out->flags |= VEC_SIZE_2;
      /* `in` is NULL on this path, so the count must not be read from it. */
      out->count = count;
   }
}
 
 
 
/*
 * General texgen path for one texture unit: handles any combination of
 * enabled S/T/R/Q generation and per-coordinate modes.
 *
 * Steps:
 *  1. If any enabled mode needs them, build the per-vertex reflection
 *     vectors (tmp_f) and, for sphere mapping, the scale factors (tmp_m).
 *  2. Copy through the incoming components that are not generated, and
 *     work out which components inside the output size are neither
 *     generated nor supplied ("holes"); those are cleaned.
 *  3. Generate each enabled coordinate according to its mode.
 *
 * Fixes relative to the previous revision:
 *  - the build tables are indexed by the eye-coordinate size (as the
 *    special-case routines do) rather than by in->size, which was also
 *    read before `in` had been checked for NULL;
 *  - the stride handed to the build_f helper for the packed GLfloat[3]
 *    temporary is given in bytes, matching how STRIDE_F is used for the
 *    other strides in this file;
 *  - GL_SPHERE_MAP for S/T uses the reflection vector, consistent with
 *    texgen_sphere_map(), instead of the incoming texcoord;
 *  - GL_REFLECTION_MAP_NV for T writes component 1 from f[i][1] instead of
 *    overwriting component 0.
 */
static void texgen( GLcontext *ctx,
                    struct texgen_stage_data *store,
                    GLuint unit )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   GLvector4f *in = VB->TexCoordPtr[unit];
   GLvector4f *out = &store->texcoord[unit];
   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
   const GLvector4f *obj = VB->ObjPtr;
   const GLvector4f *eye = VB->EyePtr;
   const GLvector4f *normal = VB->NormalPtr;
   GLfloat (*texcoord)[4] = (GLfloat (*)[4])out->data;
   GLuint count = VB->Count;
   GLfloat (*f)[3] = store->tmp_f;
   GLfloat *m = store->tmp_m;
   GLuint holes = 0;

   /* The helpers read the eye coordinates, so the variant is chosen by
    * the eye vector's size.
    */
   if (texUnit->_GenFlags & TEXGEN_NEED_M) {
      build_m_tab[eye->size]( store->tmp_f, store->tmp_m, normal, eye );
   } else if (texUnit->_GenFlags & TEXGEN_NEED_F) {
      /* tmp_f is a packed array of GLfloat[3]; the stride is in bytes. */
      build_f_tab[eye->size]( (GLfloat *)store->tmp_f,
                              3 * sizeof(GLfloat), normal, eye );
   }

   if (!in) {
      /* Not expected to happen: the stage requests the original
       * texcoords as an input.
       */
      ASSERT(0);
      in = out;
      in->count = VB->Count;

      out->size = store->TexgenSize[unit];
      out->flags |= texUnit->TexGenEnabled;
      out->count = VB->Count;
      holes = store->TexgenHoles[unit];
   }
   else {
      /* Components supplied by the application and not overwritten. */
      GLuint copy = (all_bits[in->size] & ~texUnit->TexGenEnabled);
      if (copy)
         _mesa_copy_tab[copy]( out, in );

      out->size = MAX2(in->size, store->TexgenSize[unit]);
      out->flags |= (in->flags & VEC_SIZE_FLAGS) | texUnit->TexGenEnabled;
      out->count = in->count;

      holes = ~all_bits[in->size] & store->TexgenHoles[unit];
   }

   if (holes) {
      if (holes & VEC_DIRTY_2) _mesa_vector4f_clean_elem(out, count, 2);
      if (holes & VEC_DIRTY_1) _mesa_vector4f_clean_elem(out, count, 1);
      if (holes & VEC_DIRTY_0) _mesa_vector4f_clean_elem(out, count, 0);
   }

   if (texUnit->TexGenEnabled & S_BIT) {
      GLuint i;
      switch (texUnit->GenModeS) {
      case GL_OBJECT_LINEAR:
         _mesa_dotprod_tab[obj->size]( (GLfloat *)out->data,
                                       sizeof(out->data[0]), obj,
                                       texUnit->ObjectPlaneS );
         break;
      case GL_EYE_LINEAR:
         _mesa_dotprod_tab[eye->size]( (GLfloat *)out->data,
                                       sizeof(out->data[0]), eye,
                                       texUnit->EyePlaneS );
         break;
      case GL_SPHERE_MAP:
         for (i=0;i<count;i++)
            texcoord[i][0] = f[i][0] * m[i] + 0.5F;
         break;
      case GL_REFLECTION_MAP_NV:
         for (i=0;i<count;i++)
            texcoord[i][0] = f[i][0];
         break;
      case GL_NORMAL_MAP_NV: {
         const GLfloat *norm = normal->start;
         for (i=0;i<count;i++, STRIDE_F(norm, normal->stride)) {
            texcoord[i][0] = norm[0];
         }
         break;
      }
      default:
         _mesa_problem(ctx, "Bad S texgen");
      }
   }

   if (texUnit->TexGenEnabled & T_BIT) {
      GLuint i;
      switch (texUnit->GenModeT) {
      case GL_OBJECT_LINEAR:
         _mesa_dotprod_tab[obj->size]( &(out->data[0][1]),
                                       sizeof(out->data[0]), obj,
                                       texUnit->ObjectPlaneT );
         break;
      case GL_EYE_LINEAR:
         _mesa_dotprod_tab[eye->size]( &(out->data[0][1]),
                                       sizeof(out->data[0]), eye,
                                       texUnit->EyePlaneT );
         break;
      case GL_SPHERE_MAP:
         for (i=0;i<count;i++)
            texcoord[i][1] = f[i][1] * m[i] + 0.5F;
         break;
      case GL_REFLECTION_MAP_NV:
         for (i=0;i<count;i++)
            texcoord[i][1] = f[i][1];
         break;
      case GL_NORMAL_MAP_NV: {
         const GLfloat *norm = normal->start;
         for (i=0;i<count;i++, STRIDE_F(norm, normal->stride)) {
            texcoord[i][1] = norm[1];
         }
         break;
      }
      default:
         _mesa_problem(ctx, "Bad T texgen");
      }
   }

   if (texUnit->TexGenEnabled & R_BIT) {
      GLuint i;
      switch (texUnit->GenModeR) {
      case GL_OBJECT_LINEAR:
         _mesa_dotprod_tab[obj->size]( &(out->data[0][2]),
                                       sizeof(out->data[0]), obj,
                                       texUnit->ObjectPlaneR );
         break;
      case GL_EYE_LINEAR:
         _mesa_dotprod_tab[eye->size]( &(out->data[0][2]),
                                       sizeof(out->data[0]), eye,
                                       texUnit->EyePlaneR );
         break;
      case GL_REFLECTION_MAP_NV:
         for (i=0;i<count;i++)
            texcoord[i][2] = f[i][2];
         break;
      case GL_NORMAL_MAP_NV: {
         const GLfloat *norm = normal->start;
         for (i=0;i<count;i++,STRIDE_F(norm, normal->stride)) {
            texcoord[i][2] = norm[2];
         }
         break;
      }
      default:
         _mesa_problem(ctx, "Bad R texgen");
      }
   }

   if (texUnit->TexGenEnabled & Q_BIT) {
      switch (texUnit->GenModeQ) {
      case GL_OBJECT_LINEAR:
         _mesa_dotprod_tab[obj->size]( &(out->data[0][3]),
                                       sizeof(out->data[0]), obj,
                                       texUnit->ObjectPlaneQ );
         break;
      case GL_EYE_LINEAR:
         _mesa_dotprod_tab[eye->size]( &(out->data[0][3]),
                                       sizeof(out->data[0]), eye,
                                       texUnit->EyePlaneQ );
         break;
      default:
         _mesa_problem(ctx, "Bad Q texgen");
      }
   }
}
 
 
 
/*
 * Steady-state run callback of the texgen stage.
 *
 * For every texture unit with texgen enabled, the unit's selected texgen
 * routine is re-run only if the eye coordinates, the normals or that unit's
 * texcoords are among the stage's changed inputs; in either case the VB's
 * texcoord pointer for the unit is pointed at the stage's output vector.
 * Always returns GL_TRUE.
 */
static GLboolean run_texgen_stage( GLcontext *ctx,
                                   struct gl_pipeline_stage *stage )
{
   struct texgen_stage_data *store = TEXGEN_STAGE_DATA( stage );
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   GLuint unit;

   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
      if (!(ctx->Texture._TexGenEnabled & ENABLE_TEXGEN(unit)))
         continue;

      if (stage->changed_inputs & (VERT_BIT_EYE | VERT_BIT_NORMAL | VERT_BIT_TEX(unit))) {
         store->TexgenFunc[unit]( ctx, store, unit );
      }

      VB->TexCoordPtr[unit] = &store->texcoord[unit];
   }

   return GL_TRUE;
}
 
 
 
 
/*
 * Run callback used after a state change: recompute the per-unit derived
 * texgen state, then switch the stage to run_texgen_stage() and run it.
 *
 * For every unit with texgen enabled this determines
 *  - TexgenSize:  the texcoord size implied by the highest enabled
 *                 coordinate (S=1, T=2, R=3, Q=4);
 *  - TexgenHoles: components below that size that are not generated;
 *  - TexgenFunc:  the general texgen() routine, or one of the special-case
 *                 routines when the enabled bits and modes match exactly.
 *
 * Fix: the size test previously mapped R to 4 and Q to 3; Q is the 4th
 * component and R the 3rd.
 */
static GLboolean run_validate_texgen_stage( GLcontext *ctx,
                                            struct gl_pipeline_stage *stage )
{
   struct texgen_stage_data *store = TEXGEN_STAGE_DATA(stage);
   GLuint i;

   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) {
      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];

      if (texUnit->TexGenEnabled) {
         GLuint sz;

         if (texUnit->TexGenEnabled & Q_BIT)
            sz = 4;
         else if (texUnit->TexGenEnabled & R_BIT)
            sz = 3;
         else if (texUnit->TexGenEnabled & T_BIT)
            sz = 2;
         else
            sz = 1;

         store->TexgenSize[i] = sz;
         store->TexgenHoles[i] = (all_bits[sz] & ~texUnit->TexGenEnabled);
         store->TexgenFunc[i] = texgen;

         if (texUnit->TexGenEnabled == (S_BIT|T_BIT|R_BIT)) {
            if (texUnit->_GenFlags == TEXGEN_REFLECTION_MAP_NV) {
               store->TexgenFunc[i] = texgen_reflection_map_nv;
            }
            else if (texUnit->_GenFlags == TEXGEN_NORMAL_MAP_NV) {
               store->TexgenFunc[i] = texgen_normal_map_nv;
            }
         }
         else if (texUnit->TexGenEnabled == (S_BIT|T_BIT) &&
                  texUnit->_GenFlags == TEXGEN_SPHERE_MAP) {
            store->TexgenFunc[i] = texgen_sphere_map;
         }
      }
   }

   stage->run = run_texgen_stage;
   return stage->run( ctx, stage );
}
 
 
/*
 * Check callback of the texgen stage: decide whether the stage is active
 * and which vertex attributes it consumes and produces.
 *
 * The stage is inactive unless texgen is enabled on some unit and no vertex
 * program is enabled.  When active, its inputs are derived from the texgen
 * flags (object position, eye coordinates, normals) plus the original
 * texcoords of every texgen unit; its outputs are the texcoords of those
 * units.  If private data has already been allocated, the run callback is
 * reset to the validating variant.
 */
static void check_texgen( GLcontext *ctx, struct gl_pipeline_stage *stage )
{
   GLuint unit;
   GLuint inputs = 0;
   GLuint tex_bits = 0;

   stage->active = 0;

   if (!ctx->Texture._TexGenEnabled || ctx->VertexProgram.Enabled)
      return;

   if (ctx->Texture._GenFlags & TEXGEN_OBJ_LINEAR)
      inputs |= VERT_BIT_POS;

   if (ctx->Texture._GenFlags & TEXGEN_NEED_EYE_COORD)
      inputs |= VERT_BIT_EYE;

   if (ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS)
      inputs |= VERT_BIT_NORMAL;

   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
      if (ctx->Texture._TexGenEnabled & ENABLE_TEXGEN(unit)) {
         /* The unit's texcoords are an output, and also an input:
          * need the original input in case it contains a Q coord
          * (sigh).
          */
         tex_bits |= VERT_BIT_TEX(unit);

         /* Something for Feedback? */
      }
   }

   if (stage->privatePtr)
      stage->run = run_validate_texgen_stage;

   stage->active = 1;
   stage->inputs = inputs | tex_bits;
   stage->outputs = tex_bits;
}
 
 
 
 
/* Called the first time stage->run() is invoked.
 *
 * Allocates the stage's private data: one output texcoord vector per
 * texture unit plus the tmp_f / tmp_m scratch arrays, each sized for
 * VB->Size vertices.  On success the run callback is switched to the
 * validating variant, which is run immediately.
 *
 * Returns GL_FALSE if memory cannot be obtained.  In that case everything
 * allocated so far is released and stage->privatePtr is left NULL, so the
 * texgen routines never see missing scratch arrays.
 */
static GLboolean alloc_texgen_data( GLcontext *ctx,
                                    struct gl_pipeline_stage *stage )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct texgen_stage_data *store;
   GLuint i;

   stage->privatePtr = CALLOC(sizeof(*store));
   store = TEXGEN_STAGE_DATA(stage);
   if (!store)
      return GL_FALSE;

   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++)
      _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 );

   store->tmp_f = (GLfloat (*)[3]) MALLOC(VB->Size * sizeof(GLfloat) * 3);
   store->tmp_m = (GLfloat *) MALLOC(VB->Size * sizeof(GLfloat));

   if (!store->tmp_f || !store->tmp_m) {
      /* Undo the partial allocation. */
      for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++)
         if (store->texcoord[i].data)
            _mesa_vector4f_free( &store->texcoord[i] );

      if (store->tmp_f) FREE( store->tmp_f );
      if (store->tmp_m) FREE( store->tmp_m );
      FREE( store );
      stage->privatePtr = NULL;
      return GL_FALSE;
   }

   /* Now validate and run the stage.
    */
   stage->run = run_validate_texgen_stage;
   return stage->run( ctx, stage );
}
 
 
/*
 * Destructor of the texgen stage: release the per-unit output vectors that
 * were allocated, the scratch arrays and the private struct itself, and
 * clear stage->privatePtr.  Does nothing if no private data exists.
 */
static void free_texgen_data( struct gl_pipeline_stage *stage )
{
   struct texgen_stage_data *store = TEXGEN_STAGE_DATA(stage);
   GLuint unit;

   if (!store)
      return;

   for (unit = 0; unit < MAX_TEXTURE_UNITS; unit++) {
      if (store->texcoord[unit].data) {
         _mesa_vector4f_free( &store->texcoord[unit] );
      }
   }

   if (store->tmp_f) {
      FREE( store->tmp_f );
   }
   if (store->tmp_m) {
      FREE( store->tmp_m );
   }

   FREE( store );
   stage->privatePtr = NULL;
}
 
 
 
/* Pipeline stage descriptor for texture coordinate generation.
 *
 * The initializer is positional; the trailing comments name each slot.
 * The stage starts inactive with no private data, and its run slot
 * initially points at alloc_texgen_data, which allocates the private
 * storage on first use and then replaces run with
 * run_validate_texgen_stage.
 */
const struct gl_pipeline_stage _tnl_texgen_stage =
{
"texgen", /* name */
_NEW_TEXTURE, /* when to call check() */
_NEW_TEXTURE, /* when to invalidate stored data */
GL_FALSE, /* active? */
0, /* inputs */
0, /* outputs */
0, /* changed_inputs */
NULL, /* private data */
free_texgen_data, /* destructor */
check_texgen, /* check */
alloc_texgen_data /* run -- initially set to alloc data */
};
/shark/trunk/ports/mesa/src/tnl/t_imm_exec.c
1,4 → 1,4
/* $Id: t_imm_exec.c,v 1.2 2003-04-24 14:22:20 giacomo Exp $ */
/* $Id: t_imm_exec.c,v 1.1 2003-02-28 11:48:07 pj Exp $ */
 
/*
* Mesa 3-D graphics library
46,7 → 46,7
#include "math/m_xform.h"
 
#include "t_context.h"
#include "t_import_array.h"
#include "t_array_import.h"
#include "t_imm_alloc.h"
#include "t_imm_api.h"
#include "t_imm_debug.h"
/shark/trunk/ports/mesa/src/tnl/t_array_api.c
1,4 → 1,4
/* $Id: t_array_api.c,v 1.2 2003-04-24 14:22:20 giacomo Exp $ */
/* $Id: t_array_api.c,v 1.1 2003-02-28 11:48:06 pj Exp $ */
 
/*
* Mesa 3-D graphics library
42,7 → 42,7
#include "array_cache/acache.h"
 
#include "t_array_api.h"
#include "t_import_array.h"
#include "t_array_import.h"
#include "t_imm_api.h"
#include "t_imm_exec.h"
#include "t_context.h"
/shark/trunk/ports/mesa/src/swrast/s_aatempline.h
File deleted
/shark/trunk/ports/mesa/src/swrast/s_aaline.c
1,4 → 1,4
/* $Id: s_aaline.c,v 1.2 2003-04-24 14:22:20 giacomo Exp $ */
/* $Id: s_aaline.c,v 1.1 2003-02-28 11:49:40 pj Exp $ */
 
/*
* Mesa 3-D graphics library
468,7 → 468,7
#define DO_Z
#define DO_FOG
#define DO_INDEX
#include "s_aatempline.h"
#include "s_aalinetemp.h"
 
 
#define NAME(x) aa_rgba_##x
475,7 → 475,7
#define DO_Z
#define DO_FOG
#define DO_RGBA
#include "s_aatempline.h"
#include "s_aalinetemp.h"
 
 
#define NAME(x) aa_tex_rgba_##x
483,7 → 483,7
#define DO_FOG
#define DO_RGBA
#define DO_TEX
#include "s_aatempline.h"
#include "s_aalinetemp.h"
 
 
#define NAME(x) aa_multitex_rgba_##x
491,7 → 491,7
#define DO_FOG
#define DO_RGBA
#define DO_MULTITEX
#include "s_aatempline.h"
#include "s_aalinetemp.h"
 
 
#define NAME(x) aa_multitex_spec_##x
500,7 → 500,7
#define DO_RGBA
#define DO_MULTITEX
#define DO_SPEC
#include "s_aatempline.h"
#include "s_aalinetemp.h"
 
 
 
/shark/trunk/ports/mesa/src/swrast/s_aalinetemp.h
0,0 → 1,315
/* $Id: s_aalinetemp.h,v 1.1 2003-02-28 11:49:40 pj Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 4.1
*
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
/*
* Antialiased line template.
*/
 
 
/*
* Function to render each fragment in the AA line.
*
* Template function: the including file defines NAME() and any of DO_Z,
* DO_FOG, DO_RGBA, DO_INDEX, DO_SPEC, DO_TEX, DO_MULTITEX to select which
* per-fragment attributes are produced.
*
* Appends one fragment at window position (ix, iy) to line->span, unless
* its computed coverage is exactly zero.  When the span reaches MAX_WIDTH
* fragments it is written out and the span counter is reset.
*/
static void
NAME(plot)(GLcontext *ctx, struct LineInfo *line, int ix, int iy)
{
const GLfloat fx = (GLfloat) ix;
const GLfloat fy = (GLfloat) iy;
const GLfloat coverage = compute_coveragef(line, ix, iy);
/* slot in the span arrays that this fragment will occupy */
const GLuint i = line->span.end;

/* fragments with no coverage are not stored at all */
if (coverage == 0.0)
return;

line->span.end++;
line->span.array->coverage[i] = coverage;
line->span.array->x[i] = ix;
line->span.array->y[i] = iy;

/*
* Compute Z, color, texture coords, fog for the fragment by
* solving the plane equations at (ix,iy).
*/
#ifdef DO_Z
line->span.array->z[i] = (GLdepth) solve_plane(fx, fy, line->zPlane);
#endif
#ifdef DO_FOG
line->span.array->fog[i] = solve_plane(fx, fy, line->fPlane);
#endif
#ifdef DO_RGBA
line->span.array->rgba[i][RCOMP] = solve_plane_chan(fx, fy, line->rPlane);
line->span.array->rgba[i][GCOMP] = solve_plane_chan(fx, fy, line->gPlane);
line->span.array->rgba[i][BCOMP] = solve_plane_chan(fx, fy, line->bPlane);
line->span.array->rgba[i][ACOMP] = solve_plane_chan(fx, fy, line->aPlane);
#endif
#ifdef DO_INDEX
line->span.array->index[i] = (GLint) solve_plane(fx, fy, line->iPlane);
#endif
#ifdef DO_SPEC
line->span.array->spec[i][RCOMP] = solve_plane_chan(fx, fy, line->srPlane);
line->span.array->spec[i][GCOMP] = solve_plane_chan(fx, fy, line->sgPlane);
line->span.array->spec[i][BCOMP] = solve_plane_chan(fx, fy, line->sbPlane);
#endif
#ifdef DO_TEX
{
/* single-texture case: only unit 0 is evaluated; the three texcoord
* plane values are each multiplied by invQ, taken from vPlane
*/
const GLfloat invQ = solve_plane_recip(fx, fy, line->vPlane[0]);
line->span.array->texcoords[0][i][0] = solve_plane(fx, fy, line->sPlane[0]) * invQ;
line->span.array->texcoords[0][i][1] = solve_plane(fx, fy, line->tPlane[0]) * invQ;
line->span.array->texcoords[0][i][2] = solve_plane(fx, fy, line->uPlane[0]) * invQ;
line->span.array->lambda[0][i] = compute_lambda(line->sPlane[0], line->tPlane[0], invQ,
line->texWidth[0], line->texHeight[0]);
}
#elif defined(DO_MULTITEX)
{
/* multitexture case: same computation, repeated for every unit whose
* _ReallyEnabled field is non-zero
*/
GLuint unit;
for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
if (ctx->Texture.Unit[unit]._ReallyEnabled) {
const GLfloat invQ = solve_plane_recip(fx, fy, line->vPlane[unit]);
line->span.array->texcoords[unit][i][0] = solve_plane(fx, fy, line->sPlane[unit]) * invQ;
line->span.array->texcoords[unit][i][1] = solve_plane(fx, fy, line->tPlane[unit]) * invQ;
line->span.array->texcoords[unit][i][2] = solve_plane(fx, fy, line->uPlane[unit]) * invQ;
line->span.array->lambda[unit][i] = compute_lambda(line->sPlane[unit],
line->tPlane[unit], invQ,
line->texWidth[unit], line->texHeight[unit]);
}
}
}
#endif

/* span is full: write it out with the writer matching this template
* variant, then start filling it again from slot 0
*/
if (line->span.end == MAX_WIDTH) {
#if defined(DO_TEX) || defined(DO_MULTITEX)
_mesa_write_texture_span(ctx, &(line->span));
#elif defined(DO_RGBA)
_mesa_write_rgba_span(ctx, &(line->span));
#else
_mesa_write_index_span(ctx, &(line->span));
#endif
line->span.end = 0; /* reset counter */
}
}
 
 
 
/*
 * Line setup
 *
 * Template function: the including file defines NAME() and any of DO_Z,
 * DO_FOG, DO_RGBA, DO_INDEX, DO_SPEC, DO_TEX, DO_MULTITEX to select which
 * attributes are interpolated.
 *
 * Builds a LineInfo for the line v0 -> v1, computes one plane equation
 * per enabled attribute, rasterizes the line through segment() using
 * NAME(plot) (one call per stipple run when stippling is enabled), and
 * finally writes out whatever remains in the span.
 *
 * Lines whose length is zero, infinite or NaN are skipped.
 *
 * Texture coordinates are scaled by each vertex's own win[3] value
 * (called invW here) before the planes are built: v0's coordinates by
 * invW0 and v1's coordinates by invW1.
 */
static void
NAME(line)(GLcontext *ctx, const SWvertex *v0, const SWvertex *v1)
{
   SWcontext *swrast = SWRAST_CONTEXT(ctx);
   GLfloat tStart, tEnd;     /* segment start, end along line length */
   GLboolean inSegment;
   GLint iLen, i;

   /* Init the LineInfo struct */
   struct LineInfo line;
   line.x0 = v0->win[0];
   line.y0 = v0->win[1];
   line.x1 = v1->win[0];
   line.y1 = v1->win[1];
   line.dx = line.x1 - line.x0;
   line.dy = line.y1 - line.y0;
   line.len = (GLfloat) sqrt(line.dx * line.dx + line.dy * line.dy);
   line.halfWidth = 0.5F * ctx->Line.Width;

   if (line.len == 0.0 || IS_INF_OR_NAN(line.len))
      return;

   INIT_SPAN(line.span, GL_LINE, 0, 0, SPAN_XY | SPAN_COVERAGE);

   line.xAdj = line.dx / line.len * line.halfWidth;
   line.yAdj = line.dy / line.len * line.halfWidth;

#ifdef DO_Z
   line.span.arrayMask |= SPAN_Z;
   compute_plane(line.x0, line.y0, line.x1, line.y1,
                 v0->win[2], v1->win[2], line.zPlane);
#endif
#ifdef DO_FOG
   line.span.arrayMask |= SPAN_FOG;
   compute_plane(line.x0, line.y0, line.x1, line.y1,
                 v0->fog, v1->fog, line.fPlane);
#endif
#ifdef DO_RGBA
   line.span.arrayMask |= SPAN_RGBA;
   if (ctx->Light.ShadeModel == GL_SMOOTH) {
      compute_plane(line.x0, line.y0, line.x1, line.y1,
                    v0->color[RCOMP], v1->color[RCOMP], line.rPlane);
      compute_plane(line.x0, line.y0, line.x1, line.y1,
                    v0->color[GCOMP], v1->color[GCOMP], line.gPlane);
      compute_plane(line.x0, line.y0, line.x1, line.y1,
                    v0->color[BCOMP], v1->color[BCOMP], line.bPlane);
      compute_plane(line.x0, line.y0, line.x1, line.y1,
                    v0->color[ACOMP], v1->color[ACOMP], line.aPlane);
   }
   else {
      /* not smooth shading: the whole line takes v1's color */
      constant_plane(v1->color[RCOMP], line.rPlane);
      constant_plane(v1->color[GCOMP], line.gPlane);
      constant_plane(v1->color[BCOMP], line.bPlane);
      constant_plane(v1->color[ACOMP], line.aPlane);
   }
#endif
#ifdef DO_SPEC
   line.span.arrayMask |= SPAN_SPEC;
   if (ctx->Light.ShadeModel == GL_SMOOTH) {
      compute_plane(line.x0, line.y0, line.x1, line.y1,
                    v0->specular[RCOMP], v1->specular[RCOMP], line.srPlane);
      compute_plane(line.x0, line.y0, line.x1, line.y1,
                    v0->specular[GCOMP], v1->specular[GCOMP], line.sgPlane);
      compute_plane(line.x0, line.y0, line.x1, line.y1,
                    v0->specular[BCOMP], v1->specular[BCOMP], line.sbPlane);
   }
   else {
      constant_plane(v1->specular[RCOMP], line.srPlane);
      constant_plane(v1->specular[GCOMP], line.sgPlane);
      constant_plane(v1->specular[BCOMP], line.sbPlane);
   }
#endif
#ifdef DO_INDEX
   line.span.arrayMask |= SPAN_INDEX;
   if (ctx->Light.ShadeModel == GL_SMOOTH) {
      compute_plane(line.x0, line.y0, line.x1, line.y1,
                    (GLfloat) v0->index, (GLfloat) v1->index, line.iPlane);
   }
   else {
      constant_plane((GLfloat) v1->index, line.iPlane);
   }
#endif
#ifdef DO_TEX
   {
      const struct gl_texture_object *obj = ctx->Texture.Unit[0]._Current;
      const struct gl_texture_image *texImage = obj->Image[obj->BaseLevel];
      const GLfloat invW0 = v0->win[3];
      const GLfloat invW1 = v1->win[3];
      /* Each endpoint must be scaled by its OWN invW: v0 by invW0,
       * v1 by invW1.
       */
      const GLfloat s0 = v0->texcoord[0][0] * invW0;
      const GLfloat s1 = v1->texcoord[0][0] * invW1;
      const GLfloat t0 = v0->texcoord[0][1] * invW0;
      const GLfloat t1 = v1->texcoord[0][1] * invW1;
      const GLfloat r0 = v0->texcoord[0][2] * invW0;
      const GLfloat r1 = v1->texcoord[0][2] * invW1;
      const GLfloat q0 = v0->texcoord[0][3] * invW0;
      const GLfloat q1 = v1->texcoord[0][3] * invW1;
      line.span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA);
      compute_plane(line.x0, line.y0, line.x1, line.y1, s0, s1, line.sPlane[0]);
      compute_plane(line.x0, line.y0, line.x1, line.y1, t0, t1, line.tPlane[0]);
      compute_plane(line.x0, line.y0, line.x1, line.y1, r0, r1, line.uPlane[0]);
      compute_plane(line.x0, line.y0, line.x1, line.y1, q0, q1, line.vPlane[0]);
      line.texWidth[0] = (GLfloat) texImage->Width;
      line.texHeight[0] = (GLfloat) texImage->Height;
   }
#elif defined(DO_MULTITEX)
   {
      GLuint u;
      line.span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA);
      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
         if (ctx->Texture.Unit[u]._ReallyEnabled) {
            const struct gl_texture_object *obj = ctx->Texture.Unit[u]._Current;
            const struct gl_texture_image *texImage = obj->Image[obj->BaseLevel];
            const GLfloat invW0 = v0->win[3];
            const GLfloat invW1 = v1->win[3];
            /* As above: v1's coordinates are scaled by invW1. */
            const GLfloat s0 = v0->texcoord[u][0] * invW0;
            const GLfloat s1 = v1->texcoord[u][0] * invW1;
            const GLfloat t0 = v0->texcoord[u][1] * invW0;
            const GLfloat t1 = v1->texcoord[u][1] * invW1;
            const GLfloat r0 = v0->texcoord[u][2] * invW0;
            const GLfloat r1 = v1->texcoord[u][2] * invW1;
            const GLfloat q0 = v0->texcoord[u][3] * invW0;
            const GLfloat q1 = v1->texcoord[u][3] * invW1;
            compute_plane(line.x0, line.y0, line.x1, line.y1, s0, s1, line.sPlane[u]);
            compute_plane(line.x0, line.y0, line.x1, line.y1, t0, t1, line.tPlane[u]);
            compute_plane(line.x0, line.y0, line.x1, line.y1, r0, r1, line.uPlane[u]);
            compute_plane(line.x0, line.y0, line.x1, line.y1, q0, q1, line.vPlane[u]);
            line.texWidth[u] = (GLfloat) texImage->Width;
            line.texHeight[u] = (GLfloat) texImage->Height;
         }
      }
   }
#endif

   tStart = tEnd = 0.0;
   inSegment = GL_FALSE;
   iLen = (GLint) line.len;

   if (ctx->Line.StippleFlag) {
      /* Walk the line one unit of length at a time, grouping runs of
       * "on" stipple bits into segments expressed as fractions [0,1]
       * of the line length.
       */
      for (i = 0; i < iLen; i++) {
         const GLuint bit = (swrast->StippleCounter / ctx->Line.StippleFactor) & 0xf;
         if ((1 << bit) & ctx->Line.StipplePattern) {
            /* stipple bit is on */
            const GLfloat t = (GLfloat) i / (GLfloat) line.len;
            if (!inSegment) {
               /* start new segment */
               inSegment = GL_TRUE;
               tStart = t;
            }
            else {
               /* still in the segment, extend it */
               tEnd = t;
            }
         }
         else {
            /* stipple bit is off */
            if (inSegment && (tEnd > tStart)) {
               /* draw the segment */
               segment(ctx, &line, NAME(plot), tStart, tEnd);
               inSegment = GL_FALSE;
            }
            else {
               /* still between segments, do nothing */
            }
         }
         swrast->StippleCounter++;
      }

      if (inSegment) {
         /* draw the final segment of the line */
         segment(ctx, &line, NAME(plot), tStart, 1.0F);
      }
   }
   else {
      /* non-stippled */
      segment(ctx, &line, NAME(plot), 0.0, 1.0);
   }

   /* write out the fragments still held in the span */
#if defined(DO_TEX) || defined(DO_MULTITEX)
   _mesa_write_texture_span(ctx, &(line.span));
#elif defined(DO_RGBA)
   _mesa_write_rgba_span(ctx, &(line.span));
#else
   _mesa_write_index_span(ctx, &(line.span));
#endif
}
 
 
 
 
#undef DO_Z
#undef DO_FOG
#undef DO_RGBA
#undef DO_INDEX
#undef DO_SPEC
#undef DO_TEX
#undef DO_MULTITEX
#undef NAME
/shark/trunk/ports/mesa/src/texformat_tmp.h
0,0 → 1,461
/* $Id: texformat_tmp.h,v 1.1 2003-02-28 11:42:05 pj Exp $ */
 
/*
* Mesa 3-D graphics library
* Version: 4.1
*
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Gareth Hughes
* Brian Paul
*/
 
 
/*
* This template file generates texel fetch functions for 1-D, 2-D and 3-D
* texture images.
*/
 
 
#if DIM == 1
 
#define CHAN_SRC( t, i, j, k, sz ) \
((GLchan *)(t)->Data + (i) * (sz))
#define UBYTE_SRC( t, i, j, k, sz ) \
((GLubyte *)(t)->Data + (i) * (sz))
#define USHORT_SRC( t, i, j, k ) \
((GLushort *)(t)->Data + (i))
#define FLOAT_SRC( t, i, j, k ) \
((GLfloat *)(t)->Data + (i))
 
#define FETCH(x) fetch_1d_texel_##x
 
#elif DIM == 2
 
#define CHAN_SRC( t, i, j, k, sz ) \
((GLchan *)(t)->Data + ((t)->RowStride * (j) + (i)) * (sz))
#define UBYTE_SRC( t, i, j, k, sz ) \
((GLubyte *)(t)->Data + ((t)->RowStride * (j) + (i)) * (sz))
#define USHORT_SRC( t, i, j, k ) \
((GLushort *)(t)->Data + ((t)->RowStride * (j) + (i)))
#define FLOAT_SRC( t, i, j, k ) \
((GLfloat *)(t)->Data + ((t)->RowStride * (j) + (i)))
 
#define FETCH(x) fetch_2d_texel_##x
 
#elif DIM == 3
 
/* Address of the GLchan texel (i, j, k) in a 3-D image with sz channels
 * per texel.  The whole expansion is parenthesized, like the other *_SRC
 * macros, so the result can be dereferenced or subscripted directly.
 */
#define CHAN_SRC( t, i, j, k, sz ) \
   ((GLchan *)(t)->Data + (((t)->Height * (k) + (j)) * \
   (t)->RowStride + (i)) * (sz))
#define UBYTE_SRC( t, i, j, k, sz ) \
((GLubyte *)(t)->Data + (((t)->Height * (k) + (j)) * \
(t)->RowStride + (i)) * (sz))
#define USHORT_SRC( t, i, j, k ) \
((GLushort *)(t)->Data + (((t)->Height * (k) + (j)) * \
(t)->RowStride + (i)))
#define FLOAT_SRC( t, i, j, k ) \
((GLfloat *)(t)->Data + (((t)->Height * (k) + (j)) * \
(t)->RowStride + (i)))
 
#define FETCH(x) fetch_3d_texel_##x
 
#else
#error illegal number of texture dimensions
#endif
 
 
/* Fetch a 4-channel GLchan texel and copy it unchanged into texel. */
static void FETCH(rgba)( const struct gl_texture_image *texImage,
                         GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLchan *in = CHAN_SRC( texImage, i, j, k, 4 );
   COPY_CHAN4( out, in );
}
 
/* Fetch a 3-channel GLchan texel; alpha is set to CHAN_MAX. */
static void FETCH(rgb)( const struct gl_texture_image *texImage,
                        GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLchan *in = CHAN_SRC( texImage, i, j, k, 3 );
   out[RCOMP] = in[0];
   out[GCOMP] = in[1];
   out[BCOMP] = in[2];
   out[ACOMP] = CHAN_MAX;
}
 
/* Fetch a 1-channel alpha texel; R, G and B are set to zero. */
static void FETCH(alpha)( const struct gl_texture_image *texImage,
                          GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLchan *in = CHAN_SRC( texImage, i, j, k, 1 );
   out[RCOMP] = 0;
   out[GCOMP] = 0;
   out[BCOMP] = 0;
   out[ACOMP] = in[0];
}
 
/* Fetch a 1-channel luminance texel: the value is replicated into R, G
 * and B, and alpha is set to CHAN_MAX.
 */
static void FETCH(luminance)( const struct gl_texture_image *texImage,
                              GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLchan *in = CHAN_SRC( texImage, i, j, k, 1 );
   out[RCOMP] = in[0];
   out[GCOMP] = in[0];
   out[BCOMP] = in[0];
   out[ACOMP] = CHAN_MAX;
}
 
/* Fetch a 2-channel texel: channel 0 is replicated into R, G and B,
 * channel 1 becomes alpha.
 */
static void FETCH(luminance_alpha)( const struct gl_texture_image *texImage,
                                    GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLchan *in = CHAN_SRC( texImage, i, j, k, 2 );
   out[RCOMP] = in[0];
   out[GCOMP] = in[0];
   out[BCOMP] = in[0];
   out[ACOMP] = in[1];
}
 
/* Fetch a 1-channel intensity texel: the value is replicated into all
 * four output channels.
 */
static void FETCH(intensity)( const struct gl_texture_image *texImage,
                              GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLchan *in = CHAN_SRC( texImage, i, j, k, 1 );
   out[RCOMP] = in[0];
   out[GCOMP] = in[0];
   out[BCOMP] = in[0];
   out[ACOMP] = in[0];
}
 
/* Fetch a color-index texel: a single GLchan is written to texel. */
static void FETCH(color_index)( const struct gl_texture_image *texImage,
                                GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLchan *in = CHAN_SRC( texImage, i, j, k, 1 );
   out[0] = in[0];
}
 
/* Fetch a depth texel: a single GLfloat is written to texel. */
static void FETCH(depth_component)( const struct gl_texture_image *texImage,
                                    GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLfloat *out = (GLfloat *) texel;
   const GLfloat *in = FLOAT_SRC( texImage, i, j, k );
   out[0] = in[0];
}
 
/* Fetch a 4-byte texel stored in memory as A, B, G, R (bytes 0..3) and
 * convert each byte to a GLchan.
 */
static void FETCH(rgba8888)( const struct gl_texture_image *texImage,
                             GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 4 );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[3] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[2] );
   out[BCOMP] = UBYTE_TO_CHAN( bytes[1] );
   out[ACOMP] = UBYTE_TO_CHAN( bytes[0] );
}
 
/* Fetch a 4-byte texel stored in memory as B, G, R, A (bytes 0..3) and
 * convert each byte to a GLchan.
 */
static void FETCH(argb8888)( const struct gl_texture_image *texImage,
                             GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 4 );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[2] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[1] );
   out[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[ACOMP] = UBYTE_TO_CHAN( bytes[3] );
}
 
/* Fetch a 3-byte texel stored in memory as B, G, R (bytes 0..2); alpha
 * is set to CHAN_MAX.
 */
static void FETCH(rgb888)( const struct gl_texture_image *texImage,
                           GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 3 );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[2] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[1] );
   out[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[ACOMP] = CHAN_MAX;
}
 
/* Fetch a 16-bit 5-6-5 texel (red in the top 5 bits, blue in the bottom
 * 5).  Each field is moved to the top of a byte, then rescaled so that
 * the field's maximum maps to 255.  Alpha is set to CHAN_MAX.
 */
static void FETCH(rgb565)( const struct gl_texture_image *texImage,
                           GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLushort *word = USHORT_SRC( texImage, i, j, k );
   const GLushort packed = word[0];
   out[RCOMP] = UBYTE_TO_CHAN( ((packed >> 8) & 0xf8) * 255 / 0xf8 );
   out[GCOMP] = UBYTE_TO_CHAN( ((packed >> 3) & 0xfc) * 255 / 0xfc );
   out[BCOMP] = UBYTE_TO_CHAN( ((packed << 3) & 0xf8) * 255 / 0xf8 );
   out[ACOMP] = CHAN_MAX;
}
 
/* Fetch a 16-bit 4-4-4-4 texel (alpha in the top nibble, then red, green,
 * blue) and rescale each nibble so that 0xf maps to 255.
 */
static void FETCH(argb4444)( const struct gl_texture_image *texImage,
                             GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLushort *word = USHORT_SRC( texImage, i, j, k );
   const GLushort packed = word[0];
   out[RCOMP] = UBYTE_TO_CHAN( ((packed >> 8) & 0xf) * 255 / 0xf );
   out[GCOMP] = UBYTE_TO_CHAN( ((packed >> 4) & 0xf) * 255 / 0xf );
   out[BCOMP] = UBYTE_TO_CHAN( ((packed ) & 0xf) * 255 / 0xf );
   out[ACOMP] = UBYTE_TO_CHAN( ((packed >> 12) & 0xf) * 255 / 0xf );
}
 
/* Fetch a 16-bit 1-5-5-5 texel (alpha in the top bit, then red, green,
 * blue).  Each 5-bit field is rescaled so that 0x1f maps to 255; the
 * alpha bit becomes 0 or 255.
 */
static void FETCH(argb1555)( const struct gl_texture_image *texImage,
                             GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLushort *word = USHORT_SRC( texImage, i, j, k );
   const GLushort packed = word[0];
   out[RCOMP] = UBYTE_TO_CHAN( ((packed >> 10) & 0x1f) * 255 / 0x1f );
   out[GCOMP] = UBYTE_TO_CHAN( ((packed >> 5) & 0x1f) * 255 / 0x1f );
   out[BCOMP] = UBYTE_TO_CHAN( ((packed ) & 0x1f) * 255 / 0x1f );
   out[ACOMP] = UBYTE_TO_CHAN( ((packed >> 15) & 0x01) * 255 );
}
 
/* Fetch a 2-byte texel: byte 0 is replicated into R, G and B, byte 1
 * becomes alpha.
 */
static void FETCH(al88)( const struct gl_texture_image *texImage,
                         GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 2 );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[ACOMP] = UBYTE_TO_CHAN( bytes[1] );
}
 
/* Fetch an 8-bit 3-3-2 texel: red in bits 7-5, green in bits 4-2, blue in
 * bits 1-0.  Each field is moved to the top of the byte and rescaled so
 * that the field's maximum maps to 255.  Alpha is set to CHAN_MAX.
 */
static void FETCH(rgb332)( const struct gl_texture_image *texImage,
                           GLint i, GLint j, GLint k, GLvoid *texel )
{
   const GLubyte *src = UBYTE_SRC( texImage, i, j, k, 1 );
   const GLubyte s = *src;
   GLchan *rgba = (GLchan *) texel;
   rgba[RCOMP] = UBYTE_TO_CHAN( ((s ) & 0xe0) * 255 / 0xe0 );
   rgba[GCOMP] = UBYTE_TO_CHAN( ((s << 3) & 0xe0) * 255 / 0xe0 );
   /* Blue occupies bits 1-0, so it takes a shift of 6 (not 5) to land
    * both bits under the 0xc0 mask.
    */
   rgba[BCOMP] = UBYTE_TO_CHAN( ((s << 6) & 0xc0) * 255 / 0xc0 );
   rgba[ACOMP] = CHAN_MAX;
}
 
/* Fetch a 1-byte alpha texel; R, G and B are set to zero. */
static void FETCH(a8)( const struct gl_texture_image *texImage,
                       GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 1 );
   out[RCOMP] = 0;
   out[GCOMP] = 0;
   out[BCOMP] = 0;
   out[ACOMP] = UBYTE_TO_CHAN( bytes[0] );
}
 
/* Fetch a 1-byte luminance texel: the value is replicated into R, G and
 * B, and alpha is set to CHAN_MAX.
 */
static void FETCH(l8)( const struct gl_texture_image *texImage,
                       GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 1 );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[ACOMP] = CHAN_MAX;
}
 
/* Fetch a 1-byte intensity texel: the value is replicated into all four
 * output channels.
 */
static void FETCH(i8)( const struct gl_texture_image *texImage,
                       GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 1 );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[ACOMP] = UBYTE_TO_CHAN( bytes[0] );
}
 
/* Fetch a 1-byte color-index texel, converted to a single GLchan. */
static void FETCH(ci8)( const struct gl_texture_image *texImage,
                        GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 1 );
   out[0] = UBYTE_TO_CHAN( bytes[0] );
}
 
/* Fetch a texel from a packed YCbCr image and convert it to RGB.
 *
 * Texels are read as a pair of 16-bit words starting at the even column
 * (i & ~1).  The high byte of each word is a luminance sample; the low
 * byte of the even word is Cb and the low byte of the odd word is Cr.
 * Both texels of the pair share the same Cb/Cr.  Alpha is CHAN_MAX.
 *
 * XXX this may break if GLchan != GLubyte
 */
static void FETCH(ycbcr)( const struct gl_texture_image *texImage,
                          GLint i, GLint j, GLint k, GLvoid *texel )
{
   const GLushort *even = USHORT_SRC( texImage, (i & ~1), j, k );
   const GLushort *odd = even + 1;
   const GLubyte y0 = (*even >> 8) & 0xff;  /* luminance, even texel */
   const GLubyte cb = *even & 0xff;         /* chroma U */
   const GLubyte y1 = (*odd >> 8) & 0xff;   /* luminance, odd texel */
   const GLubyte cr = *odd & 0xff;          /* chroma V */
   /* pick the luminance sample belonging to the requested column */
   const GLubyte y = (i & 1) ? y1 : y0;
   GLchan *out = (GLchan *) texel;
   const GLint r = (GLint) (1.164 * (y-16) + 1.596 * (cr-128));
   const GLint g = (GLint) (1.164 * (y-16) - 0.813 * (cr-128) - 0.391 * (cb-128));
   const GLint b = (GLint) (1.164 * (y-16) + 2.018 * (cb-128));
   out[RCOMP] = CLAMP(r, 0, CHAN_MAX);
   out[GCOMP] = CLAMP(g, 0, CHAN_MAX);
   out[BCOMP] = CLAMP(b, 0, CHAN_MAX);
   out[ACOMP] = CHAN_MAX;
}
 
/* Fetch a texel from a byte-reversed packed YCbCr image and convert it
 * to RGB.
 *
 * Texels are read as a pair of 16-bit words starting at the even column
 * (i & ~1).  The low byte of each word is a luminance sample; the high
 * byte of the even word is Cr and the high byte of the odd word is Cb.
 * Both texels of the pair share the same Cb/Cr.  Alpha is CHAN_MAX.
 *
 * XXX this may break if GLchan != GLubyte
 */
static void FETCH(ycbcr_rev)( const struct gl_texture_image *texImage,
                              GLint i, GLint j, GLint k, GLvoid *texel )
{
   const GLushort *even = USHORT_SRC( texImage, (i & ~1), j, k );
   const GLushort *odd = even + 1;
   const GLubyte y0 = *even & 0xff;         /* luminance, even texel */
   const GLubyte cr = (*even >> 8) & 0xff;  /* chroma V */
   const GLubyte y1 = *odd & 0xff;          /* luminance, odd texel */
   const GLubyte cb = (*odd >> 8) & 0xff;   /* chroma U */
   /* pick the luminance sample belonging to the requested column */
   const GLubyte y = (i & 1) ? y1 : y0;
   GLchan *out = (GLchan *) texel;
   const GLint r = (GLint) (1.164 * (y-16) + 1.596 * (cr-128));
   const GLint g = (GLint) (1.164 * (y-16) - 0.813 * (cr-128) - 0.391 * (cb-128));
   const GLint b = (GLint) (1.164 * (y-16) + 2.018 * (cb-128));
   out[RCOMP] = CLAMP(r, 0, CHAN_MAX);
   out[GCOMP] = CLAMP(g, 0, CHAN_MAX);
   out[BCOMP] = CLAMP(b, 0, CHAN_MAX);
   out[ACOMP] = CHAN_MAX;
}
 
 
/* big-endian */
 
#if 0
static void FETCH(abgr8888)( const struct gl_texture_image *texImage,
GLint i, GLint j, GLint k, GLvoid *texel )
{
const GLubyte *src = UBYTE_SRC( texImage, i, j, k, 4 );
GLchan *rgba = (GLchan *) texel;
rgba[RCOMP] = UBYTE_TO_CHAN( src[3] );
rgba[GCOMP] = UBYTE_TO_CHAN( src[2] );
rgba[BCOMP] = UBYTE_TO_CHAN( src[1] );
rgba[ACOMP] = UBYTE_TO_CHAN( src[0] );
}
 
static void FETCH(bgra8888)( const struct gl_texture_image *texImage,
GLint i, GLint j, GLint k, GLvoid *texel )
{
const GLubyte *src = UBYTE_SRC( texImage, i, j, k, 4 );
GLchan *rgba = (GLchan *) texel;
rgba[RCOMP] = UBYTE_TO_CHAN( src[2] );
rgba[GCOMP] = UBYTE_TO_CHAN( src[1] );
rgba[BCOMP] = UBYTE_TO_CHAN( src[0] );
rgba[ACOMP] = UBYTE_TO_CHAN( src[3] );
}
 
static void FETCH(bgr888)( const struct gl_texture_image *texImage,
GLint i, GLint j, GLint k, GLvoid *texel )
{
const GLubyte *src = UBYTE_SRC( texImage, i, j, k, 3 );
GLchan *rgba = (GLchan *) texel;
rgba[RCOMP] = UBYTE_TO_CHAN( src[2] );
rgba[GCOMP] = UBYTE_TO_CHAN( src[1] );
rgba[BCOMP] = UBYTE_TO_CHAN( src[0] );
rgba[ACOMP] = CHAN_MAX;
}
 
static void FETCH(bgr565)( const struct gl_texture_image *texImage,
GLint i, GLint j, GLint k, GLvoid *texel )
{
const GLushort *src = USHORT_SRC( texImage, i, j, k );
const GLushort s = *src;
GLchan *rgba = (GLchan *) texel;
rgba[RCOMP] = UBYTE_TO_CHAN( ((s >> 8) & 0xf8) * 255 / 0xf8 );
rgba[GCOMP] = UBYTE_TO_CHAN( ((s >> 3) & 0xfc) * 255 / 0xfc );
rgba[BCOMP] = UBYTE_TO_CHAN( ((s << 3) & 0xf8) * 255 / 0xf8 );
rgba[ACOMP] = CHAN_MAX;
}
 
static void FETCH(bgra4444)( const struct gl_texture_image *texImage,
GLint i, GLint j, GLint k, GLvoid *texel )
{
const GLushort *src = USHORT_SRC( texImage, i, j, k );
const GLushort s = *src;
GLchan *rgba = (GLchan *) texel;
rgba[RCOMP] = UBYTE_TO_CHAN( ((s >> 8) & 0xf) * 255 / 0xf );
rgba[GCOMP] = UBYTE_TO_CHAN( ((s >> 4) & 0xf) * 255 / 0xf );
rgba[BCOMP] = UBYTE_TO_CHAN( ((s ) & 0xf) * 255 / 0xf );
rgba[ACOMP] = UBYTE_TO_CHAN( ((s >> 12) & 0xf) * 255 / 0xf );
}
 
static void FETCH(bgra5551)( const struct gl_texture_image *texImage,
GLint i, GLint j, GLint k, GLvoid *texel )
{
const GLushort *src = USHORT_SRC( texImage, i, j, k );
const GLushort s = *src;
GLchan *rgba = (GLchan *) texel;
rgba[RCOMP] = UBYTE_TO_CHAN( ((s >> 10) & 0x1f) * 255 / 0x1f );
rgba[GCOMP] = UBYTE_TO_CHAN( ((s >> 5) & 0x1f) * 255 / 0x1f );
rgba[BCOMP] = UBYTE_TO_CHAN( ((s ) & 0x1f) * 255 / 0x1f );
rgba[ACOMP] = UBYTE_TO_CHAN( ((s >> 15) & 0x01) * 255 );
}
 
static void FETCH(la88)( const struct gl_texture_image *texImage,
GLint i, GLint j, GLint k, GLvoid *texel )
{
const GLubyte *src = UBYTE_SRC( texImage, i, j, k, 2 );
GLchan *rgba = (GLchan *) texel;
rgba[RCOMP] = UBYTE_TO_CHAN( src[0] );
rgba[GCOMP] = UBYTE_TO_CHAN( src[0] );
rgba[BCOMP] = UBYTE_TO_CHAN( src[0] );
rgba[ACOMP] = UBYTE_TO_CHAN( src[1] );
}
 
static void FETCH(bgr233)( const struct gl_texture_image *texImage,
GLint i, GLint j, GLint k, GLvoid *texel )
{
const GLubyte *src = UBYTE_SRC( texImage, i, j, k, 1 );
const GLubyte s = *src;
GLchan *rgba = (GLchan *) texel;
rgba[RCOMP] = UBYTE_TO_CHAN( ((s ) & 0xe0) * 255 / 0xe0 );
rgba[GCOMP] = UBYTE_TO_CHAN( ((s << 3) & 0xe0) * 255 / 0xe0 );
rgba[BCOMP] = UBYTE_TO_CHAN( ((s << 5) & 0xc0) * 255 / 0xc0 );
rgba[ACOMP] = CHAN_MAX;
}
#endif
 
 
#undef CHAN_SRC
#undef UBYTE_SRC
#undef USHORT_SRC
#undef FLOAT_SRC
#undef FETCH
#undef DIM
/shark/trunk/ports/mesa/src/texformat.c
1,4 → 1,4
/* $Id: texformat.c,v 1.2 2003-04-24 14:22:20 giacomo Exp $ */
/* $Id: texformat.c,v 1.1 2003-02-28 11:42:04 pj Exp $ */
 
/*
* Mesa 3-D graphics library
42,13 → 42,13
/* Texel fetch routines for all supported formats:
*/
#define DIM 1
#include "tmp_texformat.h"
#include "texformat_tmp.h"
 
#define DIM 2
#include "tmp_texformat.h"
#include "texformat_tmp.h"
 
#define DIM 3
#include "tmp_texformat.h"
#include "texformat_tmp.h"
 
/* Have to have this so the FetchTexel function pointer is never NULL.
*/