/shark/trunk/ports/mesa/src/tmp_texformat.h |
---|
File deleted |
/shark/trunk/ports/mesa/src/makefile |
---|
27,12 → 27,12 |
./swrast/s_span.o ./swrast/s_stencil.o ./swrast/s_texstore.o ./swrast/s_texture.o\ |
./swrast/s_triangle.o ./swrast/s_zoom.o ./swrast_setup/ss_context.o\ |
./swrast_setup/ss_triangle.o ./swrast_setup/ss_vb.o ./tnl/t_array_api.o\ |
./tnl/t_import_array.o ./tnl/t_context.o ./tnl/t_eval_api.o ./tnl/t_imm_alloc.o\ |
./tnl/t_array_import.o ./tnl/t_context.o ./tnl/t_eval_api.o ./tnl/t_imm_alloc.o\ |
./tnl/t_imm_api.o ./tnl/t_imm_debug.o ./tnl/t_imm_dlist.o ./tnl/t_imm_elt.o\ |
./tnl/t_imm_eval.o ./tnl/t_imm_exec.o ./tnl/t_imm_fixup.o ./tnl/t_pipeline.o\ |
./tnl/t_vb_fog.o ./tnl/t_vb_light.o\ |
./tnl/t_vb_normals.o ./tnl/t_vb_points.o ./tnl/t_vb_program.o ./tnl/t_vb_render.o\ |
./tnl/t_vb_gentex.o ./tnl/t_vb_texmat.o ./tnl/t_vb_vertex.o\ |
./tnl/t_vb_texgen.o ./tnl/t_vb_texmat.o ./tnl/t_vb_vertex.o\ |
./math/m_clip_debug.o ./math/m_norm_debug.o\ |
./math/m_xform_debug.o ./math/m_eval.o ./math/m_matrix.o ./math/m_translate.o\ |
./math/m_vector.o ./math/m_xform.o\ |
/shark/trunk/ports/mesa/src/x86/sse.h |
---|
0,0 → 1,39 |
/* $Id: sse.h,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* PentiumIII-SIMD (SSE) optimizations contributed by |
* Andre Werthmann <wertmann@cs.uni-potsdam.de> |
*/ |
#ifndef __SSE_H__ /* include guard for the SSE transform interface */ |
#define __SSE_H__ |
#include "math/m_xform.h" |
void _mesa_init_sse_transform_asm( void ); /* declaration only; the definition is not in this header. Presumably installs the SSE transform routines -- confirm at the definition */ |
#endif /* __SSE_H__ */ |
/shark/trunk/ports/mesa/src/x86/3dnow_xform1.s |
---|
0,0 → 1,423 |
/* $Id: 3dnow_xform1.s,v 1.1 2003-03-13 12:11:47 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
#define FRAME_OFFSET 4 |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points1_general ) /* x-only input, 4-float output per vertex: r = x0 * (m00..m03) + (m30..m33) */ |
GLNAME( _mesa_3dnow_transform_points1_general ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer (named offset instead of a bare 4) */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPGR_3 ) ) /* empty source: skip straight to the exit */ |
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ |
MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */ |
ALIGNTEXT16 |
LLBL( G3TPGR_2 ): |
MOVD ( REGIND(EAX), MM4 ) /* | x0 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ |
MOVQ ( MM4, MM5 ) /* x0 | x0 */ |
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ |
PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */ |
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ |
PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r (16 bytes per output vertex) */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPGR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points1_identity ) /* x-only input, identity case: copies x0 of each vertex to the output */ |
GLNAME( _mesa_3dnow_transform_points1_identity ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer is loaded but never dereferenced in this routine */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(1), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 1 */ |
OR_B ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_1 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer (named offset instead of a bare 4) */ |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPIR_4) ) /* empty source: skip straight to the exit */ |
ALIGNTEXT16 |
LLBL( G3TPIR_3 ): |
MOVD ( REGIND(EAX), MM0 ) /* | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
MOVD ( MM0, REGIND(EDX) ) /* | r0 (only 4 of the 16 output bytes are written) */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPIR_4 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ) /* x-only input, 3-float output: r0 = x0*m00+m30, r1 = m31, r2 = m32 */ |
GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */ |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer (named offset instead of a bare 4) */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3NRR_3 ) ) /* empty source: skip straight to the exit */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ |
ALIGNTEXT16 |
LLBL( G3TP3NRR_2 ): |
MOVD ( REGIND(EAX), MM4 ) /* | x0 */ |
PFMUL ( MM0, MM4 ) /* | x0*m00 */ |
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 (= m32, loop-invariant) */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3NRR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective ) /* x-only input, 4-float output: r0 = x0*m00, r1 = 0, r2 = m32, r3 = 0 */ |
GLNAME( _mesa_3dnow_transform_points1_perspective ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer (named offset instead of a bare 4) */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPPR_3 ) ) /* empty source: skip straight to the exit */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 (MOVD clears the high half) */ |
ALIGNTEXT16 |
LLBL( G3TPPR_2 ): |
MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */ |
PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPPR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d ) /* x-only input, 2-float output: r0 = x0*m00+m30, r1 = x0*m01+m31 */ |
GLNAME( _mesa_3dnow_transform_points1_2d ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 2 */ |
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer (named offset instead of a bare 4) */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2R_3 ) ) /* empty source: skip straight to the exit */ |
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2R_2 ): |
MOVD ( REGIND(EAX), MM4 ) /* | x0 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ |
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ |
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2R_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ) /* x-only input, 2-float output: r0 = x0*m00+m30, r1 = m31 */ |
GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 2 */ |
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer (named offset instead of a bare 4) */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2NRR_3 ) ) /* empty source: skip straight to the exit */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2NRR_2 ): |
MOVD ( REGIND(EAX), MM4 ) /* | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFMUL ( MM0, MM4 ) /* | x0*m00 */ |
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2NRR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d ) /* x-only input, 3-float output: r(k) = x0*m0k + m3k for k = 0..2 */ |
GLNAME( _mesa_3dnow_transform_points1_3d ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */ |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer (named offset instead of a bare 4) */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3R_3 ) ) /* empty source: skip straight to the exit */ |
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ |
MOVD ( REGOFF(8, ECX), MM1 ) /* | m02 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ |
ALIGNTEXT16 |
LLBL( G3TP3R_2 ): |
MOVD ( REGIND(EAX), MM4 ) /* | x0 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ |
MOVQ ( MM4, MM5 ) /* x0 | x0 (only the low half is stored later) */ |
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ |
PFMUL ( MM1, MM5 ) /* | x0*m02 */ |
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ |
PFADD ( MM3, MM5 ) /* | x0*m02+m32 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3R_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/shark/trunk/ports/mesa/src/x86/3dnow_xform2.s |
---|
0,0 → 1,464 |
/* $Id: 3dnow_xform2.s,v 1.1 2003-03-13 12:11:48 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
#define FRAME_OFFSET 4 |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_general ) /* (x0,x1) input, 4-float output: r(k) = x0*m0k + x1*m1k + m3k for k = 0..3 */ |
GLNAME( _mesa_3dnow_transform_points2_general ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPGR_3 ) ) /* empty source: skip straight to the exit */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ |
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ |
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */ |
MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */ |
PUNPCKLDQ ( REGOFF(28, ECX), MM3 ) /* m13 | m03 */ |
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ |
MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */ |
ALIGNTEXT16 |
LLBL( G3TPGR_2 ): |
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ |
MOVQ ( MM6, MM7 ) /* x1 | x0 */ |
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ |
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ |
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ |
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ |
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ |
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (reloaded; EAX has not advanced yet) */ |
MOVQ ( MM6, MM7 ) /* x1 | x0 */ |
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ |
PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ |
PFADD ( MM5, MM6 ) /* x0*...*m13+m33 | x0*...*m12+m32 */ |
MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPGR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective ) /* (x0,x1) input, 4-float output: r0 = x0*m00, r1 = x1*m11, r2 = m32, r3 = 0 */ |
GLNAME( _mesa_3dnow_transform_points2_perspective ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPPR_3 ) ) /* empty source: skip straight to the exit */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 (MOVD clears the high half) */ |
ALIGNTEXT16 |
LLBL( G3TPPR_2 ): |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPPR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d ) /* (x0,x1) input, 3-float output: r(k) = x0*m0k + x1*m1k + m3k for k = 0..2 */ |
GLNAME( _mesa_3dnow_transform_points2_3d ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */ |
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3R_3 ) ) /* empty source: skip straight to the exit */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ |
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ |
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */ |
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ |
MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */ |
ALIGNTEXT16 |
LLBL( G3TP3R_2 ): |
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ |
MOVQ ( MM6, MM7 ) /* x1 | x0 */ |
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ |
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ |
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ |
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ |
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ |
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (reloaded; EAX has not advanced yet) */ |
MOVQ ( MM6, MM7 ) /* x1 | x0 (left unscaled: it only feeds the discarded high half below) */ |
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ |
PFACC ( MM7, MM6 ) /* ***trash*** | x0*m02+x1*m12 */ |
PFADD ( MM5, MM6 ) /* ***trash*** | x0*...*m12+m32 */ |
MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 (low half only, so the trash is never stored) */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3R_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ) /* (x0,x1) input, 3-float output: r0 = x0*m00+m30, r1 = x1*m11+m31, r2 = m32 */ |
GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */ |
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3NRR_3 ) ) /* empty source: skip straight to the exit */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ |
ALIGNTEXT16 |
LLBL( G3TP3NRR_2 ): |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 (= m32, loop-invariant) */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3NRR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d ) /* (x0,x1) input, 2-float output: r(k) = x0*m0k + x1*m1k + m3k for k = 0..1 */ |
GLNAME( _mesa_3dnow_transform_points2_2d ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 2 */ |
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2R_3 ) ) /* empty source: skip straight to the exit */ |
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ |
MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2R_2 ): |
MOVD ( REGIND(EAX), MM4 ) /* | x0 */ |
MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ |
PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */ |
PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */ |
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ |
PFADD ( MM5, MM4 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2R_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ) /* (x0,x1) input, 2-float output: r0 = x0*m00+m30, r1 = x1*m11+m31 */ |
GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix, parked until ECX is free */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 2 */ |
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2NRR_3 ) ) /* empty source: skip straight to the exit */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2NRR_2 ): |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2NRR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_identity ) /* (x0,x1) input, identity case: copies x0 and x1 of each vertex to the output */ |
GLNAME( _mesa_3dnow_transform_points2_identity ): |
PUSH_L ( ESI ) /* save ESI (restored at exit) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = destination vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer is loaded but never dereferenced in this routine */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 2 */ |
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI; done after the last ARG_* read */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */ |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPIR_4 ) ) /* empty source: go to the exit label, not the loop head (entering the loop with 0 would wrap the counter) */ |
ALIGNTEXT16 |
LLBL( G3TPIR_3 ): |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPIR_4 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/shark/trunk/ports/mesa/src/x86/3dnow_xform3.s |
---|
0,0 → 1,548 |
/* $Id: 3dnow_xform3.s,v 1.1 2003-03-13 12:11:48 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
#define FRAME_OFFSET 4 |
/*
 * _mesa_3dnow_transform_points3_general
 *
 * Transform 3-component source vertices (x0, x1, x2) by a full 4x4 matrix;
 * the m12..m15 column is added unscaled:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *   r3 = x0*m3 + x1*m7 + x2*m11 + m15
 * The destination is marked as size 4 and receives the source count.
 *
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ECX = matrix, element mN at byte offset 4*N
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_general ) |
GLNAME( _mesa_3dnow_transform_points3_general ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPGR_2 ) ) |
PREFETCHW ( REGIND(EDX) ) |
ALIGNTEXT16 |
LLBL( G3TPGR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
MOVQ ( MM0, MM1 ) /* x1 | x0 */ |
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ |
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ |
MOVQ ( MM2, MM5 ) /* x2 | x2 */ |
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */ |
PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */ |
MOVQ ( MM0, MM3 ) /* x0 | x0 */ |
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */ |
MOVQ ( MM1, MM4 ) /* x1 | x1 */ |
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ |
PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */ |
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */ |
PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */ |
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ |
PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */ |
PFADD ( MM1, MM2 ) /* r1 | r0 */ |
PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */ |
ADD_L ( CONST(16), EDX ) /* next output vertex */ |
PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */ |
MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */ |
PFADD ( MM4, MM5 ) /* r3 | r2 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPGR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Transform 3-component source vertices using only the matrix entries the
 * loop reads (byte offsets 0, 20, 32/36, 40 and 56 from the matrix base):
 *   r0 = x0*m00 + x2*m20
 *   r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + m32
 *   r3 = -x2
 * The destination is marked as size 4 and receives the source count.
 *
 * The matrix entries are loaded into MM0..MM3 once, before the loop.
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective ) |
GLNAME( _mesa_3dnow_transform_points3_perspective ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPPR_2 ) ) |
PREFETCH ( REGIND(EAX) ) |
PREFETCHW ( REGIND(EDX) ) |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */ |
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ |
ALIGNTEXT16 |
LLBL( G3TPPR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
PXOR ( MM7, MM7 ) /* 0 | 0 */ |
MOVQ ( MM5, MM6 ) /* | x2 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PFSUB ( MM5, MM7 ) /* | -x2 */ |
PFMUL ( MM2, MM6 ) /* | x2*m22 */ |
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */ |
PFADD ( MM3, MM6 ) /* | x2*m22+m32 */ |
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ |
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */ |
MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPPR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transform 3-component source vertices by the upper three rows of the
 * matrix; the m12..m14 entries are added unscaled:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 * Only r0..r2 are written; the destination is marked as size 3 and receives
 * the source count.
 *
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ECX = matrix, element mN at byte offset 4*N
 *   ESI = number of vertices still to process
 *   MM7 = m6 | m2, loaded once before the loop
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d ) |
GLNAME( _mesa_3dnow_transform_points3_3d ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3R_2 ) ) |
PREFETCH ( REGIND(EAX) ) |
PREFETCHW ( REGIND(EDX) ) /* destination is only written: prefetch for write */ |
MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */ |
ALIGNTEXT16 |
LLBL( G3TP3R_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
MOVQ ( MM0, MM2 ) /* x1 | x0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */ |
MOVQ ( MM0, MM3 ) /* x1 | x0 */ |
PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */ |
PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */ |
MOVQ ( MM1, MM4 ) /* | x2 */ |
PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */ |
PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ |
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */ |
PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+x1*m5+m13 | x0*m0+x1*m4+m12 */ |
PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */ |
PFADD ( MM4, MM3 ) /* r1 | r0 */ |
PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 */ |
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */ |
PFACC ( MM0, MM1 ) /* x0*m2+x1*m6 | x2*m10+m14 */ |
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ |
PFACC ( MM1, MM1 ) /* | r2 */ |
MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3R_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Transform 3-component source vertices by a per-axis scale plus offset,
 * using only the matrix entries at byte offsets 0, 20, 40, 48/52 and 56:
 *   r0 = x0*m00 + m30
 *   r1 = x1*m11 + m31
 *   r2 = x2*m22 + m32
 * Only r0..r2 are written; the destination is marked as size 3 and receives
 * the source count.
 *
 * The matrix entries are loaded into MM0..MM3 once, before the loop.
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ) |
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3NRR_2 ) ) |
PREFETCH ( REGIND(EAX) ) |
PREFETCHW ( REGIND(EDX) ) |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ |
PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */ |
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ |
PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */ |
ALIGNTEXT16 |
LLBL( G3TP3NRR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* source is only read: plain prefetch */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ |
PFMUL ( MM2, MM5 ) /* | x2*m22 */ |
PFADD ( MM3, MM5 ) /* | x2*m22+m32 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (MOVD leaves the DEC flags intact) */ |
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3NRR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Transform the first two components of 3-component source vertices by a
 * 2x2 block plus offset (matrix byte offsets 0, 4, 16, 20 and 48/52) and
 * pass the third component through unchanged:
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x2
 * The destination is marked as size 3 and receives the source count.
 *
 * The matrix entries are loaded into MM0..MM2 once, before the loop.
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d ) |
GLNAME( _mesa_3dnow_transform_points3_2d ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2R_3) ) |
PREFETCH ( REGIND(EAX) ) |
PREFETCHW ( REGIND(EDX) ) |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ |
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2R_2 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
MOVQ ( MM3, MM4 ) /* x1 | x0 */ |
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ |
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ |
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */ |
PFADD ( MM2, MM3 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ |
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2R_3 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * Scale and offset the first two components of 3-component source vertices
 * (matrix byte offsets 0, 20 and 48/52) and pass the third through:
 *   r0 = x0*m00 + m30
 *   r1 = x1*m11 + m31
 *   r2 = x2
 * The destination is marked as size 3 and receives the source count.
 *
 * The matrix entries are loaded into MM0/MM1 once, before the loop.
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ) |
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2NRR_2 ) ) |
PREFETCH ( REGIND(EAX) ) |
PREFETCHW ( REGIND(EDX) ) |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2NRR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2NRR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Copy the three components of each source vertex to the destination
 * unchanged (r0 = x0, r1 = x1, r2 = x2).  The matrix pointer is loaded into
 * ECX but no matrix element is read.  The destination is marked as size 3
 * and receives the source count.
 *
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity ) |
GLNAME( _mesa_3dnow_transform_points3_identity ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPIR_2 ) ) |
PREFETCHW ( REGIND(EDX) ) |
ALIGNTEXT16 |
LLBL( G3TPIR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */ |
MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */ |
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPIR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/shark/trunk/ports/mesa/src/x86/3dnow_xform4.s |
---|
0,0 → 1,557 |
/* $Id: 3dnow_xform4.s,v 1.1 2003-03-13 12:11:48 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
#define FRAME_OFFSET 4 |
/*
 * _mesa_3dnow_transform_points4_general
 *
 * Transform 4-component source vertices (x0, x1, x2, x3) by a full 4x4
 * matrix:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *   r3 = x0*m3 + x1*m7 + x2*m11 + x3*m15
 * The destination is marked as size 4 and receives the source count.
 *
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ECX = matrix, element mN at byte offset 4*N
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_general ) |
GLNAME( _mesa_3dnow_transform_points4_general ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPGR_2 ) ) |
PREFETCHW ( REGIND(EDX) ) |
ALIGNTEXT16 |
LLBL( G3TPGR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
MOVQ ( MM0, MM2 ) /* x1 | x0 */ |
MOVQ ( MM4, MM6 ) /* x3 | x2 */ |
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ |
PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */ |
MOVQ ( MM0, MM1 ) /* x0 | x0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ |
MOVQ ( MM2, MM3 ) /* x1 | x1 */ |
PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */ |
PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */ |
MOVQ ( MM4, MM5 ) /* x2 | x2 */ |
PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */ |
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ |
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */ |
MOVQ ( MM6, MM7 ) /* x3 | x3 */ |
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */ |
PFADD ( MM0, MM2 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ |
PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */ |
PFADD ( MM1, MM3 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */ |
PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */ |
PFADD ( MM4, MM6 ) /* x2*m9+x3*m13 | x2*m8+x3*m12 */ |
PFADD ( MM5, MM7 ) /* x2*m11+x3*m15 | x2*m10+x3*m14 */ |
PFADD ( MM2, MM6 ) /* r1 | r0 */ |
PFADD ( MM3, MM7 ) /* r3 | r2 */ |
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM7, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPGR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Transform 4-component source vertices using only the matrix entries at
 * byte offsets 0, 20, 32/36, 40 and 56:
 *   r0 = x0*m00 + x2*m20
 *   r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + x3*m32
 *   r3 = -x2
 * The destination is marked as size 4 and receives the source count.
 *
 * MM0..MM2 hold the matrix entries and MM7 holds zero; all are set up once
 * before the loop.
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective ) |
GLNAME( _mesa_3dnow_transform_points4_perspective ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPPR_2 ) ) |
PREFETCH ( REGIND(EAX) ) |
PREFETCHW ( REGIND(EDX) ) |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */ |
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */ |
MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ |
PXOR ( MM7, MM7 ) /* 0 | 0 */ |
ALIGNTEXT16 |
LLBL( G3TPPR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ |
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */ |
MOVQ ( MM5, MM6 ) /* x3 | x2 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */ |
PFSUBR ( MM7, MM3 ) /* | -x2 */ |
PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */ |
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ |
PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */ |
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPPR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Transform 4-component source vertices by the upper three rows of the
 * matrix and pass the fourth component through:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *   r3 = x3
 * The destination is marked as size 4 and receives the source count.
 *
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ECX = matrix, element mN at byte offset 4*N
 *   ESI = number of vertices still to process
 *   MM6 = m6 | m2 and MM7 = m14 | m10, loaded once before the loop
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d ) |
GLNAME( _mesa_3dnow_transform_points4_3d ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3R_2 ) ) |
MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */ |
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */ |
PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */ |
ALIGNTEXT16 |
LLBL( G3TP3R_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */ |
MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */ |
MOVQ ( MM2, MM0 ) /* x1 | x0 */ |
MOVQ ( MM3, MM4 ) /* x3 | x2 */ |
MOVQ ( MM0, MM1 ) /* x1 | x0 */ |
MOVQ ( MM4, MM5 ) /* x3 | x2 */ |
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ |
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */ |
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ |
PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */ |
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */ |
PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */ |
PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */ |
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ |
PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */ |
PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */ |
PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */ |
PFADD ( MM3, MM4 ) /* r1 | r0 */ |
PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */ |
MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFACC ( MM0, MM5 ) /* r3 | r2 */ |
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3R_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Transform 4-component source vertices by a per-axis scale plus an offset
 * weighted by x3 (matrix byte offsets 0, 20, 40, 48/52 and 56), passing the
 * fourth component through:
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2*m22 + x3*m32
 *   r3 = x3
 * The destination is marked as size 4 and receives the source count.
 *
 * The matrix entries are loaded into MM0..MM2 once, before the loop.
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ) |
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3NRR_2 ) ) |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ |
PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */ |
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP3NRR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ |
MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */ |
MOVQ ( MM5, MM6 ) /* x3 | x2 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ |
PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */ |
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ |
PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */ |
PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3NRR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_2d
 *
 * Transform the first two components of 4-component source vertices by a
 * 2x2 block plus an offset weighted by x3 (matrix byte offsets 0, 4, 16, 20
 * and 48/52); the third and fourth components pass through:
 *   r0 = x0*m00 + x1*m10 + x3*m30
 *   r1 = x0*m01 + x1*m11 + x3*m31
 *   r2 = x2
 *   r3 = x3
 * The destination is marked as size 4 and receives the source count.
 *
 * The matrix entries are loaded into MM0..MM2 once, before the loop.
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d ) |
GLNAME( _mesa_3dnow_transform_points4_2d ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2R_2 ) ) |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ |
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2R_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
MOVQ ( MM3, MM4 ) /* x1 | x0 */ |
MOVQ ( MM5, MM6 ) /* x3 | x2 */ |
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ |
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ |
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ |
PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */ |
PFADD ( MM6, MM3 ) /* r1 | r0 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2R_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * Scale the first two components of 4-component source vertices and add an
 * offset weighted by x3 (matrix byte offsets 0, 20 and 48/52); the third
 * and fourth components pass through:
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2
 *   r3 = x3
 * The destination is marked as size 4 and receives the source count.
 *
 * The matrix entries are loaded into MM0/MM1 once, before the loop.
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ) |
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2NRR_3 ) ) |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2NRR_2 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
MOVQ ( MM5, MM6 ) /* x3 | x2 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ |
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ |
PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ |
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2NRR_3 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copy all four components of each source vertex to the destination
 * unchanged.  The matrix pointer is loaded into ECX but no matrix element
 * is read.  The destination is marked as size 4 and receives the source
 * count.
 *
 * Registers inside the loop:
 *   EAX = current source vertex (advanced by the source stride held in EDI)
 *   EDX = current destination vertex (advanced by 16 bytes)
 *   ESI = number of vertices still to process
 * ESI and EDI are saved on entry and restored before RET; FEMMS is executed
 * on exit, including the count == 0 early-out.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity ) |
GLNAME( _mesa_3dnow_transform_points4_identity ): |
PUSH_L ( ESI ) |
MOV_L ( ARG_DEST, ECX ) |
MOV_L ( ARG_MATRIX, ESI ) |
MOV_L ( ARG_SOURCE, EAX ) |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) |
PUSH_L ( EDI ) |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPIR_2 ) ) |
ALIGNTEXT16 |
LLBL( G3TPIR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) |
ADD_L ( CONST(16), EDX ) /* next r */ |
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */ |
MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPIR_2 ): |
FEMMS |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/shark/trunk/ports/mesa/src/x86/3dnow_normal.s |
---|
0,0 → 1,836 |
/* $Id: 3dnow_normal.s,v 1.1 2003-03-13 12:11:47 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 4.1 |
* |
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* 3Dnow assembly code by Holger Waechtler |
*/ |
#include "matypes.h" |
#include "norm_args.h" |
SEG_TEXT |
#define M(i) REGOFF(i * 4, ECX) |
#define STRIDE REGOFF(12, ESI) |
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Two passes over in->count normals.
 *
 * Pass 1 (G3TN_transform) multiplies every input normal (x0, x1, x2) by the
 * matrix found at mat->inv and stores the result in dest, 16 bytes apart:
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 * The input pointer advances by the file's STRIDE macro, REGOFF(12, ESI).
 *
 * Pass 2 rescales the stored results in place:
 *   - lengths != NULL (G3TN_norm_w_lengths): multiply by lengths[i];
 *   - lengths == NULL (G3TN_norm): multiply by 1/sqrt(r0*r0+r1*r1+r2*r2),
 *     computed with PFRSQRT refined by PFRSQIT1/PFRCPIT2.
 *
 * Registers: EDI = lengths, ESI = in, EAX = dest pointer, EDX = input
 * pointer, ECX = matrix, EBP = remaining count.  The count and both start
 * pointers are pushed before pass 1 and popped again for pass 2.
 *
 * NOTE(review): the matrix is pre-multiplied by ARG_SCALE only when lengths
 * is NULL, i.e. on the path that renormalizes anyway -- confirm that this is
 * the intended condition.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals) |
GLNAME(_mesa_3dnow_transform_normalize_normals): |
#define FRAME_OFFSET 12 |
PUSH_L ( EDI ) |
PUSH_L ( ESI ) |
PUSH_L ( EBP ) |
MOV_L ( ARG_LENGTHS, EDI ) |
MOV_L ( ARG_IN, ESI ) |
MOV_L ( ARG_DEST, EAX ) |
MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */ |
MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) ) |
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ |
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ |
MOV_L ( ARG_MAT, ECX ) |
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */ |
CMP_L ( CONST(0), EBP ) /* count > 0 ?? */ |
JE ( LLBL (G3TN_end) ) |
MOV_L ( REGOFF (V3F_COUNT, ESI), EBP ) |
FEMMS |
PUSH_L ( EBP ) |
PUSH_L ( EAX ) |
PUSH_L ( EDX ) /* save counter & pointer for */ |
/* the normalize pass */ |
#undef FRAME_OFFSET |
#define FRAME_OFFSET 24 |
MOVQ ( M(0), MM3 ) /* m1 | m0 */ |
MOVQ ( M(4), MM4 ) /* m5 | m4 */ |
MOVD ( M(2), MM5 ) /* | m2 */ |
PUNPCKLDQ ( M(6), MM5 ) /* m6 | m2 */ |
MOVQ ( M(8), MM6 ) /* m9 | m8 */ |
MOVQ ( M(10), MM7 ) /* m11 (unused) | m10 */ |
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */ |
JNE ( LLBL (G3TN_scale_end ) ) /* lengths given: skip the pre-scale */ |
MOVD ( ARG_SCALE, MM0 ) /* | scale */ |
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */ |
PFMUL ( MM0, MM3 ) /* scale * m1 | scale * m0 */ |
PFMUL ( MM0, MM4 ) /* scale * m5 | scale * m4 */ |
PFMUL ( MM0, MM5 ) /* scale * m6 | scale * m2 */ |
PFMUL ( MM0, MM6 ) /* scale * m9 | scale * m8 */ |
PFMUL ( MM0, MM7 ) /* | scale * m10 */ |
ALIGNTEXT32 |
LLBL (G3TN_scale_end): |
LLBL (G3TN_transform): |
MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */ |
MOVQ ( MM0, MM1 ) /* x1 | x0 */ |
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ |
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
PREFETCHW ( REGIND(EAX) ) |
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */ |
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */ |
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */ |
PFADD ( MM2, MM0 ) /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */ |
MOVQ ( REGIND (EDX), MM1 ) /* x1 | x0 */ |
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */ |
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */ |
MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */ |
PFMUL ( MM7, MM2 ) /* | x2*m10 */ |
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */ |
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */ |
ADD_L ( STRIDE, EDX ) /* next normal */ |
PREFETCH ( REGIND(EDX) ) |
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */ |
DEC_L ( EBP ) /* decrement normal counter */ |
JA ( LLBL (G3TN_transform) ) /* DEC leaves CF alone: JA also needs CF=0 from the ADD above */ |
POP_L ( EDX ) /* end of transform --- */ |
POP_L ( EAX ) /* now normalizing ... */ |
POP_L ( EBP ) |
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */ |
JE ( LLBL (G3TN_norm ) ) /* calculate lengths */ |
ALIGNTEXT32 |
LLBL (G3TN_norm_w_lengths): |
PREFETCHW ( REGOFF(12,EAX) ) |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ |
MOVD ( REGIND (EDI), MM3 ) /* | length (x) */ |
PFMUL ( MM3, MM1 ) /* | x2 (normalized) */ |
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */ |
PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */ |
ADD_L ( STRIDE, EDX ) /* advances EDX, which this loop never reads */ |
ADD_L ( CONST(4), EDI ) /* next length */ |
PREFETCH ( REGIND(EDI) ) |
MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */ |
MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
DEC_L ( EBP ) /* decrement normal counter */ |
JA ( LLBL (G3TN_norm_w_lengths) ) |
JMP ( LLBL (G3TN_exit_3dnow) ) |
ALIGNTEXT32 |
LLBL (G3TN_norm): |
PREFETCHW ( REGIND(EAX) ) |
/* Load the normal at the head of the loop so that the first iteration does */
/* not depend on MM0/MM1 left over from the transform pass (MM1 would carry */
/* a stale partial sum in its upper half) and so that nothing is read past  */
/* the last element.                                                        */
MOVQ ( REGIND (EAX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM1 ) /* 0 | x2 */ |
MOVQ ( MM0, MM3 ) /* x1 | x0 */ |
MOVQ ( MM1, MM4 ) /* | x2 */ |
PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
PFMUL ( MM1, MM4 ) /* | x2*x2 */ |
PFADD ( MM4, MM3 ) /* x1*x1 | x0*x0+x2*x2 */ |
PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1+x2*x2 */ |
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */ |
MOVQ ( MM5, MM4 ) |
PUNPCKLDQ ( MM3, MM3 ) |
DEC_L ( EBP ) /* decrement normal counter */ |
PFMUL ( MM5, MM5 ) |
PFRSQIT1 ( MM3, MM5 ) |
PFRCPIT2 ( MM4, MM5 ) |
PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */ |
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */ |
PFMUL ( MM5, MM1 ) /* | x2 (normalized) */ |
MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */ |
JA ( LLBL (G3TN_norm) ) /* flags still from DEC_L/ADD_L: MMX ops do not touch them */ |
LLBL (G3TN_exit_3dnow): |
FEMMS |
LLBL (G3TN_end): |
POP_L ( EBP ) |
POP_L ( ESI ) |
POP_L ( EDI ) |
RET |
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Scales every input normal component-wise by three entries read through
 * M(0), M(5) and M(10) (labelled m0, m5, m10 below) and then normalizes
 * it. Results are written as three floats at a 16-byte output pitch;
 * the input pointer advances by STRIDE per normal. STRIDE, M() and the
 * ARG_* macros are defined outside this block.
 *
 * Two paths, selected by the lengths argument:
 *  - lengths != 0: each transformed normal is multiplied by the
 *    caller-supplied per-normal factor read from that array.
 *  - lengths == 0: the matrix entries are first multiplied by ARG_SCALE,
 *    then 1/sqrt(x0^2+x1^2+x2^2) is computed with PFRSQRT and refined
 *    with PFRSQIT1/PFRCPIT2.
 *
 * dest->count is always set to in->count; when count == 0 the function
 * returns without executing any MMX/3DNow! instruction.
 *
 * NOTE(review): both loops branch with JA after DEC_L. DEC does not write
 * CF, so the branch also depends on the carry left by the preceding ADD_L.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot) |
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot): |
#undef FRAME_OFFSET |
#define FRAME_OFFSET 12 |
/* three registers are pushed below, matching FRAME_OFFSET 12 */
PUSH_L ( EDI ) |
PUSH_L ( ESI ) |
PUSH_L ( EBP ) |
MOV_L ( ARG_LENGTHS, EDI ) |
MOV_L ( ARG_IN, ESI ) |
MOV_L ( ARG_DEST, EAX ) |
MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */ |
MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) ) |
MOV_L ( ARG_MAT, ECX ) |
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ |
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */ |
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ |
CMP_L ( CONST(0), EBP ) /* count > 0 ?? */ |
JE ( LLBL (G3TNNR_end) ) |
FEMMS |
MOVD ( M(0), MM0 ) /* | m0 */ |
PUNPCKLDQ ( M(5), MM0 ) /* m5 | m0 */ |
MOVD ( M(10), MM2 ) /* | m10 */ |
PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */ |
/* the scale factor is folded into the matrix only when no lengths array is given */
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */ |
JNE ( LLBL (G3TNNR_scale_end ) ) |
MOVD ( ARG_SCALE, MM7 ) /* | scale */ |
PUNPCKLDQ ( MM7, MM7 ) /* scale | scale */ |
PFMUL ( MM7, MM0 ) /* scale * m5 | scale * m0 */ |
PFMUL ( MM7, MM2 ) /* scale * m10 | scale * m10 */ |
ALIGNTEXT32 |
LLBL (G3TNNR_scale_end): |
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */ |
JE ( LLBL (G3TNNR_norm) ) /* need to calculate lengths */ |
/* preload the first factor; the loop reloads MM3 at its bottom */
MOVD ( REGIND(EDI), MM3 ) /* | length (x) */ |
ALIGNTEXT32 |
LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */ |
PREFETCHW ( REGIND(EAX) ) |
MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */ |
PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */ |
ADD_L ( STRIDE, EDX ) /* next normal */ |
PREFETCH ( REGIND(EDX) ) |
PFMUL ( MM2, MM7 ) /* | x2*m10 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
PFMUL ( MM3, MM7 ) /* | x2 (normalized) */ |
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */ |
ADD_L ( CONST(4), EDI ) /* next length */ |
PFMUL ( MM3, MM6 ) /* x1 (normalized) | x0 (normalized) */ |
DEC_L ( EBP ) /* decrement normal counter */ |
MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */ |
MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */ |
/* NOTE(review): this load happens before the loop test, so the final
 * iteration reads one float past the last factor that is actually used. */
MOVD ( REGIND(EDI), MM3 ) /* | length (x) */ |
JA ( LLBL (G3TNNR_norm_w_lengths) ) |
JMP ( LLBL (G3TNNR_exit_3dnow) ) |
ALIGNTEXT32 |
LLBL (G3TNNR_norm): /* need to calculate lengths */ |
PREFETCHW ( REGIND(EAX) ) |
MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */ |
PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
PFMUL ( MM2, MM7 ) /* | x2*m10 */ |
MOVQ ( MM6, MM3 ) /* x1 (transformed)| x0 (transformed) */ |
MOVQ ( MM7, MM4 ) /* | x2 (transformed) */ |
PFMUL ( MM6, MM3 ) /* x1*x1 | x0*x0 */ |
PFMUL ( MM7, MM4 ) /* | x2*x2 */ |
PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1 */ |
PFADD ( MM4, MM3 ) /* | x0*x0+x1*x1+x2*x2*/ |
ADD_L ( STRIDE, EDX ) /* next normal */ |
PREFETCH ( REGIND(EDX) ) |
/* reciprocal square root: PFRSQRT estimate refined by PFRSQIT1 + PFRCPIT2 */
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */ |
MOVQ ( MM5, MM4 ) |
PUNPCKLDQ ( MM3, MM3 ) |
PFMUL ( MM5, MM5 ) |
PFRSQIT1 ( MM3, MM5 ) |
DEC_L ( EBP ) /* decrement normal counter */ |
PFRCPIT2 ( MM4, MM5 ) |
PFMUL ( MM5, MM6 ) /* x1 (normalized) | x0 (normalized) */ |
MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */ |
PFMUL ( MM5, MM7 ) /* | x2 (normalized) */ |
MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */ |
JA ( LLBL (G3TNNR_norm) ) |
LLBL (G3TNNR_exit_3dnow): |
FEMMS |
LLBL (G3TNNR_end): |
POP_L ( EBP ) |
POP_L ( ESI ) |
POP_L ( EDI ) |
RET |
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Multiplies every input normal component-wise by scale*m0, scale*m5 and
 * scale*m10, where m0/m5/m10 are the floats at byte offsets 0, 20 and 40
 * of mat->inv. No normalization is performed. Results are written as
 * three floats at a 16-byte output pitch; the input advances by STRIDE.
 *
 * dest->count is set to in->count; with count == 0 the function returns
 * before any MMX/3DNow! instruction is executed.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot) |
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot): |
#undef FRAME_OFFSET |
#define FRAME_OFFSET 12 |
PUSH_L ( EDI ) |
PUSH_L ( ESI ) |
PUSH_L ( EBP ) |
/* EAX/EDX are only temporaries here; both are reloaded with the data pointers below */
MOV_L ( ARG_IN, EAX ) |
MOV_L ( ARG_DEST, EDX ) |
MOV_L ( REGOFF(V3F_COUNT, EAX), EBP ) /* dest->count = in->count */ |
MOV_L ( EBP, REGOFF(V3F_COUNT, EDX) ) |
MOV_L ( ARG_IN, ESI ) |
MOV_L ( ARG_MAT, ECX ) |
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */ |
MOV_L ( REGOFF(V3F_START, EDX), EAX ) /* dest->start */ |
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ |
CMP_L ( CONST(0), EBP ) |
JE ( LLBL (G3TRNR_end) ) |
FEMMS |
/* fold the scale factor into the three matrix entries once, outside the loop */
MOVD ( ARG_SCALE, MM6 ) /* | scale */ |
PUNPCKLDQ ( MM6, MM6 ) /* scale | scale */ |
MOVD ( REGIND(ECX), MM0 ) /* | m0 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */ |
PFMUL ( MM6, MM0 ) /* scale*m5 | scale*m0 */ |
MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */ |
PFMUL ( MM6, MM2 ) /* | scale*m10 */ |
ALIGNTEXT32 |
LLBL (G3TRNR_rescale): |
PREFETCHW ( REGIND(EAX) ) |
MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */ |
PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */ |
ADD_L ( STRIDE, EDX ) /* next normal */ |
PREFETCH ( REGIND(EDX) ) |
PFMUL ( MM2, MM5 ) /* | x2*m10 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
DEC_L ( EBP ) /* decrement normal counter */ |
/* the MMX stores below leave the flags set by DEC_L untouched */
MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */ |
MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */ |
JA ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal */ |
FEMMS |
LLBL (G3TRNR_end): |
POP_L ( EBP ) |
POP_L ( ESI ) |
POP_L ( EDI ) |
RET |
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Transforms every input normal by the scaled upper-left 3x3 part of
 * mat->inv (entries m0,m1,m2 / m4,m5,m6 / m8,m9,m10, each multiplied by
 * scale once before the loop):
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 * Results are written as three floats at a 16-byte output pitch; the
 * input advances by STRIDE. No normalization is performed.
 *
 * dest->count is set to in->count; with count == 0 the function returns
 * before any MMX/3DNow! instruction is executed.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals) |
GLNAME(_mesa_3dnow_transform_rescale_normals): |
#undef FRAME_OFFSET |
#define FRAME_OFFSET 8 |
PUSH_L ( EDI ) |
PUSH_L ( ESI ) |
MOV_L ( ARG_IN, ESI ) |
MOV_L ( ARG_DEST, EAX ) |
MOV_L ( ARG_MAT, ECX ) |
MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */ |
MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) ) |
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ |
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ |
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */ |
CMP_L ( CONST(0), EDI ) |
JE ( LLBL (G3TR_end) ) |
FEMMS |
/* load the nine matrix entries and pre-multiply them by scale */
MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */ |
MOVQ ( REGOFF(16,ECX), MM4 ) /* m5 | m4 */ |
MOVD ( ARG_SCALE, MM0 ) /* scale */ |
MOVD ( REGOFF(8,ECX), MM5 ) /* | m2 */ |
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */ |
PFMUL ( MM0, MM3 ) /* scale*m1 | scale*m0 */ |
MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8*/ |
PFMUL ( MM0, MM4 ) /* scale*m5 | scale*m4 */ |
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */ |
PFMUL ( MM0, MM5 ) /* scale*m6 | scale*m2 */ |
PFMUL ( MM0, MM6 ) /* scale*m9 | scale*m8 */ |
PFMUL ( MM0, MM7 ) /* | scale*m10 */ |
ALIGNTEXT32 |
LLBL (G3TR_rescale): |
PREFETCHW ( REGIND(EAX) ) |
MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ |
MOVQ ( MM0, MM1 ) /* x1 | x0 */ |
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ |
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */ |
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */ |
/* x0..x2 are reloaded from memory for the r2 row before EDX advances */
MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */ |
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */ |
PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */ |
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ |
ADD_L ( STRIDE, EDX ) /* next normal */ |
PREFETCH ( REGIND(EDX) ) |
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */ |
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */ |
PFMUL ( MM7, MM2 ) /* | x2*m10 */ |
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */ |
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */ |
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */ |
DEC_L ( EDI ) /* decrement normal counter */ |
JA ( LLBL (G3TR_rescale) ) |
FEMMS |
LLBL (G3TR_end): |
POP_L ( ESI ) |
POP_L ( EDI ) |
RET |
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Multiplies every input normal component-wise by m0, m5 and m10 (the
 * floats at byte offsets 0, 20 and 40 of mat->inv). No scale factor and
 * no normalization are applied. Results are written as three floats at a
 * 16-byte output pitch; the input advances by STRIDE.
 *
 * dest->count is set to in->count; with count == 0 the function returns
 * before any MMX/3DNow! instruction is executed.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot) |
GLNAME(_mesa_3dnow_transform_normals_no_rot): |
#undef FRAME_OFFSET |
#define FRAME_OFFSET 8 |
PUSH_L ( EDI ) |
PUSH_L ( ESI ) |
MOV_L ( ARG_IN, ESI ) |
MOV_L ( ARG_DEST, EAX ) |
MOV_L ( ARG_MAT, ECX ) |
MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */ |
MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) ) |
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ |
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ |
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */ |
CMP_L ( CONST(0), EDI ) |
JE ( LLBL (G3TNR_end) ) |
FEMMS |
MOVD ( REGIND(ECX), MM0 ) /* | m0 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */ |
MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */ |
PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */ |
ALIGNTEXT32 |
LLBL (G3TNR_transform): |
PREFETCHW ( REGIND(EAX) ) |
MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */ |
PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */ |
ADD_L ( STRIDE, EDX) /* next normal */ |
PREFETCH ( REGIND(EDX) ) |
PFMUL ( MM2, MM5 ) /* | x2*m10 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
DEC_L ( EDI ) /* decrement normal counter */ |
/* the MMX stores below leave the flags set by DEC_L untouched */
MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */ |
MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */ |
JA ( LLBL (G3TNR_transform) ) |
FEMMS |
LLBL (G3TNR_end): |
POP_L ( ESI ) |
POP_L ( EDI ) |
RET |
/*
 * _mesa_3dnow_transform_normals
 *
 * Transforms every input normal by the upper-left 3x3 part of mat->inv
 * (entries m0,m1,m2 / m4,m5,m6 / m8,m9,m10):
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 * No scale factor and no normalization are applied. Results are written
 * as three floats at a 16-byte output pitch; the input advances by STRIDE.
 *
 * dest->count is set to in->count; with count == 0 the function returns
 * before any MMX/3DNow! instruction is executed.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_3dnow_transform_normals) |
GLNAME(_mesa_3dnow_transform_normals): |
#undef FRAME_OFFSET |
#define FRAME_OFFSET 8 |
PUSH_L ( EDI ) |
PUSH_L ( ESI ) |
MOV_L ( ARG_IN, ESI ) |
MOV_L ( ARG_DEST, EAX ) |
MOV_L ( ARG_MAT, ECX ) |
MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */ |
MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) ) |
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ |
MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ |
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */ |
CMP_L ( CONST(0), EDI ) /* count > 0 ?? */ |
JE ( LLBL (G3T_end) ) |
FEMMS |
/* keep all nine matrix entries resident in MM3..MM7 for the whole loop */
MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */ |
MOVQ ( REGOFF(16, ECX), MM4 ) /* m5 | m4 */ |
MOVD ( REGOFF(8, ECX), MM5 ) /* | m2 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */ |
MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */ |
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */ |
ALIGNTEXT32 |
LLBL (G3T_transform): |
PREFETCHW ( REGIND(EAX) ) |
MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ |
MOVQ ( MM0, MM1 ) /* x1 | x0 */ |
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ |
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */ |
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */ |
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */ |
PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */ |
/* x0..x2 are reloaded from memory for the r2 row before EDX advances */
MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */ |
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */ |
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */ |
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ |
PFMUL ( MM7, MM2 ) /* | x2*m10 */ |
ADD_L ( STRIDE, EDX ) /* next normal */ |
PREFETCH ( REGIND(EDX) ) |
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */ |
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */ |
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */ |
DEC_L ( EDI ) /* decrement normal counter */ |
JA ( LLBL (G3T_transform) ) |
FEMMS |
LLBL (G3T_end): |
POP_L ( ESI ) |
POP_L ( EDI ) |
RET |
/*
 * _mesa_3dnow_normalize_normals
 *
 * Normalizes every input normal without applying a matrix. Results are
 * written as three floats at a 16-byte output pitch; the input advances
 * by STRIDE (STRIDE and the ARG_* macros are defined outside this block).
 *
 * Two paths, selected by the lengths argument:
 *  - lengths != 0 (G3N_norm1): each normal is multiplied by the
 *    caller-supplied per-normal factor read from that array.
 *  - lengths == 0 (G3N_norm2): 1/sqrt(x0^2+x1^2+x2^2) is computed with
 *    PFRSQRT and refined with PFRSQIT1/PFRCPIT2.
 *
 * dest->count is set to in->count; with count == 0 the function returns
 * before any MMX/3DNow! instruction is executed.
 *
 * NOTE(review): both loops branch with JA after DEC_L. DEC does not write
 * CF, so the branch also depends on the carry left by the preceding ADD_L.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_3dnow_normalize_normals) |
GLNAME(_mesa_3dnow_normalize_normals): |
#undef FRAME_OFFSET |
#define FRAME_OFFSET 12 |
PUSH_L ( EDI ) |
PUSH_L ( ESI ) |
PUSH_L ( EBP ) |
MOV_L ( ARG_IN, ESI ) |
MOV_L ( ARG_DEST, EAX ) |
MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */ |
MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) ) |
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ |
MOV_L ( REGOFF(V3F_START, ESI), ECX ) /* in->start */ |
MOV_L ( ARG_LENGTHS, EDX ) |
CMP_L ( CONST(0), EBP ) /* count > 0 ?? */ |
JE ( LLBL (G3N_end) ) |
FEMMS |
CMP_L ( CONST(0), EDX ) /* lengths == 0 ? */ |
JE ( LLBL (G3N_norm2) ) /* calculate lengths */ |
ALIGNTEXT32 |
LLBL (G3N_norm1): /* use precalculated lengths */ |
PREFETCH ( REGIND(EAX) ) |
MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */ |
MOVD ( REGIND(EDX), MM3 ) /* | length (x) */ |
PFMUL ( MM3, MM1 ) /* | x2 (normalized) */ |
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */ |
ADD_L ( STRIDE, ECX ) /* next normal */ |
PREFETCH ( REGIND(ECX) ) |
PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */ |
MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */ |
MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
ADD_L ( CONST(4), EDX ) /* next length */ |
DEC_L ( EBP ) /* decrement normal counter */ |
JA ( LLBL (G3N_norm1) ) |
JMP ( LLBL (G3N_end1) ) |
ALIGNTEXT32 |
LLBL (G3N_norm2): /* need to calculate lengths */ |
PREFETCHW ( REGIND(EAX) ) |
/* Load the current normal BEFORE copying it and BEFORE advancing ECX:
 * MMX registers are undefined after FEMMS, and squaring must use the
 * same vector that is later scaled and stored. */
MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */ |
MOVQ ( MM0, MM3 ) /* x1 | x0 */ |
ADD_L ( STRIDE, ECX ) /* next normal */ |
PREFETCH ( REGIND(ECX) ) |
PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */ |
MOVQ ( MM1, MM4 ) /* | x2 */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
PFMUL ( MM1, MM4 ) /* | x2*x2 */ |
PFADD ( MM4, MM3 ) /* x1*x1 | x0*x0+x2*x2 */ |
PFACC ( MM3, MM3 ) /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/ |
/* reciprocal square root: PFRSQRT estimate refined by PFRSQIT1 + PFRCPIT2 */
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */ |
MOVQ ( MM5, MM4 ) |
PUNPCKLDQ ( MM3, MM3 ) |
PFMUL ( MM5, MM5 ) |
PFRSQIT1 ( MM3, MM5 ) |
DEC_L ( EBP ) /* decrement normal counter */ |
PFRCPIT2 ( MM4, MM5 ) |
PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */ |
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */ |
PFMUL ( MM5, MM1 ) /* | x2 (normalized) */ |
MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */ |
JA ( LLBL (G3N_norm2) ) |
LLBL (G3N_end1): |
FEMMS |
LLBL (G3N_end): |
POP_L ( EBP ) |
POP_L ( ESI ) |
POP_L ( EDI ) |
RET |
/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies all three components of every input normal by the single
 * ARG_SCALE factor. No matrix is read and no normalization is done.
 * Results are written as three floats at a 16-byte output pitch; the
 * input advances by STRIDE.
 *
 * dest->count is set to in->count; with count == 0 the function returns
 * before any MMX/3DNow! instruction is executed.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_3dnow_rescale_normals) |
GLNAME(_mesa_3dnow_rescale_normals): |
#undef FRAME_OFFSET |
#define FRAME_OFFSET 8 |
/* EDI is saved and restored although this routine does not otherwise use it */
PUSH_L ( EDI ) |
PUSH_L ( ESI ) |
MOV_L ( ARG_IN, ESI ) |
MOV_L ( ARG_DEST, EAX ) |
MOV_L ( REGOFF(V3F_COUNT, ESI), EDX ) /* dest->count = in->count */ |
MOV_L ( EDX, REGOFF(V3F_COUNT, EAX) ) |
MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ |
MOV_L ( REGOFF(V3F_START, ESI), ECX ) /* in->start */ |
CMP_L ( CONST(0), EDX ) |
JE ( LLBL (G3R_end) ) |
FEMMS |
MOVD ( ARG_SCALE, MM0 ) /* scale */ |
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */ |
ALIGNTEXT32 |
LLBL (G3R_rescale): |
PREFETCHW ( REGIND(EAX) ) |
MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */ |
PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */ |
ADD_L ( STRIDE, ECX ) /* next normal */ |
PREFETCH ( REGIND(ECX) ) |
PFMUL ( MM0, MM2 ) /* | x2*scale */ |
ADD_L ( CONST(16), EAX ) /* next r */ |
MOVQ ( MM1, REGOFF(-16, EAX) ) /* write r0, r1 */ |
MOVD ( MM2, REGOFF(-8, EAX) ) /* write r2 */ |
DEC_L ( EDX ) /* decrement normal counter */ |
JA ( LLBL (G3R_rescale) ) |
FEMMS |
LLBL (G3R_end): |
POP_L ( ESI ) |
POP_L ( EDI ) |
RET |
/shark/trunk/ports/mesa/src/x86/3dnow.c |
---|
0,0 → 1,89 |
/* $Id: 3dnow.c,v 1.1 2003-03-13 12:11:47 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 4.1 |
* |
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* 3DNow! optimizations contributed by |
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> |
*/ |
#include "glheader.h" |
#include "context.h" |
#include "math/m_xform.h" |
#include "tnl/t_context.h" |
#include "3dnow.h" |
#include "common_x86_macros.h" |
#ifdef DEBUG |
#include "math/m_debug.h" |
#endif |
/*
 * Declarations that are only needed when the 3DNow! assembly is built in
 * (USE_3DNOW_ASM). The DECLARE_* macros are defined outside this file
 * (presumably in common_x86_macros.h -- confirm); they are invoked once per
 * point size (2, 3, 4) and once for the normal-transform group.
 */
#ifdef USE_3DNOW_ASM |
DECLARE_XFORM_GROUP( 3dnow, 2 ) |
DECLARE_XFORM_GROUP( 3dnow, 3 ) |
DECLARE_XFORM_GROUP( 3dnow, 4 ) |
DECLARE_NORM_GROUP( 3dnow ) |
/*
 * External routines with the _ASMAPI calling convention. They are not
 * referenced by name anywhere else in this file.
 */
extern void _ASMAPI |
_mesa_v16_3dnow_general_xform( GLfloat *first_vert, |
const GLfloat *m, |
const GLfloat *src, |
GLuint src_stride, |
GLuint count ); |
extern void _ASMAPI |
_mesa_3dnow_project_vertices( GLfloat *first, |
GLfloat *last, |
const GLfloat *m, |
GLuint stride ); |
extern void _ASMAPI |
_mesa_3dnow_project_clipped_vertices( GLfloat *first, |
GLfloat *last, |
const GLfloat *m, |
GLuint stride, |
const GLubyte *clipmask ); |
#endif |
/*
 * Install the 3DNow! transform routines.
 *
 * When USE_3DNOW_ASM is defined, invokes the ASSIGN_* macros for point
 * sizes 2, 3 and 4 and for the normal-transform group; in DEBUG builds it
 * then runs the math self-tests labelled "3DNow!". Without USE_3DNOW_ASM
 * the function body is empty.
 */
void _mesa_init_3dnow_transform_asm( void ) |
{ |
#ifdef USE_3DNOW_ASM |
ASSIGN_XFORM_GROUP( 3dnow, 2 ); |
ASSIGN_XFORM_GROUP( 3dnow, 3 ); |
ASSIGN_XFORM_GROUP( 3dnow, 4 ); |
ASSIGN_NORM_GROUP( 3dnow ); |
#ifdef DEBUG |
/* run the self-tests only after every group has been assigned */
_math_test_all_transform_functions( "3DNow!" ); |
_math_test_all_normal_transform_functions( "3DNow!" ); |
#endif |
#endif |
} |
/shark/trunk/ports/mesa/src/x86/3dnow.h |
---|
0,0 → 1,39 |
/* $Id: 3dnow.h,v 1.1 2003-03-13 12:11:47 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* 3DNow! optimizations contributed by |
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> |
*/ |
/* Include guard for the 3DNow! transform setup interface. */
#ifndef __3DNOW_H__ |
#define __3DNOW_H__ |
#include "math/m_xform.h" |
/* Installs the 3DNow! transform routines; takes no arguments and returns nothing. */
void _mesa_init_3dnow_transform_asm( void ); |
#endif |
/shark/trunk/ports/mesa/src/x86/sse_xform1.s |
---|
0,0 → 1,433 |
/* $Id: sse_xform1.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** TODO: |
* - insert PREFETCH instructions to avoid cache-misses ! |
* - some more optimizations are possible... |
* - for 40-50% more performance in the SSE-functions, the |
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
*/ |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/*
 * Operand shorthands: the i-th 32-bit element relative to ESI (S),
 * EDI (D) and EDX (M). The routines below load those registers with the
 * source vertex, destination vertex and matrix pointers respectively.
 * The argument is not parenthesized, so pass a plain constant.
 */
#define S(i) REGOFF(i * 4, ESI) |
#define D(i) REGOFF(i * 4, EDI) |
#define M(i) REGOFF(i * 4, EDX) |
/*
 * _mesa_sse_transform_points1_general
 *
 * For every source vertex, reads only its first float (ox) and stores
 *   ox * (m0, m1, m2, m3) + (m12, m13, m14, m15)
 * as four floats in the destination. Destination vertices are 16 bytes
 * apart; the source advances by its stride field.
 *
 * Sets VEC_SIZE_4 in the destination flags, copies the source count and
 * sets the destination size to 4. Returns without modifying the
 * destination when the source count is 0.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_general) |
GLNAME( _mesa_sse_transform_points1_general ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
/* the +8 accounts for the two registers pushed above */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
CMP_L( CONST(0), ECX ) /* count == 0 ? */ |
JE( LLBL(K_GTP1GR_finish) ) /* yes -> nothing to do. */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
/* ECX becomes the end-of-destination pointer used as the loop bound */
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
/* MOVAPS requires the matrix to be 16-byte aligned */
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP1GR_top): |
MOVSS( S(0), XMM2 ) /* ox */ |
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
ADDPS( XMM1, XMM2 ) /* + | + | + | + */ |
MOVUPS( XMM2, D(0) ) /* unaligned store of all four results */ |
LLBL(K_GTP1GR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP1GR_top) ) |
LLBL(K_GTP1GR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_identity
 *
 * Copies the first 32-bit element of every source vertex to the
 * corresponding destination vertex with plain integer moves (no SSE
 * instruction is used). Destination vertices are 16 bytes apart; the
 * source advances by its stride field.
 *
 * Sets VEC_SIZE_1 in the destination flags, copies the source count and
 * sets the destination size to 1. The copy loop is skipped when source
 * and destination start at the same address. Returns without modifying
 * the destination when the source count is 0.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_identity) |
GLNAME( _mesa_sse_transform_points1_identity ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
/* the +8 accounts for the two registers pushed above */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP1IR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
/* ECX becomes the end-of-destination pointer used as the loop bound */
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* in-place case: source and destination data coincide, nothing to copy */
CMP_L( ESI, EDI ) |
JE( LLBL(K_GTP1IR_finish) ) |
ALIGNTEXT32 |
LLBL(K_GTP1IR_top): |
MOV_L( S(0), EDX ) |
MOV_L( EDX, D(0) ) |
LLBL(K_GTP1IR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP1IR_top) ) |
LLBL(K_GTP1IR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * For every source vertex, reads only its first float (ox) and stores
 *   D(0) = ox*m0 + m12,  D(1) = m13,  D(2) = m14
 * in the destination; D(3) is not written. Destination vertices are
 * 16 bytes apart; the source advances by its stride field.
 *
 * Sets VEC_SIZE_3 in the destination flags, copies the source count and
 * sets the destination size to 3. Returns without modifying the
 * destination when the source count is 0.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot) |
GLNAME(_mesa_sse_transform_points1_3d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
/* the +8 accounts for the two registers pushed above */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13DNRR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
/* ECX becomes the end-of-destination pointer used as the loop bound */
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
MOVSS( M(0), XMM0 ) /* m0 */ |
MOVSS( M(12), XMM1 ) /* m12 */ |
MOVSS( M(13), XMM2 ) /* m13 */ |
MOVSS( M(14), XMM3 ) /* m14 */ |
ALIGNTEXT32 |
LLBL(K_GTP13DNRR_top): |
MOVSS( S(0), XMM4 ) /* ox */ |
MULSS( XMM0, XMM4 ) /* ox*m0 */ |
ADDSS( XMM1, XMM4 ) /* ox*m0+m12 */ |
MOVSS( XMM4, D(0) ) |
/* y and z outputs are the constant translation terms */
MOVSS( XMM2, D(1) ) |
MOVSS( XMM3, D(2) ) |
LLBL(K_GTP13DNRR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP13DNRR_top) ) |
LLBL(K_GTP13DNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_perspective
 *
 * For every source vertex, reads only its first float (ox) and stores
 *   D(0) = ox*m0,  D(1) = 0,  D(2) = m14,  D(3) = 0
 * in the destination. Destination vertices are 16 bytes apart; the
 * source advances by its stride field.
 *
 * Sets VEC_SIZE_4 in the destination flags, copies the source count and
 * sets the destination size to 4. Returns without modifying the
 * destination when the source count is 0.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_perspective) |
GLNAME(_mesa_sse_transform_points1_perspective): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
/* the +8 accounts for the two registers pushed above */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13PR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
/* ECX becomes the end-of-destination pointer used as the loop bound */
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
XORPS( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ |
MOVSS( M(0), XMM1 ) /* m0 */ |
MOVSS( M(14), XMM2 ) /* m14 */ |
ALIGNTEXT32 |
LLBL(K_GTP13PR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
MULSS( XMM1, XMM3 ) /* ox*m0 */ |
MOVSS( XMM3, D(0) ) /* ox*m0->D(0) */ |
MOVSS( XMM2, D(2) ) /* m14->D(2) */ |
MOVSS( XMM0, D(1) ) /* 0->D(1) */ |
MOVSS( XMM0, D(3) ) /* 0->D(3) */ |
LLBL(K_GTP13PR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP13PR_top) ) |
LLBL(K_GTP13PR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_2d
 *
 * For every source vertex, reads only its first float (ox) and stores
 *   D(0) = ox*m0 + m12,  D(1) = ox*m1 + m13
 * in the destination; D(2) and D(3) are not written. Destination
 * vertices are 16 bytes apart; the source advances by its stride field.
 *
 * Sets VEC_SIZE_2 in the destination flags, copies the source count and
 * sets the destination size to 2. Returns without modifying the
 * destination when the source count is 0.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_2d) |
GLNAME(_mesa_sse_transform_points1_2d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
/* the +8 accounts for the two registers pushed above */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13P2DR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
/* ECX becomes the end-of-destination pointer used as the loop bound */
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
/* MOVLPS fills only the low two lanes; the high lanes are never stored */
MOVLPS( M(0), XMM0 ) /* m1 | m0 */ |
MOVLPS( M(12), XMM1 ) /* m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP13P2DR_top): |
MOVSS( S(0), XMM2 ) /* ox */ |
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM2 ) /* - | - | ox*m1 | ox*m0 */ |
ADDPS( XMM1, XMM2 ) /* - | - | ox*m1+m13 | ox*m0+m12 */ |
MOVLPS( XMM2, D(0) ) /* store the low two results to D(0), D(1) */ |
LLBL(K_GTP13P2DR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP13P2DR_top) ) |
LLBL(K_GTP13P2DR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * For every source vertex, reads only its first float (ox) and stores
 *   D(0) = ox*m0 + m12,  D(1) = m13
 * in the destination; D(2) and D(3) are not written. Destination
 * vertices are 16 bytes apart; the source advances by its stride field.
 *
 * Sets VEC_SIZE_2 in the destination flags, copies the source count and
 * sets the destination size to 2. Returns without modifying the
 * destination when the source count is 0.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot) |
GLNAME(_mesa_sse_transform_points1_2d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
/* the +8 accounts for the two registers pushed above */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13P2DNRR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
/* ECX becomes the end-of-destination pointer used as the loop bound */
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
MOVSS( M(0), XMM0 ) /* m0 */ |
MOVSS( M(12), XMM1 ) /* m12 */ |
MOVSS( M(13), XMM2 ) /* m13 */ |
ALIGNTEXT32 |
LLBL(K_GTP13P2DNRR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
MULSS( XMM0, XMM3 ) /* ox*m0 */ |
ADDSS( XMM1, XMM3 ) /* ox*m0+m12 */ |
MOVSS( XMM3, D(0) ) |
MOVSS( XMM2, D(1) ) /* y output is the constant m13 */ |
LLBL(K_GTP13P2DNRR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP13P2DNRR_top) ) |
LLBL(K_GTP13P2DNRR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_3d
 *
 * For every source vertex, reads only its first float (ox), computes
 *   ox * (m0, m1, m2, m3) + (m12, m13, m14, m15)
 * and stores the first three results to D(0), D(1), D(2); D(3) is not
 * written. Destination vertices are 16 bytes apart; the source advances
 * by its stride field.
 *
 * Sets VEC_SIZE_3 in the destination flags, copies the source count and
 * sets the destination size to 3. Returns without modifying the
 * destination when the source count is 0.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_3d) |
GLNAME(_mesa_sse_transform_points1_3d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
/* the +8 accounts for the two registers pushed above */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13P3DR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
/* ECX becomes the end-of-destination pointer used as the loop bound */
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
/* MOVAPS requires the matrix to be 16-byte aligned */
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP13P3DR_top): |
MOVSS( S(0), XMM2 ) /* ox */ |
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
ADDPS( XMM1, XMM2 ) /* +m15 | +m14 | +m13 | +m12 */ |
MOVLPS( XMM2, D(0) ) /* - | - | ->D(1)| ->D(0)*/ |
/* bring the third result down to lane 0 so MOVSS can store it */
UNPCKHPS( XMM2, XMM2 ) /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */ |
MOVSS( XMM2, D(2) ) |
LLBL(K_GTP13P3DR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP13P3DR_top) ) |
LLBL(K_GTP13P3DR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/shark/trunk/ports/mesa/src/x86/sse_xform2.s |
---|
0,0 → 1,452 |
/* $Id: sse_xform2.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** TODO: |
* - insert PREFETCH instructions to avoid cache-misses ! |
* - some more optimizations are possible... |
* - for 40-50% more performance in the SSE-functions, the |
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
*/ |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/*
 * Operand shorthands used by every routine in this file: element i
 * (4 bytes each) relative to the current source vertex (ESI), the current
 * destination vertex (EDI) and the matrix (EDX).  The argument is not
 * parenthesised, so pass a plain integer.
 */
#define S(i) REGOFF(i * 4, ESI) |
#define D(i) REGOFF(i * 4, EDI) |
#define M(i) REGOFF(i * 4, EDX) |
/*
 * _mesa_sse_transform_points2_general
 *
 * For every source vertex (ox, oy) stores the 4-component result
 *     D(i) = ox*m[i] + oy*m[4+i] + m[12+i]        (i = 0..3)
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_4, dest count = source count, dest size = 4.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM4 are overwritten.
 * MOVAPS is used for the matrix loads and for the result store, so the
 * matrix and the destination data must be 16-byte aligned.
 * FRAME_OFFSET is 8 for the two 4-byte pushes (presumably consumed by the
 * ARG_* macros from xform_args.h).
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_general) |
GLNAME( _mesa_sse_transform_points2_general ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(K_GTP2GR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once */
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ |
MOVAPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP2GR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM3 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( S(1), XMM4 ) /* oy */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy | oy */ |
MULPS( XMM1, XMM4 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ |
ADDPS( XMM4, XMM3 ) /* ox*m[i] + oy*m[4+i] */ |
ADDPS( XMM2, XMM3 ) /* + m[12+i] */ |
MOVAPS( XMM3, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP2GR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP2GR_top) ) |
LLBL(K_GTP2GR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies the first two 32-bit components of every source vertex to the
 * destination (D(0) = S(0), D(1) = S(1)) through EDX; no matrix is read
 * and no SSE register is used.
 *
 * The source/dest GLvector4f pointers are read from the stack.  Nothing is
 * written when the source count is zero; otherwise dest flags |=
 * VEC_SIZE_2, dest count = source count, dest size = 2.  If the source and
 * destination data pointers are equal the copy loop is skipped (the dest
 * bookkeeping has already been updated at that point).
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX and EDX are overwritten.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_identity) |
GLNAME( _mesa_sse_transform_points2_identity ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP2IR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
CMP_L( ESI, EDI ) |
JE( LLBL(K_GTP2IR_finish) ) /* source data == dest data: nothing to copy */ |
ALIGNTEXT32 |
LLBL(K_GTP2IR_top): |
MOV_L ( S(0), EDX ) /* x, copied as a raw 32-bit word */ |
MOV_L ( EDX, D(0) ) |
MOV_L ( S(1), EDX ) /* y, copied as a raw 32-bit word */ |
MOV_L ( EDX, D(1) ) |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP2IR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP2IR_top) ) |
LLBL(K_GTP2IR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * For every source vertex (ox, oy) stores
 *     D(0) = ox*m0 + m12
 *     D(1) = oy*m5 + m13
 *     D(2) = m14
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_3, dest count = source count, dest size = 3.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM3 are overwritten.
 * Only the two low lanes of the packed operations are stored; the upper
 * lanes hold whatever the registers contained before.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot) |
GLNAME(_mesa_sse_transform_points2_3d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23DNRR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once */
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */ |
MOVSS ( M(14), XMM3 ) /* - | - | - | m14 */ |
ALIGNTEXT32 |
LLBL(K_GTP23DNRR_top): |
MOVLPS ( S(0), XMM0 ) /* - | - | oy | ox */ |
MULPS ( XMM1, XMM0 ) /* - | - | oy*m5 | ox*m0 */ |
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */ |
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */ |
MOVSS ( XMM3, D(2) ) /* m14 -> D(2) (constant for every vertex) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP23DNRR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP23DNRR_top) ) |
LLBL(K_GTP23DNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_perspective
 *
 * For every source vertex (ox, oy) stores
 *     D(0) = ox*m0
 *     D(1) = oy*m5
 *     D(2) = m14
 *     D(3) = 0
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_4, dest count = source count, dest size = 4.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM4 are overwritten.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_perspective) |
GLNAME(_mesa_sse_transform_points2_perspective): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23PR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant values, set up once */
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
MOVSS ( M(14), XMM3 ) /* m14 */ |
XORPS ( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ |
ALIGNTEXT32 |
LLBL(K_GTP23PR_top): |
MOVLPS( S(0), XMM4 ) /* oy | ox */ |
MULPS( XMM1, XMM4 ) /* oy*m5 | ox*m0 */ |
MOVLPS( XMM4, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS( XMM3, D(2) ) /* m14 ->D(2) */ |
MOVSS( XMM0, D(3) ) /* 0 ->D(3) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP23PR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP23PR_top) ) |
LLBL(K_GTP23PR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_2d
 *
 * For every source vertex (ox, oy) stores
 *     D(0) = ox*m0 + oy*m4 + m12
 *     D(1) = ox*m1 + oy*m5 + m13
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_2, dest count = source count, dest size = 2.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM4 are overwritten.
 * Only the two low lanes of the packed operations are stored; the upper
 * lanes of XMM0-XMM2 are never loaded by this routine.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_2d) |
GLNAME(_mesa_sse_transform_points2_2d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23P2DR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once (low two lanes only) */
MOVLPS( M(0), XMM0 ) /* m1 | m0 */ |
MOVLPS( M(4), XMM1 ) /* m5 | m4 */ |
MOVLPS( M(12), XMM2 ) /* m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP23P2DR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox */ |
MULPS( XMM0, XMM3 ) /* ox*m1 | ox*m0 */ |
MOVSS( S(1), XMM4 ) /* oy */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy */ |
MULPS( XMM1, XMM4 ) /* oy*m5 | oy*m4 */ |
ADDPS( XMM4, XMM3 ) /* ox*m1+oy*m5 | ox*m0+oy*m4 */ |
ADDPS( XMM2, XMM3 ) /* +m13 | +m12 */ |
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP23P2DR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP23P2DR_top) ) |
LLBL(K_GTP23P2DR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * For every source vertex (ox, oy) stores
 *     D(0) = ox*m0 + m12
 *     D(1) = oy*m5 + m13
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_2, dest count = source count, dest size = 2.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM2 are overwritten.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot) |
GLNAME(_mesa_sse_transform_points2_2d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23P2DNRR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once */
MOVSS ( M(0), XMM1 ) /* m0 */ |
MOVSS ( M(5), XMM2 ) /* m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ |
MOVLPS ( M(12), XMM2 ) /* m13 | m12 (XMM2 reused after the unpack) */ |
ALIGNTEXT32 |
LLBL(K_GTP23P2DNRR_top): |
MOVLPS( S(0), XMM0 ) /* oy | ox */ |
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ |
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */ |
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP23P2DNRR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP23P2DNRR_top) ) |
LLBL(K_GTP23P2DNRR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_3d
 *
 * For every source vertex (ox, oy) stores the 3-component result
 *     D(i) = ox*m[i] + oy*m[4+i] + m[12+i]        (i = 0..2)
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_3, dest count = source count, dest size = 3.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM4 are overwritten.
 * MOVAPS is used for the matrix loads, so the matrix must be 16-byte
 * aligned.  The fourth lane is computed but never stored.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_3d) |
GLNAME(_mesa_sse_transform_points2_3d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23P3DR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once (top lane unused) */
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ |
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ |
MOVAPS( M(12), XMM2 ) /* m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP23P3DR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox */ |
MULPS( XMM0, XMM3 ) /* ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( S(1), XMM4 ) /* oy */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy */ |
MULPS( XMM1, XMM4 ) /* oy*m6 | oy*m5 | oy*m4 */ |
ADDPS( XMM4, XMM3 ) /* ox*m[i] + oy*m[4+i] */ |
ADDPS( XMM2, XMM3 ) /* + m[12+i] */ |
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */ |
UNPCKHPS( XMM3, XMM3 ) /* bring lane 2 down to lane 0 */ |
MOVSS( XMM3, D(2) ) /* ->D(2) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP23P3DR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP23P3DR_top) ) |
LLBL(K_GTP23P3DR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/shark/trunk/ports/mesa/src/x86/sse_xform3.s |
---|
0,0 → 1,498 |
/* $Id: sse_xform3.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** TODO: |
* - insert PREFETCH instructions to avoid cache misses. |
* - some more optimizations are possible. |
* - for 40-50% more performance in the SSE functions, the |
* data (transformation matrix, src_vert, dst_vert) need to be 16-byte aligned. |
*/ |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/*
 * Operand shorthands used by the routines in this file: element i
 * (4 bytes each) relative to the current source vertex (ESI), the current
 * destination vertex (EDI) and the matrix (EDX).  The argument is not
 * parenthesised, so pass a plain integer.
 */
#define S(i) REGOFF(i * 4, ESI) |
#define D(i) REGOFF(i * 4, EDI) |
#define M(i) REGOFF(i * 4, EDX) |
/*
 * _mesa_sse_transform_points3_general
 *
 * For every source vertex (ox, oy, oz) stores the 4-component result
 *     D(i) = ox*m[i] + oy*m[4+i] + oz*m[8+i] + m[12+i]     (i = 0..3)
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_4, dest count = source count, dest size = 4.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM6 are overwritten.
 * MOVAPS is used for the matrix loads and for the result store, so the
 * matrix and the destination data must be 16-byte aligned.
 * This routine addresses memory with explicit REGOFF byte offsets rather
 * than the S()/D()/M() shorthands.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_general) |
GLNAME( _mesa_sse_transform_points3_general ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
CMP_L ( CONST(0), ECX ) /* count == 0 ? */ |
JE ( LLBL(K_GTPGR_finish) ) /* yes -> nothing to do; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once (lanes listed high | ... | low) */
MOVAPS ( REGOFF(0, EDX), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS ( REGOFF(16, EDX), XMM1 ) /* m7 | m6 | m5 | m4 */ |
MOVAPS ( REGOFF(32, EDX), XMM2 ) /* m11 | m10 | m9 | m8 */ |
MOVAPS ( REGOFF(48, EDX), XMM3 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTPGR_top): |
MOVSS ( REGOFF(0, ESI), XMM4 ) /* | | | ox */ |
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ |
MOVSS ( REGOFF(4, ESI), XMM5 ) /* | | | oy */ |
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */ |
MOVSS ( REGOFF(8, ESI), XMM6 ) /* | | | oz */ |
SHUFPS ( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */ |
MULPS ( XMM0, XMM4 ) /* m3*ox | m2*ox | m1*ox | m0*ox */ |
MULPS ( XMM1, XMM5 ) /* m7*oy | m6*oy | m5*oy | m4*oy */ |
MULPS ( XMM2, XMM6 ) /* m11*oz | m10*oz | m9*oz | m8*oz */ |
ADDPS ( XMM5, XMM4 ) /* ox*m[i] + oy*m[4+i] */ |
ADDPS ( XMM6, XMM4 ) /* + oz*m[8+i] */ |
ADDPS ( XMM3, XMM4 ) /* + m[12+i] */ |
MOVAPS ( XMM4, REGOFF(0, EDI) ) /* store all four result components */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTPGR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTPGR_top) ) |
LLBL(K_GTPGR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_identity
 *
 * Copies the first three components of every source vertex to the
 * destination (D(0..2) = S(0..2)) through XMM0; no matrix is read.
 *
 * The source/dest GLvector4f pointers are read from the stack.  Nothing is
 * written when the source count is zero; otherwise dest flags |=
 * VEC_SIZE_3, dest count = source count, dest size = 3.  If the source and
 * destination data pointers are equal the copy loop is skipped (the dest
 * bookkeeping has already been updated at that point).
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX and XMM0 are overwritten.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_identity) |
GLNAME( _mesa_sse_transform_points3_identity ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTPIR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
CMP_L( ESI, EDI ) |
JE( LLBL(K_GTPIR_finish) ) /* source data == dest data: nothing to copy */ |
ALIGNTEXT32 |
LLBL(K_GTPIR_top): |
MOVLPS ( S(0), XMM0 ) /* y | x */ |
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS ( S(2), XMM0 ) /* z */ |
MOVSS ( XMM0, D(2) ) /* ->D(2) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTPIR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTPIR_top) ) |
LLBL(K_GTPIR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_3d_no_rot
 *
 * For every source vertex (s0, s1, sz) stores
 *     D(0) = s0*m0  + m12
 *     D(1) = s1*m5  + m13
 *     D(2) = sz*m10 + m14
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_3, dest count = source count, dest size = 3.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM4 are overwritten.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot) |
GLNAME(_mesa_sse_transform_points3_3d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3DNRR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once */
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */ |
MOVSS ( M(10), XMM3 ) /* - | - | - | m10 */ |
MOVSS ( M(14), XMM4 ) /* - | - | - | m14 */ |
ALIGNTEXT32 |
LLBL(K_GTP3DNRR_top): |
MOVLPS ( S(0), XMM0 ) /* - | - | s1 | s0 */ |
MULPS ( XMM1, XMM0 ) /* - | - | s1*m5 | s0*m0 */ |
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */ |
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */ |
MOVSS ( S(2), XMM0 ) /* sz */ |
MULSS ( XMM3, XMM0 ) /* sz*m10 */ |
ADDSS ( XMM4, XMM0 ) /* +m14 */ |
MOVSS ( XMM0, D(2) ) /* -> D(2) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP3DNRR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP3DNRR_top) ) |
LLBL(K_GTP3DNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_perspective
 *
 * For every source vertex (ox, oy, oz) stores
 *     D(0) = ox*m0 + oz*m8
 *     D(1) = oy*m5 + oz*m9
 *     D(2) = oz*m10 + m14
 *     D(3) = -oz
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_4, dest count = source count, dest size = 4.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM6 are overwritten.
 * oz is reloaded from memory three times per vertex because XMM0 is reused
 * as the accumulator between the loads.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_perspective) |
GLNAME(_mesa_sse_transform_points3_perspective): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3PR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant values, set up once */
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
MOVLPS ( M(8), XMM2 ) /* - | - | m9 | m8 */ |
MOVSS ( M(10), XMM3 ) /* m10 */ |
MOVSS ( M(14), XMM4 ) /* m14 */ |
XORPS ( XMM6, XMM6 ) /* 0 */ |
ALIGNTEXT32 |
LLBL(K_GTP3PR_top): |
MOVLPS ( S(0), XMM0 ) /* oy | ox */ |
MULPS ( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ |
MOVSS ( S(2), XMM5 ) /* oz */ |
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oz | oz */ |
MULPS ( XMM2, XMM5 ) /* oz*m9 | oz*m8 */ |
ADDPS ( XMM5, XMM0 ) /* oy*m5+oz*m9 | ox*m0+oz*m8 */ |
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS ( S(2), XMM0 ) /* oz */ |
MULSS ( XMM3, XMM0 ) /* oz*m10 */ |
ADDSS ( XMM4, XMM0 ) /* +m14 */ |
MOVSS ( XMM0, D(2) ) /* ->D(2) */ |
MOVSS ( S(2), XMM0 ) /* oz */ |
MOVSS ( XMM6, XMM5 ) /* 0 in the low lane of XMM5 */ |
SUBPS ( XMM0, XMM5 ) /* -oz (packed subtract; only the low lane is stored) */ |
MOVSS ( XMM5, D(3) ) /* ->D(3) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP3PR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP3PR_top) ) |
LLBL(K_GTP3PR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_2d
 *
 * For every source vertex (ox, oy, oz) stores
 *     D(0) = ox*m0 + oy*m4 + m12
 *     D(1) = ox*m1 + oy*m5 + m13
 *     D(2) = oz                       (copied unchanged)
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_3, dest count = source count, dest size = 3.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM4 are overwritten.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_2d) |
GLNAME(_mesa_sse_transform_points3_2d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3P2DR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once (low two lanes only) */
MOVLPS( M(0), XMM0 ) /* m1 | m0 */ |
MOVLPS( M(4), XMM1 ) /* m5 | m4 */ |
MOVLPS( M(12), XMM2 ) /* m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP3P2DR_top): |
MOVSS ( S(0), XMM3 ) /* ox */ |
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ox | ox */ |
MULPS ( XMM0, XMM3 ) /* ox*m1 | ox*m0 */ |
MOVSS ( S(1), XMM4 ) /* oy */ |
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* oy | oy */ |
MULPS ( XMM1, XMM4 ) /* oy*m5 | oy*m4 */ |
ADDPS ( XMM4, XMM3 ) /* ox*m1+oy*m5 | ox*m0+oy*m4 */ |
ADDPS ( XMM2, XMM3 ) /* +m13 | +m12 */ |
MOVLPS ( XMM3, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS ( S(2), XMM3 ) /* oz */ |
MOVSS ( XMM3, D(2) ) /* ->D(2), copied unchanged */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP3P2DR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP3P2DR_top) ) |
LLBL(K_GTP3P2DR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_2d_no_rot
 *
 * For every source vertex (ox, oy, oz) stores
 *     D(0) = ox*m0 + m12
 *     D(1) = oy*m5 + m13
 *     D(2) = oz                       (copied unchanged)
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_3, dest count = source count, dest size = 3.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM2 are overwritten.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot) |
GLNAME(_mesa_sse_transform_points3_2d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3P2DNRR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once */
MOVSS ( M(0), XMM1 ) /* m0 */ |
MOVSS ( M(5), XMM2 ) /* m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ |
MOVLPS ( M(12), XMM2 ) /* m13 | m12 (XMM2 reused after the unpack) */ |
ALIGNTEXT32 |
LLBL(K_GTP3P2DNRR_top): |
MOVLPS( S(0), XMM0 ) /* oy | ox */ |
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ |
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */ |
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS( S(2), XMM0 ) /* oz */ |
MOVSS( XMM0, D(2) ) /* ->D(2), copied unchanged */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP3P2DNRR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP3P2DNRR_top) ) |
LLBL(K_GTP3P2DNRR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_3d
 *
 * For every source vertex (ox, oy, oz) stores the 3-component result
 *     D(i) = ox*m[i] + oy*m[4+i] + oz*m[8+i] + m[12+i]     (i = 0..2)
 *
 * The source/dest GLvector4f pointers and the matrix pointer are read from
 * the stack.  Nothing is written when the source count is zero; otherwise
 * dest flags |= VEC_SIZE_3, dest count = source count, dest size = 3.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM6 are overwritten.
 * MOVAPS is used for the matrix loads, so the matrix must be 16-byte
 * aligned.  The fourth lane is computed but never stored.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_3d) |
GLNAME(_mesa_sse_transform_points3_3d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f (+8 skips the two pushes) */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3P3DR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
ALIGNTEXT32 |
/* loop-invariant matrix elements, loaded once (top lane unused) */
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ |
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ |
MOVAPS( M(8), XMM2 ) /* m10 | m9 | m8 */ |
MOVAPS( M(12), XMM3 ) /* m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP3P3DR_top): |
MOVSS( S(0), XMM4 ) /* ox */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox */ |
MULPS( XMM0, XMM4 ) /* ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( S(1), XMM5 ) /* oy */ |
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy */ |
MULPS( XMM1, XMM5 ) /* oy*m6 | oy*m5 | oy*m4 */ |
MOVSS( S(2), XMM6 ) /* oz */ |
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz */ |
MULPS( XMM2, XMM6 ) /* oz*m10 | oz*m9 | oz*m8 */ |
ADDPS( XMM5, XMM4 ) /* + | + | + */ |
ADDPS( XMM6, XMM4 ) /* + | + | + */ |
ADDPS( XMM3, XMM4 ) /* + | + | + */ |
MOVLPS( XMM4, D(0) ) /* => D(1) | => D(0) */ |
UNPCKHPS( XMM4, XMM4 ) /* bring lane 2 down to lane 0 */ |
MOVSS( XMM4, D(2) ) /* => D(2) */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL(K_GTP3P3DR_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP3P3DR_top) ) |
LLBL(K_GTP3P3DR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/shark/trunk/ports/mesa/src/x86/sse_xform4.s |
---|
0,0 → 1,226 |
/* $Id: sse_xform4.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/*
 * FRAME_OFFSET stays defined for the whole file: every routine below
 * pushes ESI and EDI (2 x 4 bytes) before reading its arguments through
 * the ARG_* macros (presumably defined in terms of FRAME_OFFSET in
 * xform_args.h - confirm).
 */
#define FRAME_OFFSET 8 |
/*
 * Operand shorthands: element i (4 bytes each) relative to the current
 * source vertex (ESI), the current destination vertex (EDI) and the
 * matrix (EDX).  The argument is not parenthesised, so pass a plain
 * integer.
 */
#define SRC(i) REGOFF(i * 4, ESI) |
#define DST(i) REGOFF(i * 4, EDI) |
#define MAT(i) REGOFF(i * 4, EDX) |
/*
 * Packs four 2-bit selectors into one immediate, r0 in the top two bits
 * and r3 in the bottom two (presumably a SHUFPS control byte).  Not used
 * by the routines visible in this part of the file.
 */
#define SELECT(r0, r1, r2, r3) CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 ) |
/*
 * _mesa_sse_transform_points4_general
 *
 * For every source vertex (ox, oy, oz, ow) stores the 4-component result
 *     D(i) = ox*m[i] + oy*m[4+i] + oz*m[8+i] + ow*m[12+i]   (i = 0..3)
 *
 * Arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX.  Nothing
 * is written when the source count is zero; otherwise dest flags |=
 * VEC_SIZE_4, dest count = source count, dest size = 4.
 * Unlike the routines that compare against an end pointer, this loop
 * counts ECX down to zero.  The source pointer advances by the source
 * stride, the destination by a fixed 16 bytes.
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM7 are overwritten.
 * MOVAPS is used for the matrix loads and for the result store, so the
 * matrix and the destination data must be 16-byte aligned.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_sse_transform_points4_general ) |
GLNAME( _mesa_sse_transform_points4_general ): |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ |
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX ) /* verify non-zero count */ |
JE( LLBL( sse_general_done ) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
PREFETCHT0( REGIND(ESI) ) /* prefetch the first source vertex */ |
/* loop-invariant matrix elements, loaded once */
MOVAPS( MAT(0), XMM4 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( MAT(4), XMM5 ) /* m7 | m6 | m5 | m4 */ |
MOVAPS( MAT(8), XMM6 ) /* m11 | m10 | m9 | m8 */ |
MOVAPS( MAT(12), XMM7 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT16 |
LLBL( sse_general_loop ): |
MOVSS( SRC(0), XMM0 ) /* ox */ |
SHUFPS( CONST(0x0), XMM0, XMM0 ) /* ox | ox | ox | ox */ |
MULPS( XMM4, XMM0 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( SRC(1), XMM1 ) /* oy */ |
SHUFPS( CONST(0x0), XMM1, XMM1 ) /* oy | oy | oy | oy */ |
MULPS( XMM5, XMM1 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ |
MOVSS( SRC(2), XMM2 ) /* oz */ |
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* oz | oz | oz | oz */ |
MULPS( XMM6, XMM2 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ |
MOVSS( SRC(3), XMM3 ) /* ow */ |
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ow | ow | ow | ow */ |
MULPS( XMM7, XMM3 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ |
ADDPS( XMM1, XMM0 ) /* ox*m3+oy*m7 | ... */ |
ADDPS( XMM2, XMM0 ) /* ox*m3+oy*m7+oz*m11 | ... */ |
ADDPS( XMM3, XMM0 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ |
MOVAPS( XMM0, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ |
ADD_L( CONST(16), EDI ) /* next dest vertex */ |
ADD_L( EAX, ESI ) /* next source vertex */ |
DEC_L( ECX ) /* one vertex fewer to go */ |
JNZ( LLBL( sse_general_loop ) ) |
LLBL( sse_general_done ): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
/*
 * _mesa_sse_transform_points4_3d
 *
 * For every source vertex (ox, oy, oz, ow) stores
 *     D(i) = ox*m[i] + oy*m[4+i] + oz*m[8+i] + ow*m[12+i]   (i = 0..2)
 *     D(3) = ow
 * All four lanes are computed and stored first; D(3) is then overwritten
 * with the source w.
 *
 * Arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX.  Nothing
 * is written when the source count is zero; otherwise dest flags |=
 * VEC_SIZE_3, dest count = source count, dest size = 3.
 * The source pointer advances by the source stride, the destination by a
 * fixed 16 bytes; the loop ends when the destination pointer reaches
 * start + 16*count (held in ECX).
 *
 * NOTE(review): the loop writes a fourth component (D(3) = ow) but the
 * destination is flagged as size 3 / VEC_SIZE_3 - confirm against the
 * callers and the C reference whether size 4 is intended.
 *
 * ESI/EDI are preserved; EAX, ECX, EDX and XMM0-XMM7 are overwritten.
 * MOVAPS is used for the matrix loads and for the result store, so the
 * matrix and the destination data must be 16-byte aligned.
 */
ALIGNTEXT4 |
GLOBL GLNAME( _mesa_sse_transform_points4_3d ) |
GLNAME( _mesa_sse_transform_points4_3d ): |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ |
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP43P3DR_finish) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )/* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (bytes per dest vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* ECX = dest ptr + 16*count = end of dest data */ |
/* loop-invariant matrix elements, loaded once */
MOVAPS( MAT(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( MAT(4), XMM1 ) /* m7 | m6 | m5 | m4 */ |
MOVAPS( MAT(8), XMM2 ) /* m11 | m10 | m9 | m8 */ |
MOVAPS( MAT(12), XMM3 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL( K_GTP43P3DR_top ): |
MOVSS( SRC(0), XMM4 ) /* ox */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( SRC(1), XMM5 ) /* oy */ |
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */ |
MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ |
MOVSS( SRC(2), XMM6 ) /* oz */ |
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */ |
MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ |
MOVSS( SRC(3), XMM7 ) /* ow */ |
SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */ |
MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ |
ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */ |
ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */ |
ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ |
MOVAPS( XMM4, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ |
MOVSS( SRC(3), XMM4 ) /* ow */ |
MOVSS( XMM4, DST(3) ) /* ->D(3), replacing the lane stored just above */ |
/* advance to the next vertex; the _skip label is not branched to in this routine */
LLBL( K_GTP43P3DR_skip ): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP43P3DR_top) ) |
LLBL( K_GTP43P3DR_finish ): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
/*
 * _mesa_sse_transform_points4_identity
 *
 * Copies every 4-component source vertex to the destination unchanged,
 * 16 bytes at a time through XMM0.
 *
 * Arguments are read through ARG_SOURCE and ARG_DEST.  The matrix
 * argument is not read: an identity transform needs no matrix data, which
 * matches the points2/points3 identity routines.
 * Nothing is written when the source count is zero; otherwise dest
 * flags |= VEC_SIZE_4, dest count = source count, dest size = 4.
 * The loop counts ECX down to zero; the source pointer advances by the
 * source stride, the destination by a fixed 16 bytes.
 *
 * ESI/EDI are preserved; EAX, ECX and XMM0 are overwritten.
 * MOVAPS is used for both the load and the store, so every source vertex
 * address (start + k*stride) and the destination data must be 16-byte
 * aligned.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_sse_transform_points4_identity ) |
GLNAME( _mesa_sse_transform_points4_identity ): |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ |
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX ) /* verify non-zero count */ |
JE( LLBL( sse_identity_done ) ) /* count was zero; dest is left unmodified */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride (added to ESI per vertex) */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ALIGNTEXT16 |
LLBL( sse_identity_loop ): |
PREFETCHNTA( REGOFF(32, ESI) ) /* prefetch 32 bytes ahead of the current source vertex */ |
MOVAPS( REGIND(ESI), XMM0 ) /* load x, y, z, w */ |
ADD_L( EAX, ESI ) /* next source vertex */ |
MOVAPS( XMM0, REGIND(EDI) ) /* store x, y, z, w */ |
ADD_L( CONST(16), EDI ) /* next dest vertex */ |
DEC_L( ECX ) /* one vertex fewer to go */ |
JNZ( LLBL( sse_identity_loop ) ) |
LLBL( sse_identity_done ): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
/shark/trunk/ports/mesa/src/x86/sse_normal.s |
---|
0,0 → 1,252 |
/* $Id: sse_normal.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** TODO: |
* - insert PREFETCH instructions to avoid cache misses. |
* - some more optimizations are possible. |
* - for 40-50% more performance in the SSE functions, the |
* data (transformation matrix, src_vert, dst_vert) need to be 16-byte aligned. |
*/ |
#include "matypes.h" |
#include "norm_args.h" |
SEG_TEXT |
/* Operand shorthands: the i-th 32-bit element (byte offset i * 4) relative
 * to EDX, ESI and EDI.  The routines below load EDX with the matrix's
 * MATRIX_INV pointer, ESI with the source data pointer and EDI with the
 * destination data pointer before using them.
 */
#define M(i) REGOFF(i * 4, EDX) |
#define S(i) REGOFF(i * 4, ESI) |
#define D(i) REGOFF(i * 4, EDI) |
/* Read at byte offset 12 from ESI, and only while ESI still points at the
 * source vector struct (i.e. before ESI is reloaded with V3F_START).
 * NOTE(review): presumably the stride field; a named offset from matypes.h
 * would be clearer than the literal 12 -- confirm one is available.
 */
#define STRIDE REGOFF(12, ESI) |
/*
 * _mesa_sse_transform_rescale_normals_no_rot
 *
 * For every source normal (ux, uy, uz) stores
 *    D(0) = ux * m0  * scale
 *    D(1) = uy * m5  * scale
 *    D(2) = uz * m10 * scale
 * where m is the array reached through MATRIX_INV of the matrix argument
 * and scale is ARG_SCALE.  The source pointer advances by the value read
 * through STRIDE; the destination pointer advances by a fixed 16 bytes.
 * The source count is copied into the destination's count field.  A zero
 * count returns before anything is written.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot) |
GLNAME(_mesa_sse_transform_rescale_normals_no_rot): |
/* Two 4-byte registers are pushed below.
 * NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */ |
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */ |
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */ |
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */ |
MOV_L ( REGOFF(V3F_COUNT, ESI), ECX ) /* source count */ |
TEST_L ( ECX, ECX ) |
JZ( LLBL(K_G3TRNNRR_finish) ) /* count was zero; go to finish */ |
/* STRIDE must be read here, while ESI still addresses the vector struct. */
MOV_L ( STRIDE, EAX ) /* stride */ |
MOV_L ( ECX, REGOFF(V3F_COUNT, EDI) ) /* set dest-count */ |
IMUL_L( CONST(16), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V3F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V3F_START, EDI), EDI ) /* ptr to first dest vertex */ |
/* ECX becomes the one-past-the-end destination address used as the loop
 * terminator. */
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
/* Loop constants: XMM1 = (m0*scale, m5*scale) in its low two lanes,
 * XMM0 low lane = m10*scale. */
MOVSS ( M(0), XMM1 ) /* m0 */ |
MOVSS ( M(5), XMM2 ) /* m5 */ |
UNPCKLPS( XMM2, XMM1 ) /* m5 | m0 */ |
MOVSS ( ARG_SCALE, XMM0 ) /* scale */ |
SHUFPS ( CONST(0x0), XMM0, XMM0 ) /* scale | scale */ |
MULPS ( XMM0, XMM1 ) /* m5*scale | m0*scale */ |
MULSS ( M(10), XMM0 ) /* m10*scale */ |
ALIGNTEXT32 |
LLBL(K_G3TRNNRR_top): |
MOVLPS ( S(0), XMM2 ) /* uy | ux */ |
MULPS ( XMM1, XMM2 ) /* uy*m5*scale | ux*m0*scale */ |
MOVLPS ( XMM2, D(0) ) /* ->D(1) | D(0) */ |
MOVSS ( S(2), XMM2 ) /* uz */ |
MULSS ( XMM0, XMM2 ) /* uz*m10*scale */ |
MOVSS ( XMM2, D(2) ) /* ->D(2) */ |
/* This label is not the target of any jump in this routine. */
LLBL(K_G3TRNNRR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_G3TRNNRR_top) ) |
LLBL(K_G3TRNNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_rescale_normals
 *
 * For every source normal (ux, uy, uz) stores
 *    D(0) = (ux*m0 + uy*m1 + uz*m2 ) * scale
 *    D(1) = (ux*m4 + uy*m5 + uz*m6 ) * scale
 *    D(2) = (ux*m8 + uy*m9 + uz*m10) * scale
 * where m is the array reached through MATRIX_INV of the matrix argument
 * and scale is ARG_SCALE.  The source pointer advances by the value read
 * through STRIDE; the destination pointer advances by a fixed 16 bytes.
 * The source count is copied into the destination's count field.  A zero
 * count returns before anything is written.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_sse_transform_rescale_normals) |
GLNAME(_mesa_sse_transform_rescale_normals): |
/* Two 4-byte registers are pushed below.
 * NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */ |
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */ |
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */ |
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */ |
MOV_L ( REGOFF(V3F_COUNT, ESI), ECX ) /* source count */ |
TEST_L ( ECX, ECX ) |
JZ( LLBL(K_G3TRNR_finish) ) /* count was zero; go to finish */ |
/* STRIDE must be read here, while ESI still addresses the vector struct. */
MOV_L ( STRIDE, EAX ) /* stride */ |
MOV_L ( ECX, REGOFF(V3F_COUNT, EDI) ) /* set dest-count */ |
IMUL_L( CONST(16), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V3F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V3F_START, EDI), EDI ) /* ptr to first dest vertex */ |
/* ECX becomes the one-past-the-end destination address used as the loop
 * terminator. */
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
/* Loop constants, all pre-multiplied by scale:
 *   XMM0 = (m0, m4), XMM1 = (m1, m5), XMM2 = (m2, m6) in the low two lanes,
 *   XMM6 = m8, XMM7 = m9 in the low lane.
 * XMM4 holds the broadcast scale only until the loop starts; the loop
 * reuses XMM3-XMM5 as scratch. */
MOVSS ( M(0), XMM0 ) /* m0 */ |
MOVSS ( M(4), XMM1 ) /* m4 */ |
UNPCKLPS( XMM1, XMM0 ) /* m4 | m0 */ |
MOVSS ( ARG_SCALE, XMM4 ) /* scale */ |
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* scale | scale */ |
MULPS ( XMM4, XMM0 ) /* m4*scale | m0*scale */ |
MOVSS ( M(1), XMM1 ) /* m1 */ |
MOVSS ( M(5), XMM2 ) /* m5 */ |
UNPCKLPS( XMM2, XMM1 ) /* m5 | m1 */ |
MULPS ( XMM4, XMM1 ) /* m5*scale | m1*scale */ |
MOVSS ( M(2), XMM2 ) /* m2 */ |
MOVSS ( M(6), XMM3 ) /* m6 */ |
UNPCKLPS( XMM3, XMM2 ) /* m6 | m2 */ |
MULPS ( XMM4, XMM2 ) /* m6*scale | m2*scale */ |
MOVSS ( M(8), XMM6 ) /* m8 */ |
MULSS ( ARG_SCALE, XMM6 ) /* m8*scale */ |
MOVSS ( M(9), XMM7 ) /* m9 */ |
MULSS ( ARG_SCALE, XMM7 ) /* m9*scale */ |
ALIGNTEXT32 |
LLBL(K_G3TRNR_top): |
/* The in-loop comments below omit "*scale"; XMM0-XMM2 already include it. */
MOVSS ( S(0), XMM3 ) /* ux */ |
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ux | ux */ |
MULPS ( XMM0, XMM3 ) /* ux*m4 | ux*m0 */ |
MOVSS ( S(1), XMM4 ) /* uy */ |
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* uy | uy */ |
MULPS ( XMM1, XMM4 ) /* uy*m5 | uy*m1 */ |
MOVSS ( S(2), XMM5 ) /* uz */ |
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* uz | uz */ |
MULPS ( XMM2, XMM5 ) /* uz*m6 | uz*m2 */ |
ADDPS ( XMM4, XMM3 ) |
ADDPS ( XMM5, XMM3 ) |
MOVLPS ( XMM3, D(0) ) |
/* m10*scale is rebuilt from memory on every iteration: XMM0-2, 6 and 7
 * hold constants and XMM3-5 are scratch, so no register is left for it. */
MOVSS ( M(10), XMM3 ) /* m10 */ |
MULSS ( ARG_SCALE, XMM3 ) /* m10*scale */ |
MULSS ( S(2), XMM3 ) /* m10*scale*uz */ |
MOVSS ( S(1), XMM4 ) /* uy */ |
MULSS ( XMM7, XMM4 ) /* uy*m9*scale */ |
MOVSS ( S(0), XMM5 ) /* ux */ |
MULSS ( XMM6, XMM5 ) /* ux*m8*scale */ |
ADDSS ( XMM4, XMM3 ) |
ADDSS ( XMM5, XMM3 ) |
MOVSS ( XMM3, D(2) ) |
/* This label is not the target of any jump in this routine. */
LLBL(K_G3TRNR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_G3TRNR_top) ) |
LLBL(K_G3TRNR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_normals_no_rot
 *
 * For every source normal (ux, uy, uz) stores
 *    D(0) = ux * m0
 *    D(1) = uy * m5
 *    D(2) = uz * m10
 * where m is the array reached through MATRIX_INV of the matrix argument.
 * No scale factor is read here.  The source pointer advances by the value
 * read through STRIDE; the destination pointer advances by a fixed 16
 * bytes.  The source count is copied into the destination's count field.
 * A zero count returns before anything is written.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_sse_transform_normals_no_rot) |
GLNAME(_mesa_sse_transform_normals_no_rot): |
/* Two 4-byte registers are pushed below.
 * NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */ |
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */ |
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */ |
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */ |
MOV_L ( REGOFF(V3F_COUNT, ESI), ECX ) /* source count */ |
TEST_L ( ECX, ECX ) |
JZ( LLBL(K_G3TNNRR_finish) ) /* count was zero; go to finish */ |
/* STRIDE must be read here, while ESI still addresses the vector struct. */
MOV_L ( STRIDE, EAX ) /* stride */ |
MOV_L ( ECX, REGOFF(V3F_COUNT, EDI) ) /* set dest-count */ |
IMUL_L( CONST(16), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V3F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V3F_START, EDI), EDI ) /* ptr to first dest vertex */ |
/* ECX becomes the one-past-the-end destination address used as the loop
 * terminator. */
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
/* Loop constants: XMM0 = (m0, m5) in its low two lanes, XMM1 low lane = m10. */
MOVSS( M(0), XMM0 ) /* m0 */ |
MOVSS( M(5), XMM1 ) /* m5 */ |
UNPCKLPS( XMM1, XMM0 ) /* m5 | m0 */ |
MOVSS( M(10), XMM1 ) /* m10 */ |
ALIGNTEXT32 |
LLBL(K_G3TNNRR_top): |
MOVLPS( S(0), XMM2 ) /* uy | ux */ |
MULPS( XMM0, XMM2 ) /* uy*m5 | ux*m0 */ |
MOVLPS( XMM2, D(0) ) |
MOVSS( S(2), XMM2 ) /* uz */ |
MULSS( XMM1, XMM2 ) /* uz*m10 */ |
MOVSS( XMM2, D(2) ) |
/* This label is not the target of any jump in this routine. */
LLBL(K_G3TNNRR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_G3TNNRR_top) ) |
LLBL(K_G3TNNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/shark/trunk/ports/mesa/src/x86/sse.c |
---|
0,0 → 1,119 |
/* $Id: sse.c,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* PentiumIII-SIMD (SSE) optimizations contributed by |
* Andre Werthmann <wertmann@cs.uni-potsdam.de> |
*/ |
#include "glheader.h" |
#include "context.h" |
#include "math/m_xform.h" |
#include "tnl/t_context.h" |
#include "sse.h" |
#include "common_x86_macros.h" |
#ifdef DEBUG |
#include "math/m_debug.h" |
#endif |
/* Prototypes for the SSE assembly entry points.  Everything here is only
 * declared when USE_SSE_ASM is defined.
 */
#ifdef USE_SSE_ASM |
DECLARE_XFORM_GROUP( sse, 2 ) |
DECLARE_XFORM_GROUP( sse, 3 ) |
/* The "#if 1" branch declares, by hand, only the routines that
 * _mesa_init_sse_transform_asm installs individually; the disabled #else
 * branch would declare the whole normal group through the macro instead.
 */
#if 1 |
/* Some functions are not written in SSE-assembly, because the fpu ones are faster */ |
extern void _mesa_sse_transform_normals_no_rot( NORM_ARGS ); |
extern void _mesa_sse_transform_rescale_normals( NORM_ARGS ); |
extern void _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS ); |
extern void _mesa_sse_transform_points4_general( XFORM_ARGS ); |
extern void _mesa_sse_transform_points4_3d( XFORM_ARGS ); |
extern void _mesa_sse_transform_points4_identity( XFORM_ARGS ); |
#else |
DECLARE_NORM_GROUP( sse ) |
#endif |
/* The three routines below are declared but not referenced anywhere else
 * in this file. */
extern void _ASMAPI |
_mesa_v16_sse_general_xform( GLfloat *first_vert, |
const GLfloat *m, |
const GLfloat *src, |
GLuint src_stride, |
GLuint count ); |
extern void _ASMAPI |
_mesa_sse_project_vertices( GLfloat *first, |
GLfloat *last, |
const GLfloat *m, |
GLuint stride ); |
extern void _ASMAPI |
_mesa_sse_project_clipped_vertices( GLfloat *first, |
GLfloat *last, |
const GLfloat *m, |
GLuint stride, |
const GLubyte *clipmask ); |
#endif |
/* Install the SSE point-transform and normal-transform routines into the
 * _mesa_transform_tab / _mesa_normal_tab dispatch tables.
 *
 * The body is empty unless USE_SSE_ASM is defined.  With DEBUG defined the
 * installed routines are additionally run through the math self-tests.
 */
void _mesa_init_sse_transform_asm( void )
{
#ifdef USE_SSE_ASM
   /* Complete groups for 2- and 3-component points. */
   ASSIGN_XFORM_GROUP( sse, 2 );
   ASSIGN_XFORM_GROUP( sse, 3 );

#if 1
   /* TODO: Finish these off.
    * Until then only the routines that exist are installed one by one.
    */

   /* Normal transforms. */
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
      _mesa_sse_transform_rescale_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
      _mesa_sse_transform_rescale_normals;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
      _mesa_sse_transform_normals_no_rot;

   /* 4-component point transforms. */
   _mesa_transform_tab[4][MATRIX_IDENTITY] =
      _mesa_sse_transform_points4_identity;
   _mesa_transform_tab[4][MATRIX_3D] =
      _mesa_sse_transform_points4_3d;
   _mesa_transform_tab[4][MATRIX_GENERAL] =
      _mesa_sse_transform_points4_general;
#else
   ASSIGN_XFORM_GROUP( sse, 4 );
   ASSIGN_NORM_GROUP( sse );
#endif

#ifdef DEBUG
   _math_test_all_transform_functions( "SSE" );
   _math_test_all_normal_transform_functions( "SSE" );
#endif
#endif
}
/shark/trunk/ports/mesa/src/tnl/t_vb_gentex.c |
---|
File deleted |
/shark/trunk/ports/mesa/src/tnl/t_import_array.h |
---|
File deleted |
/shark/trunk/ports/mesa/src/tnl/t_import_array.c |
---|
File deleted |
/shark/trunk/ports/mesa/src/tnl/t_array_import.c |
---|
0,0 → 1,432 |
/* $Id: t_array_import.c,v 1.1 2003-02-28 11:48:06 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 4.1 |
* |
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#include "glheader.h" |
#include "context.h" |
#include "macros.h" |
#include "imports.h" |
#include "mmath.h" |
#include "state.h" |
#include "mtypes.h" |
#include "array_cache/acache.h" |
#include "math/m_translate.h" |
#include "t_array_import.h" |
#include "t_context.h" |
#include "t_imm_debug.h" |
static void _tnl_import_vertex( GLcontext *ctx, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
tmp = _ac_import_vertex(ctx, |
GL_FLOAT, |
stride ? 4*sizeof(GLfloat) : 0, |
0, |
writeable, |
&is_writeable); |
inputs->Obj.data = (GLfloat (*)[4]) tmp->Ptr; |
inputs->Obj.start = (GLfloat *) tmp->Ptr; |
inputs->Obj.stride = tmp->StrideB; |
inputs->Obj.size = tmp->Size; |
inputs->Obj.flags &= ~(VEC_BAD_STRIDE|VEC_NOT_WRITEABLE); |
if (inputs->Obj.stride != 4*sizeof(GLfloat)) |
inputs->Obj.flags |= VEC_BAD_STRIDE; |
if (!is_writeable) |
inputs->Obj.flags |= VEC_NOT_WRITEABLE; |
} |
static void _tnl_import_normal( GLcontext *ctx, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
tmp = _ac_import_normal(ctx, GL_FLOAT, |
stride ? 3*sizeof(GLfloat) : 0, writeable, |
&is_writeable); |
inputs->Normal.data = (GLfloat (*)[4]) tmp->Ptr; |
inputs->Normal.start = (GLfloat *) tmp->Ptr; |
inputs->Normal.stride = tmp->StrideB; |
inputs->Normal.flags &= ~(VEC_BAD_STRIDE|VEC_NOT_WRITEABLE); |
if (inputs->Normal.stride != 3*sizeof(GLfloat)) |
inputs->Normal.flags |= VEC_BAD_STRIDE; |
if (!is_writeable) |
inputs->Normal.flags |= VEC_NOT_WRITEABLE; |
} |
static void _tnl_import_color( GLcontext *ctx, |
GLenum type, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
tmp = _ac_import_color(ctx, |
type, |
stride ? 4*sizeof(GLfloat) : 0, |
4, |
writeable, |
&is_writeable); |
inputs->Color = *tmp; |
} |
static void _tnl_import_secondarycolor( GLcontext *ctx, |
GLenum type, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
tmp = _ac_import_secondarycolor(ctx, |
type, |
stride ? 4*sizeof(GLfloat) : 0, |
4, |
writeable, |
&is_writeable); |
inputs->SecondaryColor = *tmp; |
} |
static void _tnl_import_fogcoord( GLcontext *ctx, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
tmp = _ac_import_fogcoord(ctx, GL_FLOAT, |
stride ? sizeof(GLfloat) : 0, writeable, |
&is_writeable); |
inputs->FogCoord.data = (GLfloat (*)[4]) tmp->Ptr; |
inputs->FogCoord.start = (GLfloat *) tmp->Ptr; |
inputs->FogCoord.stride = tmp->StrideB; |
inputs->FogCoord.flags &= ~(VEC_BAD_STRIDE|VEC_NOT_WRITEABLE); |
if (inputs->FogCoord.stride != sizeof(GLfloat)) |
inputs->FogCoord.flags |= VEC_BAD_STRIDE; |
if (!is_writeable) |
inputs->FogCoord.flags |= VEC_NOT_WRITEABLE; |
} |
static void _tnl_import_index( GLcontext *ctx, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
tmp = _ac_import_index(ctx, GL_UNSIGNED_INT, |
stride ? sizeof(GLuint) : 0, writeable, |
&is_writeable); |
inputs->Index.data = (GLuint *) tmp->Ptr; |
inputs->Index.start = (GLuint *) tmp->Ptr; |
inputs->Index.stride = tmp->StrideB; |
inputs->Index.flags &= ~(VEC_BAD_STRIDE|VEC_NOT_WRITEABLE); |
if (inputs->Index.stride != sizeof(GLuint)) |
inputs->Index.flags |= VEC_BAD_STRIDE; |
if (!is_writeable) |
inputs->Index.flags |= VEC_NOT_WRITEABLE; |
} |
static void _tnl_import_texcoord( GLcontext *ctx, |
GLuint unit, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
tmp = _ac_import_texcoord(ctx, unit, GL_FLOAT, |
stride ? 4 * sizeof(GLfloat) : 0, |
0, |
writeable, |
&is_writeable); |
inputs->TexCoord[unit].data = (GLfloat (*)[4]) tmp->Ptr; |
inputs->TexCoord[unit].start = (GLfloat *) tmp->Ptr; |
inputs->TexCoord[unit].stride = tmp->StrideB; |
inputs->TexCoord[unit].size = tmp->Size; |
inputs->TexCoord[unit].flags &= ~(VEC_BAD_STRIDE|VEC_NOT_WRITEABLE); |
if (inputs->TexCoord[unit].stride != 4*sizeof(GLfloat)) |
inputs->TexCoord[unit].flags |= VEC_BAD_STRIDE; |
if (!is_writeable) |
inputs->TexCoord[unit].flags |= VEC_NOT_WRITEABLE; |
} |
static void _tnl_import_edgeflag( GLcontext *ctx, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
tmp = _ac_import_edgeflag(ctx, GL_UNSIGNED_BYTE, |
stride ? sizeof(GLubyte) : 0, |
0, |
&is_writeable); |
inputs->EdgeFlag.data = (GLubyte *) tmp->Ptr; |
inputs->EdgeFlag.start = (GLubyte *) tmp->Ptr; |
inputs->EdgeFlag.stride = tmp->StrideB; |
inputs->EdgeFlag.flags &= ~(VEC_BAD_STRIDE|VEC_NOT_WRITEABLE); |
if (inputs->EdgeFlag.stride != sizeof(GLubyte)) |
inputs->EdgeFlag.flags |= VEC_BAD_STRIDE; |
if (!is_writeable) |
inputs->EdgeFlag.flags |= VEC_NOT_WRITEABLE; |
} |
static void _tnl_import_attrib( GLcontext *ctx, |
GLuint index, |
GLboolean writeable, |
GLboolean stride ) |
{ |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
struct gl_client_array *tmp; |
GLboolean is_writeable = 0; |
tmp = _ac_import_attrib(ctx, index, GL_FLOAT, |
stride ? 4 * sizeof(GLfloat) : 0, |
4, /* want GLfloat[4] */ |
writeable, |
&is_writeable); |
inputs->Attribs[index].data = (GLfloat (*)[4]) tmp->Ptr; |
inputs->Attribs[index].start = (GLfloat *) tmp->Ptr; |
inputs->Attribs[index].stride = tmp->StrideB; |
inputs->Attribs[index].size = tmp->Size; |
inputs->Attribs[index].flags &= ~(VEC_BAD_STRIDE|VEC_NOT_WRITEABLE); |
if (inputs->Attribs[index].stride != 4 * sizeof(GLfloat)) |
inputs->Attribs[index].flags |= VEC_BAD_STRIDE; |
if (!is_writeable) |
inputs->Attribs[index].flags |= VEC_NOT_WRITEABLE; |
} |
/** |
* Callback for VB stages that need to improve the quality of arrays |
* bound to the VB. This is only necessary for client arrays which |
* have not been transformed at any point in the pipeline. |
* \param required - bitmask of VERT_*_BIT flags |
* \param flags - bitmask of VEC_* flags (ex: VEC_NOT_WRITABLE) |
*/ |
static void _tnl_upgrade_client_data( GLcontext *ctx, |
GLuint required, |
GLuint flags ) |
{ |
GLuint i; |
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; |
GLboolean writeable = (flags & VEC_NOT_WRITEABLE) != 0; |
GLboolean stride = (flags & VEC_BAD_STRIDE) != 0; |
struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs; |
GLuint ca_flags = 0; |
(void) inputs; |
if (writeable || stride) ca_flags |= CA_CLIENT_DATA; |
if ((required & VERT_BIT_CLIP) && VB->ClipPtr == VB->ObjPtr) |
required |= VERT_BIT_POS; |
/* _tnl_print_vert_flags("_tnl_upgrade_client_data", required); */ |
if ((required & VERT_BIT_POS) && (VB->ObjPtr->flags & flags)) { |
ASSERT(VB->ObjPtr == &inputs->Obj); |
_tnl_import_vertex( ctx, writeable, stride ); |
VB->importable_data &= ~(VERT_BIT_POS|VERT_BIT_CLIP); |
} |
if ((required & VERT_BIT_NORMAL) && (VB->NormalPtr->flags & flags)) { |
ASSERT(VB->NormalPtr == &inputs->Normal); |
_tnl_import_normal( ctx, writeable, stride ); |
VB->importable_data &= ~VERT_BIT_NORMAL; |
} |
if ((required & VERT_BIT_COLOR0) && (VB->ColorPtr[0]->Flags & ca_flags)) { |
ASSERT(VB->ColorPtr[0] == &inputs->Color); |
_tnl_import_color( ctx, GL_FLOAT, writeable, stride ); |
VB->importable_data &= ~VERT_BIT_COLOR0; |
} |
if ((required & VERT_BIT_COLOR1) && |
(VB->SecondaryColorPtr[0]->Flags & ca_flags)) { |
ASSERT(VB->SecondaryColorPtr[0] == &inputs->SecondaryColor); |
_tnl_import_secondarycolor( ctx, GL_FLOAT, writeable, stride ); |
VB->importable_data &= ~VERT_BIT_COLOR1; |
} |
if ((required & VERT_BIT_FOG) |
&& (VB->FogCoordPtr->flags & flags)) { |
ASSERT(VB->FogCoordPtr == &inputs->FogCoord); |
_tnl_import_fogcoord( ctx, writeable, stride ); |
VB->importable_data &= ~VERT_BIT_FOG; |
} |
if ((required & VERT_BIT_INDEX) && (VB->IndexPtr[0]->flags & flags)) { |
ASSERT(VB->IndexPtr[0] == &inputs->Index); |
_tnl_import_index( ctx, writeable, stride ); |
VB->importable_data &= ~VERT_BIT_INDEX; |
} |
if (required & VERT_BITS_TEX_ANY) |
for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) |
if ((required & VERT_BIT_TEX(i)) && (VB->TexCoordPtr[i]->flags & flags)) { |
ASSERT(VB->TexCoordPtr[i] == &inputs->TexCoord[i]); |
_tnl_import_texcoord( ctx, i, writeable, stride ); |
VB->importable_data &= ~VERT_BIT_TEX(i); |
} |
/* XXX not sure what to do here for vertex program arrays */ |
} |
/* Bind the client vertex arrays for the range [start, count) to the TNL
 * vertex buffer.
 *
 * The VB bookkeeping fields are reset, the range is handed to the array
 * cache, and every array named in tnl->pipeline.inputs is imported (without
 * requesting writeable or restrided data, edge flags excepted) and hooked
 * up to the matching VB pointer.  _tnl_upgrade_client_data is installed as
 * the VB's import_data callback.
 */
void _tnl_vb_bind_arrays( GLcontext *ctx, GLint start, GLsizei count )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   struct vertex_arrays *arrays = &tnl->array_inputs;
   const GLuint wanted = tnl->pipeline.inputs;
   GLuint i;

   /* Reset per-draw vertex buffer state. */
   VB->Count = count - start;
   VB->FirstClipped = VB->Count;
   VB->Elts = NULL;
   VB->MaterialMask = NULL;
   VB->Material = NULL;
   VB->Flag = NULL;
   VB->Primitive = tnl->tmp_primitive;
   VB->PrimitiveLength = tnl->tmp_primitive_length;
   VB->import_data = _tnl_upgrade_client_data;
   VB->importable_data = wanted & VERT_BITS_FIXUP;

   /* With locked arrays the requested range must be the locked range. */
   if (ctx->Array.LockCount) {
      ASSERT(start == (GLint) ctx->Array.LockFirst);
      ASSERT(count == (GLint) ctx->Array.LockCount);
   }

   _ac_import_range( ctx, start, count );

   if (wanted & VERT_BIT_POS) {
      _tnl_import_vertex( ctx, GL_FALSE, GL_FALSE );
      arrays->Obj.count = VB->Count;
      VB->ObjPtr = &arrays->Obj;
   }

   if (wanted & VERT_BIT_NORMAL) {
      _tnl_import_normal( ctx, GL_FALSE, GL_FALSE );
      arrays->Normal.count = VB->Count;
      VB->NormalPtr = &arrays->Normal;
   }

   if (wanted & VERT_BIT_COLOR0) {
      /* type 0: no particular type is requested */
      _tnl_import_color( ctx, 0, GL_FALSE, GL_FALSE );
      VB->ColorPtr[0] = &arrays->Color;
      VB->ColorPtr[1] = 0;
   }

   if (wanted & VERT_BITS_TEX_ANY) {
      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
         if (!(wanted & VERT_BIT_TEX(i))) {
            continue;
         }
         _tnl_import_texcoord( ctx, i, GL_FALSE, GL_FALSE );
         arrays->TexCoord[i].count = VB->Count;
         VB->TexCoordPtr[i] = &arrays->TexCoord[i];
      }
   }

   if (wanted & VERT_BIT_INDEX) {
      _tnl_import_index( ctx, GL_FALSE, GL_FALSE );
      arrays->Index.count = VB->Count;
      VB->IndexPtr[0] = &arrays->Index;
      VB->IndexPtr[1] = 0;
   }

   if (wanted & VERT_BIT_FOG) {
      _tnl_import_fogcoord( ctx, GL_FALSE, GL_FALSE );
      arrays->FogCoord.count = VB->Count;
      VB->FogCoordPtr = &arrays->FogCoord;
   }

   if (wanted & VERT_BIT_EDGEFLAG) {
      /* Edge flags are the one array imported with a stride request. */
      _tnl_import_edgeflag( ctx, GL_TRUE, sizeof(GLboolean) );
      VB->EdgeFlag = (GLboolean *) arrays->EdgeFlag.data;
   }

   if (wanted & VERT_BIT_COLOR1) {
      /* type 0: no particular type is requested */
      _tnl_import_secondarycolor( ctx, 0, GL_FALSE, GL_FALSE );
      VB->SecondaryColorPtr[0] = &arrays->SecondaryColor;
      VB->SecondaryColorPtr[1] = 0;
   }

   /* XXX not 100% sure this is finished.  Keith should probably inspect. */
   if (ctx->VertexProgram.Enabled) {
      for (i = 0; i < VERT_ATTRIB_MAX; i++) {
         /* XXX check program->InputsRead to reduce work here */
         _tnl_import_attrib( ctx, i, GL_FALSE, GL_TRUE );
         VB->AttribPtr[i] = &arrays->Attribs[i];
      }
   }
}
/shark/trunk/ports/mesa/src/tnl/t_array_import.h |
---|
0,0 → 1,37 |
/* $Id: t_array_import.h,v 1.1 2003-02-28 11:48:06 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* Public interface of t_array_import.c. */
#ifndef _T_ARRAY_IMPORT_H |
#define _T_ARRAY_IMPORT_H |
#include "mtypes.h" |
#include "t_context.h" |
/* Binds the client arrays selected by the TNL pipeline inputs to the TNL
 * vertex buffer; the vertex count is taken as count - start. */
extern void _tnl_vb_bind_arrays( GLcontext *ctx, GLint start, GLsizei count ); |
/* NOTE(review): no definition of this function appears in the
 * t_array_import.c added alongside this header -- confirm whether it is
 * defined elsewhere or the prototype is stale. */
extern void _tnl_array_import_init( GLcontext *ctx ); |
#endif |
/shark/trunk/ports/mesa/src/tnl/t_vb_texgen.c |
---|
0,0 → 1,692 |
/* $Id: t_vb_texgen.c,v 1.1 2003-02-28 11:48:08 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 3.5 |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Brian Paul |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#include "glheader.h" |
#include "colormac.h" |
#include "context.h" |
#include "macros.h" |
#include "mmath.h" |
#include "imports.h" |
#include "mtypes.h" |
#include "math/m_xform.h" |
#include "t_context.h" |
#include "t_pipeline.h" |
/*********************************************************************** |
* Automatic texture coordinate generation (texgen) code. |
*/ |
struct texgen_stage_data; |
/* Signature of a per-unit texgen routine: generates the texture coordinates
 * of texture unit 'unit' into store->texcoord[unit]. */
typedef void (*texgen_func)( GLcontext *ctx, |
struct texgen_stage_data *store, |
GLuint unit); |
/* Private data of the texgen pipeline stage. */
struct texgen_stage_data { |
/* Per-texunit derived state. |
*/ |
GLuint TexgenSize[MAX_TEXTURE_UNITS]; |
GLuint TexgenHoles[MAX_TEXTURE_UNITS]; |
texgen_func TexgenFunc[MAX_TEXTURE_UNITS]; |
/* Temporary values used in texgen. |
*/ |
/* tmp_f: per-vertex 3-vectors written by build_m2/build_m3 (and, in
 * texgen(), by the build_f_tab routines).  tmp_m: per-vertex scale factors
 * written by build_m2/build_m3. */
GLfloat (*tmp_f)[3]; |
GLfloat *tmp_m; |
/* Buffered outputs of the stage. |
*/ |
GLvector4f texcoord[MAX_TEXTURE_UNITS]; |
}; |
/* Fetch the stage's private data from its privatePtr field. */
#define TEXGEN_STAGE_DATA(stage) ((struct texgen_stage_data *)stage->privatePtr) |
/* Indexed by a vector's size field (0..4).  texgen_sphere_map masks the
 * entry with ~0x3 to build the _mesa_copy_tab index for the components it
 * did not generate itself. */
static GLuint all_bits[5] = { |
0, |
VEC_SIZE_1, |
VEC_SIZE_2, |
VEC_SIZE_3, |
VEC_SIZE_4, |
}; |
/* Every VEC_SIZE_* bit; used to carry the size bits of an incoming
 * texcoord vector over to the generated one. */
#define VEC_SIZE_FLAGS (VEC_SIZE_1|VEC_SIZE_2|VEC_SIZE_3|VEC_SIZE_4) |
/* texgen() tests a unit's _GenFlags against these: NEED_M selects the
 * build_m_tab routines, otherwise NEED_F selects the build_f_tab ones. */
#define TEXGEN_NEED_M (TEXGEN_SPHERE_MAP) |
#define TEXGEN_NEED_F (TEXGEN_SPHERE_MAP | \ |
TEXGEN_REFLECTION_MAP_NV) |
static void build_m3( GLfloat f[][3], GLfloat m[], |
const GLvector4f *normal, |
const GLvector4f *eye ) |
{ |
GLuint stride = eye->stride; |
GLfloat *coord = (GLfloat *)eye->start; |
GLuint count = eye->count; |
const GLfloat *norm = normal->start; |
GLuint i; |
for (i=0;i<count;i++,STRIDE_F(coord,stride),STRIDE_F(norm,normal->stride)) { |
GLfloat u[3], two_nu, fx, fy, fz; |
COPY_3V( u, coord ); |
NORMALIZE_3FV( u ); |
two_nu = 2.0F * DOT3(norm,u); |
fx = f[i][0] = u[0] - norm[0] * two_nu; |
fy = f[i][1] = u[1] - norm[1] * two_nu; |
fz = f[i][2] = u[2] - norm[2] * two_nu; |
m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F); |
if (m[i] != 0.0F) { |
m[i] = 0.5F / (GLfloat) GL_SQRT(m[i]); |
} |
} |
} |
static void build_m2( GLfloat f[][3], GLfloat m[], |
const GLvector4f *normal, |
const GLvector4f *eye ) |
{ |
GLuint stride = eye->stride; |
GLfloat *coord = eye->start; |
GLuint count = eye->count; |
GLfloat *norm = normal->start; |
GLuint i; |
for (i=0;i<count;i++,STRIDE_F(coord,stride),STRIDE_F(norm,normal->stride)) { |
GLfloat u[3], two_nu, fx, fy, fz; |
COPY_2V( u, coord ); |
u[2] = 0; |
NORMALIZE_3FV( u ); |
two_nu = 2.0F * DOT3(norm,u); |
fx = f[i][0] = u[0] - norm[0] * two_nu; |
fy = f[i][1] = u[1] - norm[1] * two_nu; |
fz = f[i][2] = u[2] - norm[2] * two_nu; |
m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F); |
if (m[i] != 0.0F) { |
m[i] = 0.5F / (GLfloat) GL_SQRT(m[i]); |
} |
} |
} |
/* Signature shared by build_m2 and build_m3. */
typedef void (*build_m_func)( GLfloat f[][3], |
GLfloat m[], |
const GLvector4f *normal, |
const GLvector4f *eye ); |
/* Dispatch on a vector size (0..4).  Sizes 0 and 1 have no routine (null
 * entries); size 4 reuses the 3-component routine. */
static build_m_func build_m_tab[5] = { |
0, |
0, |
build_m2, |
build_m3, |
build_m3 |
}; |
/* This is unusual in that we respect the stride of the output vector |
* (f). This allows us to pass in either a texcoord vector4f, or a |
* temporary vector3f. |
*/ |
static void build_f3( GLfloat *f, |
GLuint fstride, |
const GLvector4f *normal, |
const GLvector4f *eye ) |
{ |
GLuint stride = eye->stride; |
GLfloat *coord = eye->start; |
GLuint count = eye->count; |
GLfloat *norm = normal->start; |
GLuint i; |
for (i=0;i<count;i++) { |
GLfloat u[3], two_nu; |
COPY_3V( u, coord ); |
NORMALIZE_3FV( u ); |
two_nu = 2.0F * DOT3(norm,u); |
f[0] = u[0] - norm[0] * two_nu; |
f[1] = u[1] - norm[1] * two_nu; |
f[2] = u[2] - norm[2] * two_nu; |
STRIDE_F(coord,stride); |
STRIDE_F(f,fstride); |
STRIDE_F(norm, normal->stride); |
} |
} |
static void build_f2( GLfloat *f, |
GLuint fstride, |
const GLvector4f *normal, |
const GLvector4f *eye ) |
{ |
GLuint stride = eye->stride; |
GLfloat *coord = eye->start; |
GLuint count = eye->count; |
GLfloat *norm = normal->start; |
GLuint i; |
for (i=0;i<count;i++) { |
GLfloat u[3], two_nu; |
COPY_2V( u, coord ); |
u[2] = 0; |
NORMALIZE_3FV( u ); |
two_nu = 2.0F * DOT3(norm,u); |
f[0] = u[0] - norm[0] * two_nu; |
f[1] = u[1] - norm[1] * two_nu; |
f[2] = u[2] - norm[2] * two_nu; |
STRIDE_F(coord,stride); |
STRIDE_F(f,fstride); |
STRIDE_F(norm, normal->stride); |
} |
} |
/* Signature shared by build_f2 and build_f3; fstride is the byte step
 * between consecutive output vectors. */
typedef void (*build_f_func)( GLfloat *f, |
GLuint fstride, |
const GLvector4f *normal_vec, |
const GLvector4f *eye ); |
/* Just treat 4-vectors as 3-vectors. |
*/ |
/* Dispatch on a vector size (0..4).  Sizes 0 and 1 have no routine (null
 * entries). */
static build_f_func build_f_tab[5] = { |
0, |
0, |
build_f2, |
build_f3, |
build_f3 |
}; |
/* Special case texgen functions. |
*/ |
static void texgen_reflection_map_nv( GLcontext *ctx, |
struct texgen_stage_data *store, |
GLuint unit ) |
{ |
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; |
GLvector4f *in = VB->TexCoordPtr[unit]; |
GLvector4f *out = &store->texcoord[unit]; |
build_f_tab[VB->EyePtr->size]( out->start, |
out->stride, |
VB->NormalPtr, |
VB->EyePtr ); |
if (in) { |
out->flags |= (in->flags & VEC_SIZE_FLAGS) | VEC_SIZE_3; |
out->count = in->count; |
out->size = MAX2(in->size, 3); |
if (in->size == 4) |
_mesa_copy_tab[0x8]( out, in ); |
} |
else { |
out->flags |= VEC_SIZE_3; |
out->size = 3; |
out->count = in->count; |
} |
} |
static void texgen_normal_map_nv( GLcontext *ctx, |
struct texgen_stage_data *store, |
GLuint unit ) |
{ |
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; |
GLvector4f *in = VB->TexCoordPtr[unit]; |
GLvector4f *out = &store->texcoord[unit]; |
GLvector4f *normal = VB->NormalPtr; |
GLfloat (*texcoord)[4] = (GLfloat (*)[4])out->start; |
GLuint count = VB->Count; |
GLuint i; |
const GLfloat *norm = normal->start; |
for (i=0;i<count;i++, STRIDE_F(norm, normal->stride)) { |
texcoord[i][0] = norm[0]; |
texcoord[i][1] = norm[1]; |
texcoord[i][2] = norm[2]; |
} |
if (in) { |
out->flags |= (in->flags & VEC_SIZE_FLAGS) | VEC_SIZE_3; |
out->count = in->count; |
out->size = MAX2(in->size, 3); |
if (in->size == 4) |
_mesa_copy_tab[0x8]( out, in ); |
} |
else { |
out->flags |= VEC_SIZE_3; |
out->size = 3; |
out->count = in->count; |
} |
} |
static void texgen_sphere_map( GLcontext *ctx, |
struct texgen_stage_data *store, |
GLuint unit ) |
{ |
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; |
GLvector4f *in = VB->TexCoordPtr[unit]; |
GLvector4f *out = &store->texcoord[unit]; |
GLfloat (*texcoord)[4] = (GLfloat (*)[4]) out->start; |
GLuint count = VB->Count; |
GLuint i; |
GLfloat (*f)[3] = store->tmp_f; |
GLfloat *m = store->tmp_m; |
/* _mesa_debug(NULL, "%s normstride %d eyestride %d\n", */ |
/* __FUNCTION__, VB->NormalPtr->stride, */ |
/* VB->EyePtr->stride); */ |
(build_m_tab[VB->EyePtr->size])( store->tmp_f, |
store->tmp_m, |
VB->NormalPtr, |
VB->EyePtr ); |
for (i=0;i<count;i++) { |
texcoord[i][0] = f[i][0] * m[i] + 0.5F; |
texcoord[i][1] = f[i][1] * m[i] + 0.5F; |
} |
if (in) { |
out->size = MAX2(in->size,2); |
out->count = in->count; |
out->flags |= (in->flags & VEC_SIZE_FLAGS) | VEC_SIZE_2; |
if (in->size > 2) |
_mesa_copy_tab[all_bits[in->size] & ~0x3]( out, in ); |
} else { |
out->size = 2; |
out->flags |= VEC_SIZE_2; |
out->count = in->count; |
} |
} |
static void texgen( GLcontext *ctx, |
struct texgen_stage_data *store, |
GLuint unit ) |
{ |
TNLcontext *tnl = TNL_CONTEXT(ctx); |
struct vertex_buffer *VB = &tnl->vb; |
GLvector4f *in = VB->TexCoordPtr[unit]; |
GLvector4f *out = &store->texcoord[unit]; |
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; |
const GLvector4f *obj = VB->ObjPtr; |
const GLvector4f *eye = VB->EyePtr; |
const GLvector4f *normal = VB->NormalPtr; |
GLfloat (*texcoord)[4] = (GLfloat (*)[4])out->data; |
GLfloat *indata; |
GLuint count = VB->Count; |
GLfloat (*f)[3] = store->tmp_f; |
GLfloat *m = store->tmp_m; |
GLuint holes = 0; |
if (texUnit->_GenFlags & TEXGEN_NEED_M) { |
build_m_tab[in->size]( store->tmp_f, store->tmp_m, normal, eye ); |
} else if (texUnit->_GenFlags & TEXGEN_NEED_F) { |
build_f_tab[in->size]( (GLfloat *)store->tmp_f, 3, normal, eye ); |
} |
if (!in) { |
ASSERT(0); |
in = out; |
in->count = VB->Count; |
out->size = store->TexgenSize[unit]; |
out->flags |= texUnit->TexGenEnabled; |
out->count = VB->Count; |
holes = store->TexgenHoles[unit]; |
} |
else { |
GLuint copy = (all_bits[in->size] & ~texUnit->TexGenEnabled); |
if (copy) |
_mesa_copy_tab[copy]( out, in ); |
out->size = MAX2(in->size, store->TexgenSize[unit]); |
out->flags |= (in->flags & VEC_SIZE_FLAGS) | texUnit->TexGenEnabled; |
out->count = in->count; |
holes = ~all_bits[in->size] & store->TexgenHoles[unit]; |
} |
if (holes) { |
if (holes & VEC_DIRTY_2) _mesa_vector4f_clean_elem(out, count, 2); |
if (holes & VEC_DIRTY_1) _mesa_vector4f_clean_elem(out, count, 1); |
if (holes & VEC_DIRTY_0) _mesa_vector4f_clean_elem(out, count, 0); |
} |
if (texUnit->TexGenEnabled & S_BIT) { |
GLuint i; |
switch (texUnit->GenModeS) { |
case GL_OBJECT_LINEAR: |
_mesa_dotprod_tab[obj->size]( (GLfloat *)out->data, |
sizeof(out->data[0]), obj, |
texUnit->ObjectPlaneS ); |
break; |
case GL_EYE_LINEAR: |
_mesa_dotprod_tab[eye->size]( (GLfloat *)out->data, |
sizeof(out->data[0]), eye, |
texUnit->EyePlaneS ); |
break; |
case GL_SPHERE_MAP: |
for (indata=in->start,i=0 ; i<count ;i++, STRIDE_F(indata,in->stride)) |
texcoord[i][0] = indata[0] * m[i] + 0.5F; |
break; |
case GL_REFLECTION_MAP_NV: |
for (i=0;i<count;i++) |
texcoord[i][0] = f[i][0]; |
break; |
case GL_NORMAL_MAP_NV: { |
const GLfloat *norm = normal->start; |
for (i=0;i<count;i++, STRIDE_F(norm, normal->stride)) { |
texcoord[i][0] = norm[0]; |
} |
break; |
} |
default: |
_mesa_problem(ctx, "Bad S texgen"); |
} |
} |
if (texUnit->TexGenEnabled & T_BIT) { |
GLuint i; |
switch (texUnit->GenModeT) { |
case GL_OBJECT_LINEAR: |
_mesa_dotprod_tab[obj->size]( &(out->data[0][1]), |
sizeof(out->data[0]), obj, |
texUnit->ObjectPlaneT ); |
break; |
case GL_EYE_LINEAR: |
_mesa_dotprod_tab[eye->size]( &(out->data[0][1]), |
sizeof(out->data[0]), eye, |
texUnit->EyePlaneT ); |
break; |
case GL_SPHERE_MAP: |
for (indata=in->start,i=0; i<count ;i++,STRIDE_F(indata,in->stride)) |
texcoord[i][1] = indata[1] * m[i] + 0.5F; |
break; |
case GL_REFLECTION_MAP_NV: |
for (i=0;i<count;i++) |
texcoord[i][0] = f[i][0]; |
break; |
case GL_NORMAL_MAP_NV: { |
const GLfloat *norm = normal->start; |
for (i=0;i<count;i++, STRIDE_F(norm, normal->stride)) { |
texcoord[i][1] = norm[1]; |
} |
break; |
} |
default: |
_mesa_problem(ctx, "Bad T texgen"); |
} |
} |
if (texUnit->TexGenEnabled & R_BIT) { |
GLuint i; |
switch (texUnit->GenModeR) { |
case GL_OBJECT_LINEAR: |
_mesa_dotprod_tab[obj->size]( &(out->data[0][2]), |
sizeof(out->data[0]), obj, |
texUnit->ObjectPlaneR ); |
break; |
case GL_EYE_LINEAR: |
_mesa_dotprod_tab[eye->size]( &(out->data[0][2]), |
sizeof(out->data[0]), eye, |
texUnit->EyePlaneR ); |
break; |
case GL_REFLECTION_MAP_NV: |
for (i=0;i<count;i++) |
texcoord[i][2] = f[i][2]; |
break; |
case GL_NORMAL_MAP_NV: { |
const GLfloat *norm = normal->start; |
for (i=0;i<count;i++,STRIDE_F(norm, normal->stride)) { |
texcoord[i][2] = norm[2]; |
} |
break; |
} |
default: |
_mesa_problem(ctx, "Bad R texgen"); |
} |
} |
if (texUnit->TexGenEnabled & Q_BIT) { |
switch (texUnit->GenModeQ) { |
case GL_OBJECT_LINEAR: |
_mesa_dotprod_tab[obj->size]( &(out->data[0][3]), |
sizeof(out->data[0]), obj, |
texUnit->ObjectPlaneQ ); |
break; |
case GL_EYE_LINEAR: |
_mesa_dotprod_tab[eye->size]( &(out->data[0][3]), |
sizeof(out->data[0]), eye, |
texUnit->EyePlaneQ ); |
break; |
default: |
_mesa_problem(ctx, "Bad Q texgen"); |
} |
} |
} |
/* Pipeline 'run' callback once the stage has been validated.
 *
 * For every texture unit with texgen enabled, re-run that unit's texgen
 * function if the eye coords, normals or the unit's own texcoords changed,
 * and point the vertex buffer's texcoord slot at the stage's output vector.
 * Always returns GL_TRUE.
 */
static GLboolean run_texgen_stage( GLcontext *ctx,
				   struct gl_pipeline_stage *stage )
{
   struct texgen_stage_data *store = TEXGEN_STAGE_DATA( stage );
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   GLuint unit;

   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
      if (!(ctx->Texture._TexGenEnabled & ENABLE_TEXGEN(unit)))
	 continue;

      if (stage->changed_inputs &
	  (VERT_BIT_EYE | VERT_BIT_NORMAL | VERT_BIT_TEX(unit))) {
	 store->TexgenFunc[unit]( ctx, store, unit );
      }

      VB->TexCoordPtr[unit] = &store->texcoord[unit];
   }

   return GL_TRUE;
}
/* Pipeline 'run' callback used after a state change.
 *
 * For each texture unit with any texgen coordinate enabled this records the
 * size of the generated texcoord, the mask of components below that size
 * that are not generated ("holes"), and which texgen function to use: the
 * general texgen() or one of the special-case fast paths.  It then installs
 * run_texgen_stage() as the stage's run callback and runs it.
 */
static GLboolean run_validate_texgen_stage( GLcontext *ctx,
					    struct gl_pipeline_stage *stage )
{
   struct texgen_stage_data *store = TEXGEN_STAGE_DATA(stage);
   GLuint i;

   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) {
      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];

      if (texUnit->TexGenEnabled) {
	 GLuint sz;

	 /* The size is set by the highest generated component.  texgen()
	  * writes S, T, R, Q to components 0, 1, 2, 3, so Q needs four
	  * components and R three (the original had these two swapped).
	  */
	 if (texUnit->TexGenEnabled & Q_BIT)
	    sz = 4;
	 else if (texUnit->TexGenEnabled & R_BIT)
	    sz = 3;
	 else if (texUnit->TexGenEnabled & T_BIT)
	    sz = 2;
	 else
	    sz = 1;

	 store->TexgenSize[i] = sz;
	 store->TexgenHoles[i] = (all_bits[sz] & ~texUnit->TexGenEnabled);
	 store->TexgenFunc[i] = texgen;

	 /* Pick a fast path when every enabled coordinate uses one mode. */
	 if (texUnit->TexGenEnabled == (S_BIT|T_BIT|R_BIT)) {
	    if (texUnit->_GenFlags == TEXGEN_REFLECTION_MAP_NV) {
	       store->TexgenFunc[i] = texgen_reflection_map_nv;
	    }
	    else if (texUnit->_GenFlags == TEXGEN_NORMAL_MAP_NV) {
	       store->TexgenFunc[i] = texgen_normal_map_nv;
	    }
	 }
	 else if (texUnit->TexGenEnabled == (S_BIT|T_BIT) &&
		  texUnit->_GenFlags == TEXGEN_SPHERE_MAP) {
	    store->TexgenFunc[i] = texgen_sphere_map;
	 }
      }
   }

   stage->run = run_texgen_stage;
   return stage->run( ctx, stage );
}
/* Pipeline 'check' callback: decide whether the texgen stage is active and
 * compute its input/output vertex attribute masks.
 *
 * The stage is active only when some unit has texgen enabled and vertex
 * programs are disabled.  If private data has already been allocated, the
 * run callback is reset to run_validate_texgen_stage.
 */
static void check_texgen( GLcontext *ctx, struct gl_pipeline_stage *stage )
{
   GLuint inputs = 0;
   GLuint outputs = 0;
   GLuint unit;

   stage->active = 0;

   if (!ctx->Texture._TexGenEnabled || ctx->VertexProgram.Enabled)
      return;

   if (ctx->Texture._GenFlags & TEXGEN_OBJ_LINEAR)
      inputs |= VERT_BIT_POS;

   if (ctx->Texture._GenFlags & TEXGEN_NEED_EYE_COORD)
      inputs |= VERT_BIT_EYE;

   if (ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS)
      inputs |= VERT_BIT_NORMAL;

   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
      if (ctx->Texture._TexGenEnabled & ENABLE_TEXGEN(unit)) {
	 /* The unit's texcoord is produced by this stage... */
	 outputs |= VERT_BIT_TEX(unit);

	 /* ...and also consumed: the original input is needed in case it
	  * contains a Q coord.
	  */
	 inputs |= VERT_BIT_TEX(unit);

	 /* Something for Feedback? */
      }
   }

   if (stage->privatePtr)
      stage->run = run_validate_texgen_stage;

   stage->active = 1;
   stage->inputs = inputs;
   stage->outputs = outputs;
}
/* Called the first time stage->run() is invoked. |
*/ |
static GLboolean alloc_texgen_data( GLcontext *ctx, |
struct gl_pipeline_stage *stage ) |
{ |
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; |
struct texgen_stage_data *store; |
GLuint i; |
stage->privatePtr = CALLOC(sizeof(*store)); |
store = TEXGEN_STAGE_DATA(stage); |
if (!store) |
return GL_FALSE; |
for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) |
_mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 ); |
store->tmp_f = (GLfloat (*)[3]) MALLOC(VB->Size * sizeof(GLfloat) * 3); |
store->tmp_m = (GLfloat *) MALLOC(VB->Size * sizeof(GLfloat)); |
/* Now validate and run the stage. |
*/ |
stage->run = run_validate_texgen_stage; |
return stage->run( ctx, stage ); |
} |
static void free_texgen_data( struct gl_pipeline_stage *stage ) |
{ |
struct texgen_stage_data *store = TEXGEN_STAGE_DATA(stage); |
GLuint i; |
if (store) { |
for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++) |
if (store->texcoord[i].data) |
_mesa_vector4f_free( &store->texcoord[i] ); |
if (store->tmp_f) FREE( store->tmp_f ); |
if (store->tmp_m) FREE( store->tmp_m ); |
FREE( store ); |
stage->privatePtr = NULL; |
} |
} |
const struct gl_pipeline_stage _tnl_texgen_stage = |
{ |
"texgen", /* name */ |
_NEW_TEXTURE, /* when to call check() */ |
_NEW_TEXTURE, /* when to invalidate stored data */ |
GL_FALSE, /* active? */ |
0, /* inputs */ |
0, /* outputs */ |
0, /* changed_inputs */ |
NULL, /* private data */ |
free_texgen_data, /* destructor */ |
check_texgen, /* check */ |
alloc_texgen_data /* run -- initially set to alloc data */ |
}; |
/shark/trunk/ports/mesa/src/tnl/t_imm_exec.c |
---|
1,4 → 1,4 |
/* $Id: t_imm_exec.c,v 1.2 2003-04-24 14:22:20 giacomo Exp $ */ |
/* $Id: t_imm_exec.c,v 1.1 2003-02-28 11:48:07 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
46,7 → 46,7 |
#include "math/m_xform.h" |
#include "t_context.h" |
#include "t_import_array.h" |
#include "t_array_import.h" |
#include "t_imm_alloc.h" |
#include "t_imm_api.h" |
#include "t_imm_debug.h" |
/shark/trunk/ports/mesa/src/tnl/t_array_api.c |
---|
1,4 → 1,4 |
/* $Id: t_array_api.c,v 1.2 2003-04-24 14:22:20 giacomo Exp $ */ |
/* $Id: t_array_api.c,v 1.1 2003-02-28 11:48:06 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
42,7 → 42,7 |
#include "array_cache/acache.h" |
#include "t_array_api.h" |
#include "t_import_array.h" |
#include "t_array_import.h" |
#include "t_imm_api.h" |
#include "t_imm_exec.h" |
#include "t_context.h" |
/shark/trunk/ports/mesa/src/swrast/s_aatempline.h |
---|
File deleted |
/shark/trunk/ports/mesa/src/swrast/s_aaline.c |
---|
1,4 → 1,4 |
/* $Id: s_aaline.c,v 1.2 2003-04-24 14:22:20 giacomo Exp $ */ |
/* $Id: s_aaline.c,v 1.1 2003-02-28 11:49:40 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
468,7 → 468,7 |
#define DO_Z |
#define DO_FOG |
#define DO_INDEX |
#include "s_aatempline.h" |
#include "s_aalinetemp.h" |
#define NAME(x) aa_rgba_##x |
475,7 → 475,7 |
#define DO_Z |
#define DO_FOG |
#define DO_RGBA |
#include "s_aatempline.h" |
#include "s_aalinetemp.h" |
#define NAME(x) aa_tex_rgba_##x |
483,7 → 483,7 |
#define DO_FOG |
#define DO_RGBA |
#define DO_TEX |
#include "s_aatempline.h" |
#include "s_aalinetemp.h" |
#define NAME(x) aa_multitex_rgba_##x |
491,7 → 491,7 |
#define DO_FOG |
#define DO_RGBA |
#define DO_MULTITEX |
#include "s_aatempline.h" |
#include "s_aalinetemp.h" |
#define NAME(x) aa_multitex_spec_##x |
500,7 → 500,7 |
#define DO_RGBA |
#define DO_MULTITEX |
#define DO_SPEC |
#include "s_aatempline.h" |
#include "s_aalinetemp.h" |
/shark/trunk/ports/mesa/src/swrast/s_aalinetemp.h |
---|
0,0 → 1,315 |
/* $Id: s_aalinetemp.h,v 1.1 2003-02-28 11:49:40 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 4.1 |
* |
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* Antialiased line template. |
*/ |
/* |
* Function to render each fragment in the AA line. |
*/ |
static void |
NAME(plot)(GLcontext *ctx, struct LineInfo *line, int ix, int iy) |
{ |
const GLfloat fx = (GLfloat) ix; |
const GLfloat fy = (GLfloat) iy; |
const GLfloat coverage = compute_coveragef(line, ix, iy); |
const GLuint i = line->span.end; |
if (coverage == 0.0) |
return; |
line->span.end++; |
line->span.array->coverage[i] = coverage; |
line->span.array->x[i] = ix; |
line->span.array->y[i] = iy; |
/* |
* Compute Z, color, texture coords, fog for the fragment by |
* solving the plane equations at (ix,iy). |
*/ |
#ifdef DO_Z |
line->span.array->z[i] = (GLdepth) solve_plane(fx, fy, line->zPlane); |
#endif |
#ifdef DO_FOG |
line->span.array->fog[i] = solve_plane(fx, fy, line->fPlane); |
#endif |
#ifdef DO_RGBA |
line->span.array->rgba[i][RCOMP] = solve_plane_chan(fx, fy, line->rPlane); |
line->span.array->rgba[i][GCOMP] = solve_plane_chan(fx, fy, line->gPlane); |
line->span.array->rgba[i][BCOMP] = solve_plane_chan(fx, fy, line->bPlane); |
line->span.array->rgba[i][ACOMP] = solve_plane_chan(fx, fy, line->aPlane); |
#endif |
#ifdef DO_INDEX |
line->span.array->index[i] = (GLint) solve_plane(fx, fy, line->iPlane); |
#endif |
#ifdef DO_SPEC |
line->span.array->spec[i][RCOMP] = solve_plane_chan(fx, fy, line->srPlane); |
line->span.array->spec[i][GCOMP] = solve_plane_chan(fx, fy, line->sgPlane); |
line->span.array->spec[i][BCOMP] = solve_plane_chan(fx, fy, line->sbPlane); |
#endif |
#ifdef DO_TEX |
{ |
const GLfloat invQ = solve_plane_recip(fx, fy, line->vPlane[0]); |
line->span.array->texcoords[0][i][0] = solve_plane(fx, fy, line->sPlane[0]) * invQ; |
line->span.array->texcoords[0][i][1] = solve_plane(fx, fy, line->tPlane[0]) * invQ; |
line->span.array->texcoords[0][i][2] = solve_plane(fx, fy, line->uPlane[0]) * invQ; |
line->span.array->lambda[0][i] = compute_lambda(line->sPlane[0], line->tPlane[0], invQ, |
line->texWidth[0], line->texHeight[0]); |
} |
#elif defined(DO_MULTITEX) |
{ |
GLuint unit; |
for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { |
if (ctx->Texture.Unit[unit]._ReallyEnabled) { |
const GLfloat invQ = solve_plane_recip(fx, fy, line->vPlane[unit]); |
line->span.array->texcoords[unit][i][0] = solve_plane(fx, fy, line->sPlane[unit]) * invQ; |
line->span.array->texcoords[unit][i][1] = solve_plane(fx, fy, line->tPlane[unit]) * invQ; |
line->span.array->texcoords[unit][i][2] = solve_plane(fx, fy, line->uPlane[unit]) * invQ; |
line->span.array->lambda[unit][i] = compute_lambda(line->sPlane[unit], |
line->tPlane[unit], invQ, |
line->texWidth[unit], line->texHeight[unit]); |
} |
} |
} |
#endif |
if (line->span.end == MAX_WIDTH) { |
#if defined(DO_TEX) || defined(DO_MULTITEX) |
_mesa_write_texture_span(ctx, &(line->span)); |
#elif defined(DO_RGBA) |
_mesa_write_rgba_span(ctx, &(line->span)); |
#else |
_mesa_write_index_span(ctx, &(line->span)); |
#endif |
line->span.end = 0; /* reset counter */ |
} |
} |
/* |
* Line setup |
*/ |
static void |
NAME(line)(GLcontext *ctx, const SWvertex *v0, const SWvertex *v1) |
{ |
SWcontext *swrast = SWRAST_CONTEXT(ctx); |
GLfloat tStart, tEnd; /* segment start, end along line length */ |
GLboolean inSegment; |
GLint iLen, i; |
/* Init the LineInfo struct */ |
struct LineInfo line; |
line.x0 = v0->win[0]; |
line.y0 = v0->win[1]; |
line.x1 = v1->win[0]; |
line.y1 = v1->win[1]; |
line.dx = line.x1 - line.x0; |
line.dy = line.y1 - line.y0; |
line.len = (GLfloat) sqrt(line.dx * line.dx + line.dy * line.dy); |
line.halfWidth = 0.5F * ctx->Line.Width; |
if (line.len == 0.0 || IS_INF_OR_NAN(line.len)) |
return; |
INIT_SPAN(line.span, GL_LINE, 0, 0, SPAN_XY | SPAN_COVERAGE); |
line.xAdj = line.dx / line.len * line.halfWidth; |
line.yAdj = line.dy / line.len * line.halfWidth; |
#ifdef DO_Z |
line.span.arrayMask |= SPAN_Z; |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->win[2], v1->win[2], line.zPlane); |
#endif |
#ifdef DO_FOG |
line.span.arrayMask |= SPAN_FOG; |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->fog, v1->fog, line.fPlane); |
#endif |
#ifdef DO_RGBA |
line.span.arrayMask |= SPAN_RGBA; |
if (ctx->Light.ShadeModel == GL_SMOOTH) { |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->color[RCOMP], v1->color[RCOMP], line.rPlane); |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->color[GCOMP], v1->color[GCOMP], line.gPlane); |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->color[BCOMP], v1->color[BCOMP], line.bPlane); |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->color[ACOMP], v1->color[ACOMP], line.aPlane); |
} |
else { |
constant_plane(v1->color[RCOMP], line.rPlane); |
constant_plane(v1->color[GCOMP], line.gPlane); |
constant_plane(v1->color[BCOMP], line.bPlane); |
constant_plane(v1->color[ACOMP], line.aPlane); |
} |
#endif |
#ifdef DO_SPEC |
line.span.arrayMask |= SPAN_SPEC; |
if (ctx->Light.ShadeModel == GL_SMOOTH) { |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->specular[RCOMP], v1->specular[RCOMP], line.srPlane); |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->specular[GCOMP], v1->specular[GCOMP], line.sgPlane); |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
v0->specular[BCOMP], v1->specular[BCOMP], line.sbPlane); |
} |
else { |
constant_plane(v1->specular[RCOMP], line.srPlane); |
constant_plane(v1->specular[GCOMP], line.sgPlane); |
constant_plane(v1->specular[BCOMP], line.sbPlane); |
} |
#endif |
#ifdef DO_INDEX |
line.span.arrayMask |= SPAN_INDEX; |
if (ctx->Light.ShadeModel == GL_SMOOTH) { |
compute_plane(line.x0, line.y0, line.x1, line.y1, |
(GLfloat) v0->index, (GLfloat) v1->index, line.iPlane); |
} |
else { |
constant_plane((GLfloat) v1->index, line.iPlane); |
} |
#endif |
#ifdef DO_TEX |
{ |
const struct gl_texture_object *obj = ctx->Texture.Unit[0]._Current; |
const struct gl_texture_image *texImage = obj->Image[obj->BaseLevel]; |
const GLfloat invW0 = v0->win[3]; |
const GLfloat invW1 = v1->win[3]; |
const GLfloat s0 = v0->texcoord[0][0] * invW0; |
const GLfloat s1 = v1->texcoord[0][0] * invW1; |
const GLfloat t0 = v0->texcoord[0][1] * invW0; |
const GLfloat t1 = v1->texcoord[0][1] * invW0; |
const GLfloat r0 = v0->texcoord[0][2] * invW0; |
const GLfloat r1 = v1->texcoord[0][2] * invW0; |
const GLfloat q0 = v0->texcoord[0][3] * invW0; |
const GLfloat q1 = v1->texcoord[0][3] * invW0; |
line.span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA); |
compute_plane(line.x0, line.y0, line.x1, line.y1, s0, s1, line.sPlane[0]); |
compute_plane(line.x0, line.y0, line.x1, line.y1, t0, t1, line.tPlane[0]); |
compute_plane(line.x0, line.y0, line.x1, line.y1, r0, r1, line.uPlane[0]); |
compute_plane(line.x0, line.y0, line.x1, line.y1, q0, q1, line.vPlane[0]); |
line.texWidth[0] = (GLfloat) texImage->Width; |
line.texHeight[0] = (GLfloat) texImage->Height; |
} |
#elif defined(DO_MULTITEX) |
{ |
GLuint u; |
line.span.arrayMask |= (SPAN_TEXTURE | SPAN_LAMBDA); |
for (u = 0; u < ctx->Const.MaxTextureUnits; u++) { |
if (ctx->Texture.Unit[u]._ReallyEnabled) { |
const struct gl_texture_object *obj = ctx->Texture.Unit[u]._Current; |
const struct gl_texture_image *texImage = obj->Image[obj->BaseLevel]; |
const GLfloat invW0 = v0->win[3]; |
const GLfloat invW1 = v1->win[3]; |
const GLfloat s0 = v0->texcoord[u][0] * invW0; |
const GLfloat s1 = v1->texcoord[u][0] * invW1; |
const GLfloat t0 = v0->texcoord[u][1] * invW0; |
const GLfloat t1 = v1->texcoord[u][1] * invW0; |
const GLfloat r0 = v0->texcoord[u][2] * invW0; |
const GLfloat r1 = v1->texcoord[u][2] * invW0; |
const GLfloat q0 = v0->texcoord[u][3] * invW0; |
const GLfloat q1 = v1->texcoord[u][3] * invW0; |
compute_plane(line.x0, line.y0, line.x1, line.y1, s0, s1, line.sPlane[u]); |
compute_plane(line.x0, line.y0, line.x1, line.y1, t0, t1, line.tPlane[u]); |
compute_plane(line.x0, line.y0, line.x1, line.y1, r0, r1, line.uPlane[u]); |
compute_plane(line.x0, line.y0, line.x1, line.y1, q0, q1, line.vPlane[u]); |
line.texWidth[u] = (GLfloat) texImage->Width; |
line.texHeight[u] = (GLfloat) texImage->Height; |
} |
} |
} |
#endif |
tStart = tEnd = 0.0; |
inSegment = GL_FALSE; |
iLen = (GLint) line.len; |
if (ctx->Line.StippleFlag) { |
for (i = 0; i < iLen; i++) { |
const GLuint bit = (swrast->StippleCounter / ctx->Line.StippleFactor) & 0xf; |
if ((1 << bit) & ctx->Line.StipplePattern) { |
/* stipple bit is on */ |
const GLfloat t = (GLfloat) i / (GLfloat) line.len; |
if (!inSegment) { |
/* start new segment */ |
inSegment = GL_TRUE; |
tStart = t; |
} |
else { |
/* still in the segment, extend it */ |
tEnd = t; |
} |
} |
else { |
/* stipple bit is off */ |
if (inSegment && (tEnd > tStart)) { |
/* draw the segment */ |
segment(ctx, &line, NAME(plot), tStart, tEnd); |
inSegment = GL_FALSE; |
} |
else { |
/* still between segments, do nothing */ |
} |
} |
swrast->StippleCounter++; |
} |
if (inSegment) { |
/* draw the final segment of the line */ |
segment(ctx, &line, NAME(plot), tStart, 1.0F); |
} |
} |
else { |
/* non-stippled */ |
segment(ctx, &line, NAME(plot), 0.0, 1.0); |
} |
#if defined(DO_TEX) || defined(DO_MULTITEX) |
_mesa_write_texture_span(ctx, &(line.span)); |
#elif defined(DO_RGBA) |
_mesa_write_rgba_span(ctx, &(line.span)); |
#else |
_mesa_write_index_span(ctx, &(line.span)); |
#endif |
} |
#undef DO_Z |
#undef DO_FOG |
#undef DO_RGBA |
#undef DO_INDEX |
#undef DO_SPEC |
#undef DO_TEX |
#undef DO_MULTITEX |
#undef NAME |
/shark/trunk/ports/mesa/src/texformat_tmp.h |
---|
0,0 → 1,461 |
/* $Id: texformat_tmp.h,v 1.1 2003-02-28 11:42:05 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
* Version: 4.1 |
* |
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Gareth Hughes |
* Brian Paul |
*/ |
/* |
* This template file generates texel fetch functions for 1-D, 2-D and 3-D |
* texture images. |
*/ |
#if DIM == 1 |
#define CHAN_SRC( t, i, j, k, sz ) \ |
((GLchan *)(t)->Data + (i) * (sz)) |
#define UBYTE_SRC( t, i, j, k, sz ) \ |
((GLubyte *)(t)->Data + (i) * (sz)) |
#define USHORT_SRC( t, i, j, k ) \ |
((GLushort *)(t)->Data + (i)) |
#define FLOAT_SRC( t, i, j, k ) \ |
((GLfloat *)(t)->Data + (i)) |
#define FETCH(x) fetch_1d_texel_##x |
#elif DIM == 2 |
#define CHAN_SRC( t, i, j, k, sz ) \ |
((GLchan *)(t)->Data + ((t)->RowStride * (j) + (i)) * (sz)) |
#define UBYTE_SRC( t, i, j, k, sz ) \ |
((GLubyte *)(t)->Data + ((t)->RowStride * (j) + (i)) * (sz)) |
#define USHORT_SRC( t, i, j, k ) \ |
((GLushort *)(t)->Data + ((t)->RowStride * (j) + (i))) |
#define FLOAT_SRC( t, i, j, k ) \ |
((GLfloat *)(t)->Data + ((t)->RowStride * (j) + (i))) |
#define FETCH(x) fetch_2d_texel_##x |
#elif DIM == 3 |
#define CHAN_SRC( t, i, j, k, sz ) \ |
(GLchan *)(t)->Data + (((t)->Height * (k) + (j)) * \ |
(t)->RowStride + (i)) * (sz) |
#define UBYTE_SRC( t, i, j, k, sz ) \ |
((GLubyte *)(t)->Data + (((t)->Height * (k) + (j)) * \ |
(t)->RowStride + (i)) * (sz)) |
#define USHORT_SRC( t, i, j, k ) \ |
((GLushort *)(t)->Data + (((t)->Height * (k) + (j)) * \ |
(t)->RowStride + (i))) |
#define FLOAT_SRC( t, i, j, k ) \ |
((GLfloat *)(t)->Data + (((t)->Height * (k) + (j)) * \ |
(t)->RowStride + (i))) |
#define FETCH(x) fetch_3d_texel_##x |
#else |
#error illegal number of texture dimensions |
#endif |
/* Fetch a texel from a 4-channel GLchan image; all four channels are
 * copied to 'texel' unchanged.
 */
static void FETCH(rgba)( const struct gl_texture_image *texImage,
			 GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLchan *texelSrc = CHAN_SRC( texImage, i, j, k, 4 );

   COPY_CHAN4( dst, texelSrc );
}
/* Fetch a texel from a 3-channel GLchan image; alpha is set to CHAN_MAX. */
static void FETCH(rgb)( const struct gl_texture_image *texImage,
			GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLchan *texelSrc = CHAN_SRC( texImage, i, j, k, 3 );

   dst[RCOMP] = texelSrc[0];
   dst[GCOMP] = texelSrc[1];
   dst[BCOMP] = texelSrc[2];
   dst[ACOMP] = CHAN_MAX;
}
/* Fetch a texel from a 1-channel alpha image: RGB are zero and alpha is
 * the stored value.
 */
static void FETCH(alpha)( const struct gl_texture_image *texImage,
			  GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLchan *texelSrc = CHAN_SRC( texImage, i, j, k, 1 );

   dst[RCOMP] = 0;
   dst[GCOMP] = 0;
   dst[BCOMP] = 0;
   dst[ACOMP] = texelSrc[0];
}
/* Fetch a texel from a 1-channel luminance image: the stored value is
 * replicated into R, G and B; alpha is CHAN_MAX.
 */
static void FETCH(luminance)( const struct gl_texture_image *texImage,
			      GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLchan *texelSrc = CHAN_SRC( texImage, i, j, k, 1 );

   dst[RCOMP] = texelSrc[0];
   dst[GCOMP] = texelSrc[0];
   dst[BCOMP] = texelSrc[0];
   dst[ACOMP] = CHAN_MAX;
}
/* Fetch a texel from a 2-channel luminance/alpha image: channel 0 goes to
 * R, G and B; channel 1 goes to alpha.
 */
static void FETCH(luminance_alpha)( const struct gl_texture_image *texImage,
				    GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLchan *texelSrc = CHAN_SRC( texImage, i, j, k, 2 );

   dst[RCOMP] = texelSrc[0];
   dst[GCOMP] = texelSrc[0];
   dst[BCOMP] = texelSrc[0];
   dst[ACOMP] = texelSrc[1];
}
/* Fetch a texel from a 1-channel intensity image: the stored value is
 * replicated into all four output channels.
 */
static void FETCH(intensity)( const struct gl_texture_image *texImage,
			      GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLchan *texelSrc = CHAN_SRC( texImage, i, j, k, 1 );

   dst[RCOMP] = texelSrc[0];
   dst[GCOMP] = texelSrc[0];
   dst[BCOMP] = texelSrc[0];
   dst[ACOMP] = texelSrc[0];
}
/* Fetch a texel from a 1-channel color-index image: a single GLchan index
 * is written to 'texel'.
 */
static void FETCH(color_index)( const struct gl_texture_image *texImage,
				GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dstIndex = (GLchan *) texel;
   const GLchan *texelSrc = CHAN_SRC( texImage, i, j, k, 1 );

   dstIndex[0] = texelSrc[0];
}
/* Fetch a texel from a depth image stored as GLfloat: a single GLfloat is
 * written to 'texel'.
 */
static void FETCH(depth_component)( const struct gl_texture_image *texImage,
				    GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLfloat *dstDepth = (GLfloat *) texel;
   const GLfloat *texelSrc = FLOAT_SRC( texImage, i, j, k );

   dstDepth[0] = texelSrc[0];
}
/* Fetch a texel from a 4-byte-per-texel image whose bytes 3, 2, 1, 0 hold
 * R, G, B, A respectively.
 */
static void FETCH(rgba8888)( const struct gl_texture_image *texImage,
			     GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 4 );

   dst[RCOMP] = UBYTE_TO_CHAN( bytes[3] );
   dst[GCOMP] = UBYTE_TO_CHAN( bytes[2] );
   dst[BCOMP] = UBYTE_TO_CHAN( bytes[1] );
   dst[ACOMP] = UBYTE_TO_CHAN( bytes[0] );
}
/* Fetch a texel from a 4-byte-per-texel image whose bytes 0, 1, 2, 3 hold
 * B, G, R, A respectively.
 */
static void FETCH(argb8888)( const struct gl_texture_image *texImage,
			     GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 4 );

   dst[RCOMP] = UBYTE_TO_CHAN( bytes[2] );
   dst[GCOMP] = UBYTE_TO_CHAN( bytes[1] );
   dst[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[ACOMP] = UBYTE_TO_CHAN( bytes[3] );
}
/* Fetch a texel from a 3-byte-per-texel image whose bytes 0, 1, 2 hold
 * B, G, R respectively; alpha is CHAN_MAX.
 */
static void FETCH(rgb888)( const struct gl_texture_image *texImage,
			   GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 3 );

   dst[RCOMP] = UBYTE_TO_CHAN( bytes[2] );
   dst[GCOMP] = UBYTE_TO_CHAN( bytes[1] );
   dst[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[ACOMP] = CHAN_MAX;
}
/* Fetch a texel from a 16-bit 5-6-5 image (red in the top five bits, blue
 * in the bottom five).  Each field is moved to the top of a byte and then
 * rescaled to the full 0..255 range; alpha is CHAN_MAX.
 */
static void FETCH(rgb565)( const struct gl_texture_image *texImage,
			   GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLushort packed = *USHORT_SRC( texImage, i, j, k );

   dst[RCOMP] = UBYTE_TO_CHAN( ((packed >> 8) & 0xf8) * 255 / 0xf8 );
   dst[GCOMP] = UBYTE_TO_CHAN( ((packed >> 3) & 0xfc) * 255 / 0xfc );
   dst[BCOMP] = UBYTE_TO_CHAN( ((packed << 3) & 0xf8) * 255 / 0xf8 );
   dst[ACOMP] = CHAN_MAX;
}
/* Fetch a texel from a 16-bit 4-4-4-4 image (alpha in the top nibble, then
 * red, green, blue).  Each nibble is rescaled from 0..15 to 0..255.
 */
static void FETCH(argb4444)( const struct gl_texture_image *texImage,
			     GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLushort packed = *USHORT_SRC( texImage, i, j, k );

   dst[RCOMP] = UBYTE_TO_CHAN( ((packed >>  8) & 0xf) * 255 / 0xf );
   dst[GCOMP] = UBYTE_TO_CHAN( ((packed >>  4) & 0xf) * 255 / 0xf );
   dst[BCOMP] = UBYTE_TO_CHAN( ((packed      ) & 0xf) * 255 / 0xf );
   dst[ACOMP] = UBYTE_TO_CHAN( ((packed >> 12) & 0xf) * 255 / 0xf );
}
/* Fetch a texel from a 16-bit 1-5-5-5 image (alpha in the top bit, then
 * red, green, blue).  The 5-bit fields are rescaled from 0..31 to 0..255;
 * the alpha bit becomes 0 or 255.
 */
static void FETCH(argb1555)( const struct gl_texture_image *texImage,
			     GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLushort packed = *USHORT_SRC( texImage, i, j, k );

   dst[RCOMP] = UBYTE_TO_CHAN( ((packed >> 10) & 0x1f) * 255 / 0x1f );
   dst[GCOMP] = UBYTE_TO_CHAN( ((packed >>  5) & 0x1f) * 255 / 0x1f );
   dst[BCOMP] = UBYTE_TO_CHAN( ((packed      ) & 0x1f) * 255 / 0x1f );
   dst[ACOMP] = UBYTE_TO_CHAN( ((packed >> 15) & 0x01) * 255 );
}
/* Fetch a texel from a 2-byte-per-texel image: byte 0 goes to R, G and B;
 * byte 1 goes to alpha.
 */
static void FETCH(al88)( const struct gl_texture_image *texImage,
			 GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 2 );

   dst[RCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[GCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[ACOMP] = UBYTE_TO_CHAN( bytes[1] );
}
/* Fetch a texel from an 8-bit 3-3-2 image: red in bits 7..5, green in bits
 * 4..2, blue in bits 1..0.  Each field is moved to the top of the byte and
 * rescaled to 0..255; alpha is CHAN_MAX.
 */
static void FETCH(rgb332)( const struct gl_texture_image *texImage,
			   GLint i, GLint j, GLint k, GLvoid *texel )
{
   const GLubyte *src = UBYTE_SRC( texImage, i, j, k, 1 );
   const GLubyte s = *src;
   GLchan *rgba = (GLchan *) texel;
   rgba[RCOMP] = UBYTE_TO_CHAN( ((s     ) & 0xe0) * 255 / 0xe0 );
   rgba[GCOMP] = UBYTE_TO_CHAN( ((s << 3) & 0xe0) * 255 / 0xe0 );
   /* Blue is the low two bits, so it needs a shift of 6 to reach bits 7..6.
    * The original shift of 5 picked up green's low bit and dropped blue's.
    */
   rgba[BCOMP] = UBYTE_TO_CHAN( ((s << 6) & 0xc0) * 255 / 0xc0 );
   rgba[ACOMP] = CHAN_MAX;
}
/* Fetch a texel from a 1-byte alpha image: RGB are zero and alpha is the
 * stored byte.
 */
static void FETCH(a8)( const struct gl_texture_image *texImage,
		       GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 1 );

   dst[RCOMP] = 0;
   dst[GCOMP] = 0;
   dst[BCOMP] = 0;
   dst[ACOMP] = UBYTE_TO_CHAN( bytes[0] );
}
/* Fetch a texel from a 1-byte luminance image: the stored byte goes to R,
 * G and B; alpha is CHAN_MAX.
 */
static void FETCH(l8)( const struct gl_texture_image *texImage,
		       GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 1 );

   dst[RCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[GCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[ACOMP] = CHAN_MAX;
}
/* Fetch a texel from a 1-byte intensity image: the stored byte goes to all
 * four output channels.
 */
static void FETCH(i8)( const struct gl_texture_image *texImage,
		       GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dst = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 1 );

   dst[RCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[GCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   dst[ACOMP] = UBYTE_TO_CHAN( bytes[0] );
}
/* Fetch a texel from a 1-byte color-index image: a single GLchan index is
 * written to 'texel'.
 */
static void FETCH(ci8)( const struct gl_texture_image *texImage,
			GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *dstIndex = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 1 );

   dstIndex[0] = UBYTE_TO_CHAN( bytes[0] );
}
/* XXX this may break if GLchan != GLubyte */ |
static void FETCH(ycbcr)( const struct gl_texture_image *texImage, |
GLint i, GLint j, GLint k, GLvoid *texel ) |
{ |
const GLushort *src0 = USHORT_SRC( texImage, (i & ~1), j, k ); /* even */ |
const GLushort *src1 = src0 + 1; /* odd */ |
const GLubyte y0 = (*src0 >> 8) & 0xff; /* luminance */ |
const GLubyte cb = *src0 & 0xff; /* chroma U */ |
const GLubyte y1 = (*src1 >> 8) & 0xff; /* luminance */ |
const GLubyte cr = *src1 & 0xff; /* chroma V */ |
GLchan *rgba = (GLchan *) texel; |
GLint r, g, b; |
if (i & 1) { |
/* odd pixel: use y1,cr,cb */ |
r = (GLint) (1.164 * (y1-16) + 1.596 * (cr-128)); |
g = (GLint) (1.164 * (y1-16) - 0.813 * (cr-128) - 0.391 * (cb-128)); |
b = (GLint) (1.164 * (y1-16) + 2.018 * (cb-128)); |
} |
else { |
/* even pixel: use y0,cr,cb */ |
r = (GLint) (1.164 * (y0-16) + 1.596 * (cr-128)); |
g = (GLint) (1.164 * (y0-16) - 0.813 * (cr-128) - 0.391 * (cb-128)); |
b = (GLint) (1.164 * (y0-16) + 2.018 * (cb-128)); |
} |
rgba[RCOMP] = CLAMP(r, 0, CHAN_MAX); |
rgba[GCOMP] = CLAMP(g, 0, CHAN_MAX); |
rgba[BCOMP] = CLAMP(b, 0, CHAN_MAX); |
rgba[ACOMP] = CHAN_MAX; |
} |
/* XXX this may break if GLchan != GLubyte */ |
static void FETCH(ycbcr_rev)( const struct gl_texture_image *texImage, |
GLint i, GLint j, GLint k, GLvoid *texel ) |
{ |
const GLushort *src0 = USHORT_SRC( texImage, (i & ~1), j, k ); /* even */ |
const GLushort *src1 = src0 + 1; /* odd */ |
const GLubyte y0 = *src0 & 0xff; /* luminance */ |
const GLubyte cr = (*src0 >> 8) & 0xff; /* chroma U */ |
const GLubyte y1 = *src1 & 0xff; /* luminance */ |
const GLubyte cb = (*src1 >> 8) & 0xff; /* chroma V */ |
GLchan *rgba = (GLchan *) texel; |
GLint r, g, b; |
if (i & 1) { |
/* odd pixel: use y1,cr,cb */ |
r = (GLint) (1.164 * (y1-16) + 1.596 * (cr-128)); |
g = (GLint) (1.164 * (y1-16) - 0.813 * (cr-128) - 0.391 * (cb-128)); |
b = (GLint) (1.164 * (y1-16) + 2.018 * (cb-128)); |
} |
else { |
/* even pixel: use y0,cr,cb */ |
r = (GLint) (1.164 * (y0-16) + 1.596 * (cr-128)); |
g = (GLint) (1.164 * (y0-16) - 0.813 * (cr-128) - 0.391 * (cb-128)); |
b = (GLint) (1.164 * (y0-16) + 2.018 * (cb-128)); |
} |
rgba[RCOMP] = CLAMP(r, 0, CHAN_MAX); |
rgba[GCOMP] = CLAMP(g, 0, CHAN_MAX); |
rgba[BCOMP] = CLAMP(b, 0, CHAN_MAX); |
rgba[ACOMP] = CHAN_MAX; |
} |
/* Big-endian variants of the fetch routines; currently compiled out by the #if 0 below. */ |
#if 0 |
/*
 * Fetch one texel from a 4-byte-per-texel image whose bytes are stored
 * in the order A, B, G, R.  Each byte is converted with UBYTE_TO_CHAN.
 */
static void FETCH(abgr8888)( const struct gl_texture_image *texImage,
                             GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 4 );

   out[ACOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[BCOMP] = UBYTE_TO_CHAN( bytes[1] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[2] );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[3] );
}
/*
 * Fetch one texel from a 4-byte-per-texel image whose bytes are stored
 * in the order B, G, R, A.  Each byte is converted with UBYTE_TO_CHAN.
 */
static void FETCH(bgra8888)( const struct gl_texture_image *texImage,
                             GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 4 );

   out[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[1] );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[2] );
   out[ACOMP] = UBYTE_TO_CHAN( bytes[3] );
}
/*
 * Fetch one texel from a 3-byte-per-texel image whose bytes are stored
 * in the order B, G, R.  Alpha is not stored and is returned as CHAN_MAX.
 */
static void FETCH(bgr888)( const struct gl_texture_image *texImage,
                           GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 3 );

   out[BCOMP] = UBYTE_TO_CHAN( bytes[0] );
   out[GCOMP] = UBYTE_TO_CHAN( bytes[1] );
   out[RCOMP] = UBYTE_TO_CHAN( bytes[2] );
   out[ACOMP] = CHAN_MAX;
}
/*
 * Fetch one texel from a 16-bit 5-6-5 packed image.
 * Red is taken from bits 15..11, green from bits 10..5 and blue from
 * bits 4..0; each field is rescaled so that all-ones maps to 255.
 * Alpha is returned as CHAN_MAX.
 */
static void FETCH(bgr565)( const struct gl_texture_image *texImage,
                           GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLushort *texelPtr = USHORT_SRC( texImage, i, j, k );
   const GLushort packed = *texelPtr;
   const GLint red   = ((packed >> 8) & 0xf8) * 255 / 0xf8;
   const GLint green = ((packed >> 3) & 0xfc) * 255 / 0xfc;
   const GLint blue  = ((packed << 3) & 0xf8) * 255 / 0xf8;

   out[RCOMP] = UBYTE_TO_CHAN( red );
   out[GCOMP] = UBYTE_TO_CHAN( green );
   out[BCOMP] = UBYTE_TO_CHAN( blue );
   out[ACOMP] = CHAN_MAX;
}
/*
 * Fetch one texel from a 16-bit 4-4-4-4 packed image.
 * Alpha is the top nibble (bits 15..12), followed by red, green and
 * blue in descending nibbles; each nibble is rescaled from 0..15 to
 * 0..255 before conversion with UBYTE_TO_CHAN.
 */
static void FETCH(bgra4444)( const struct gl_texture_image *texImage,
                             GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLushort *texelPtr = USHORT_SRC( texImage, i, j, k );
   const GLushort packed = *texelPtr;
   const GLint alpha = ((packed >> 12) & 0xf) * 255 / 0xf;
   const GLint red   = ((packed >>  8) & 0xf) * 255 / 0xf;
   const GLint green = ((packed >>  4) & 0xf) * 255 / 0xf;
   const GLint blue  = ((packed      ) & 0xf) * 255 / 0xf;

   out[RCOMP] = UBYTE_TO_CHAN( red );
   out[GCOMP] = UBYTE_TO_CHAN( green );
   out[BCOMP] = UBYTE_TO_CHAN( blue );
   out[ACOMP] = UBYTE_TO_CHAN( alpha );
}
/*
 * Fetch one texel from a 16-bit 1-5-5-5 packed image.
 * Bit 15 is a one-bit alpha (expanded to 0 or 255); red, green and blue
 * are the following 5-bit fields, each rescaled from 0..31 to 0..255.
 */
static void FETCH(bgra5551)( const struct gl_texture_image *texImage,
                             GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLushort *texelPtr = USHORT_SRC( texImage, i, j, k );
   const GLushort packed = *texelPtr;
   const GLint alpha = ((packed >> 15) & 0x01) * 255;
   const GLint red   = ((packed >> 10) & 0x1f) * 255 / 0x1f;
   const GLint green = ((packed >>  5) & 0x1f) * 255 / 0x1f;
   const GLint blue  = ((packed      ) & 0x1f) * 255 / 0x1f;

   out[RCOMP] = UBYTE_TO_CHAN( red );
   out[GCOMP] = UBYTE_TO_CHAN( green );
   out[BCOMP] = UBYTE_TO_CHAN( blue );
   out[ACOMP] = UBYTE_TO_CHAN( alpha );
}
/*
 * Fetch one texel from a 2-byte-per-texel luminance/alpha image.
 * Byte 0 is luminance, replicated into red, green and blue;
 * byte 1 is alpha.  Both are converted with UBYTE_TO_CHAN.
 */
static void FETCH(la88)( const struct gl_texture_image *texImage,
                         GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *bytes = UBYTE_SRC( texImage, i, j, k, 2 );
   const GLchan luminance = UBYTE_TO_CHAN( bytes[0] );

   out[RCOMP] = luminance;
   out[GCOMP] = luminance;
   out[BCOMP] = luminance;
   out[ACOMP] = UBYTE_TO_CHAN( bytes[1] );
}
/*
 * Fetch one texel from an 8-bit packed RGB image and expand it to RGBA.
 * Red is taken from bits 7..5 and green from bits 4..2, each rescaled so
 * that all-ones maps to 255.  Alpha is returned as CHAN_MAX.
 */
static void FETCH(bgr233)( const struct gl_texture_image *texImage,
                           GLint i, GLint j, GLint k, GLvoid *texel )
{
   GLchan *out = (GLchan *) texel;
   const GLubyte *texelPtr = UBYTE_SRC( texImage, i, j, k, 1 );
   const GLubyte packed = *texelPtr;
   const GLint red   = ((packed     ) & 0xe0) * 255 / 0xe0;
   const GLint green = ((packed << 3) & 0xe0) * 255 / 0xe0;
   /* NOTE(review): a shift of 5 under mask 0xc0 selects bits 2..1, which
    * overlaps the green field above; if blue is meant to be bits 1..0
    * the shift should be 6 -- confirm the intended layout.
    */
   const GLint blue  = ((packed << 5) & 0xc0) * 255 / 0xc0;

   out[RCOMP] = UBYTE_TO_CHAN( red );
   out[GCOMP] = UBYTE_TO_CHAN( green );
   out[BCOMP] = UBYTE_TO_CHAN( blue );
   out[ACOMP] = CHAN_MAX;
}
#endif |
/* Remove the helper macros used by the fetch routines in this file.
 * NOTE(review): presumably this lets the including file redefine DIM and
 * include this template again for another dimension -- confirm against
 * the includer.
 */
#undef CHAN_SRC |
#undef UBYTE_SRC |
#undef USHORT_SRC |
#undef FLOAT_SRC |
#undef FETCH |
#undef DIM |
/shark/trunk/ports/mesa/src/texformat.c |
---|
1,4 → 1,4 |
/* $Id: texformat.c,v 1.2 2003-04-24 14:22:20 giacomo Exp $ */ |
/* $Id: texformat.c,v 1.1 2003-02-28 11:42:04 pj Exp $ */ |
/* |
* Mesa 3-D graphics library |
42,13 → 42,13 |
/* Texel fetch routines for all supported formats: |
*/ |
#define DIM 1 |
#include "tmp_texformat.h" |
#include "texformat_tmp.h" |
#define DIM 2 |
#include "tmp_texformat.h" |
#include "texformat_tmp.h" |
#define DIM 3 |
#include "tmp_texformat.h" |
#include "texformat_tmp.h" |
/* Have to have this so the FetchTexel function pointer is never NULL. |
*/ |