Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
70 | giacomo | 1 | /* $Id: sse_xform4.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
2 | |||
3 | /* |
||
4 | * Mesa 3-D graphics library |
||
5 | * Version: 3.5 |
||
6 | * |
||
7 | * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
||
8 | * |
||
9 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
10 | * copy of this software and associated documentation files (the "Software"), |
||
11 | * to deal in the Software without restriction, including without limitation |
||
12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
13 | * and/or sell copies of the Software, and to permit persons to whom the |
||
14 | * Software is furnished to do so, subject to the following conditions: |
||
15 | * |
||
16 | * The above copyright notice and this permission notice shall be included |
||
17 | * in all copies or substantial portions of the Software. |
||
18 | * |
||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
22 | * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
||
23 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||
24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | */ |
||
26 | |||
27 | #include "matypes.h" |
||
28 | #include "xform_args.h" |
||
29 | |||
30 | SEG_TEXT |
||
31 | |||
/*
 * Every routine in this section pushes two 4-byte registers (ESI and EDI)
 * before it reads its arguments, i.e. 8 bytes of stack.
 * NOTE(review): presumably FRAME_OFFSET is consumed by the ARG_* macros from
 * xform_args.h to skip those 8 bytes -- confirm against that header.
 */
32 | #define FRAME_OFFSET 8 |
||
33 | |||
/*
 * Memory operand for the i-th 4-byte element relative to ESI, EDI and EDX.
 * The routines below load the source-vertex pointer into ESI, the
 * dest-vertex pointer into EDI and the matrix pointer into EDX.
 * NOTE(review): `i` is not parenthesised in the expansion, so pass plain
 * constants only.
 */
34 | #define SRC(i) REGOFF(i * 4, ESI) |
||
35 | #define DST(i) REGOFF(i * 4, EDI) |
||
36 | #define MAT(i) REGOFF(i * 4, EDX) |
||
37 | |||
/*
 * Builds one immediate from four selectors weighted by 64, 16, 4 and 1
 * (four 2-bit fields when each argument is in 0..3).
 * Not referenced by any routine visible in this section.
 * NOTE(review): presumably intended as a SHUFPS lane-select immediate --
 * confirm before use.
 */
38 | #define SELECT(r0, r1, r2, r3) CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 ) |
||
39 | |||
40 | |||
/*
 * _mesa_sse_transform_points4_general
 *
 * Multiplies every 4-component source vertex by the full 16-float matrix
 * and writes a 4-component result per vertex.
 *
 * Inputs (read through the ARG_* macros): ARG_SOURCE, ARG_DEST, ARG_MATRIX.
 * The vertex count and byte stride are taken from the source vector
 * (V4F_COUNT, V4F_STRIDE); vertex data starts at V4F_START of each vector.
 *
 * Effects on the dest vector: VEC_SIZE_4 is OR-ed into V4F_FLAGS, V4F_COUNT
 * is set to the source count and V4F_SIZE is set to 4. When the source
 * count is zero the routine returns before any of these are written.
 *
 * Per vertex, for j = 0..3:
 *   D(j) = ox*m[j] + oy*m[4+j] + oz*m[8+j] + ow*m[12+j]
 * Dest advances by 16 bytes per vertex, source by the stride in EAX.
 *
 * Register use: ESI = source ptr, EDI = dest ptr, EDX = matrix ptr,
 * ECX = remaining vertices, EAX = source stride, XMM4-XMM7 = matrix,
 * XMM0-XMM3 = per-vertex temporaries.
 *
 * NOTE(review): assuming MOVAPS expands to the movaps instruction, the
 * matrix and every dest vertex address must be 16-byte aligned -- confirm
 * that callers guarantee this.
 *
 * In the lane comments below, lanes are listed from highest to lowest.
 */
41 | ALIGNTEXT16 |
||
42 | GLOBL GLNAME( _mesa_sse_transform_points4_general ) |
||
43 | GLNAME( _mesa_sse_transform_points4_general ): |
||
44 | |||
45 | PUSH_L( ESI ) /* preserve ESI; restored before RET */ |
||
46 | PUSH_L( EDI ) /* preserve EDI; restored before RET */ |
||
47 | |||
48 | MOV_L( ARG_SOURCE, ESI ) /* source vector descriptor */ |
||
49 | MOV_L( ARG_DEST, EDI ) /* dest vector descriptor */ |
||
50 | |||
51 | MOV_L( ARG_MATRIX, EDX ) /* matrix base, addressed through MAT(i) */ |
||
52 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source vertex count */ |
||
53 | |||
54 | TEST_L( ECX, ECX ) /* verify non-zero count */ |
||
55 | JE( LLBL( sse_general_done ) ) /* zero count: leave dest untouched */ |
||
56 | |||
57 | MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI each iteration */ |
||
58 | OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
59 | |||
60 | MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
||
61 | MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */ |
||
62 | |||
/* From here on ESI and EDI no longer point at the descriptors but at vertex data. */
63 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
64 | MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
||
65 | |||
66 | PREFETCHT0( REGIND(ESI) ) /* prefetch the first source vertex */ |
||
67 | |||
/* The matrix is loop-invariant: keep all 16 floats in XMM4-XMM7. */
68 | MOVAPS( MAT(0), XMM4 ) /* m3 | m2 | m1 | m0 */ |
||
69 | MOVAPS( MAT(4), XMM5 ) /* m7 | m6 | m5 | m4 */ |
||
70 | MOVAPS( MAT(8), XMM6 ) /* m11 | m10 | m9 | m8 */ |
||
71 | MOVAPS( MAT(12), XMM7 ) /* m15 | m14 | m13 | m12 */ |
||
72 | |||
73 | ALIGNTEXT16 |
||
74 | LLBL( sse_general_loop ): |
||
75 | |||
/* Broadcast each source component to all four lanes and scale one matrix group by it. */
76 | MOVSS( SRC(0), XMM0 ) /* ox */ |
||
77 | SHUFPS( CONST(0x0), XMM0, XMM0 ) /* ox | ox | ox | ox */ |
||
78 | MULPS( XMM4, XMM0 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
||
79 | |||
80 | MOVSS( SRC(1), XMM1 ) /* oy */ |
||
81 | SHUFPS( CONST(0x0), XMM1, XMM1 ) /* oy | oy | oy | oy */ |
||
82 | MULPS( XMM5, XMM1 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ |
||
83 | |||
84 | MOVSS( SRC(2), XMM2 ) /* oz */ |
||
85 | SHUFPS( CONST(0x0), XMM2, XMM2 ) /* oz | oz | oz | oz */ |
||
86 | MULPS( XMM6, XMM2 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ |
||
87 | |||
88 | MOVSS( SRC(3), XMM3 ) /* ow */ |
||
89 | SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ow | ow | ow | ow */ |
||
90 | MULPS( XMM7, XMM3 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ |
||
91 | |||
/* Sum the four partial products lane by lane and store the whole vertex. */
92 | ADDPS( XMM1, XMM0 ) /* ox*m3+oy*m7 | ... */ |
||
93 | ADDPS( XMM2, XMM0 ) /* ox*m3+oy*m7+oz*m11 | ... */ |
||
94 | ADDPS( XMM3, XMM0 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ |
||
95 | MOVAPS( XMM0, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ |
||
96 | |||
97 | ADD_L( CONST(16), EDI ) /* next dest vertex: 16 bytes per vertex */ |
||
98 | ADD_L( EAX, ESI ) /* next source vertex: advance by the source stride */ |
||
99 | |||
100 | DEC_L( ECX ) /* one vertex done */ |
||
101 | JNZ( LLBL( sse_general_loop ) ) /* loop until the count reaches zero */ |
||
102 | |||
103 | LLBL( sse_general_done ): |
||
104 | |||
105 | POP_L( EDI ) /* restore in reverse push order */ |
||
106 | POP_L( ESI ) |
||
107 | RET |
||
108 | |||
109 | |||
110 | |||
111 | |||
/*
 * _mesa_sse_transform_points4_3d
 *
 * Multiplies every 4-component source vertex by the 16-float matrix, then
 * replaces the fourth result component with the source vertex's own fourth
 * component (ow), so that component passes through unchanged.
 *
 * Inputs (read through the ARG_* macros): ARG_SOURCE, ARG_DEST, ARG_MATRIX.
 * The vertex count and byte stride are taken from the source vector
 * (V4F_COUNT, V4F_STRIDE); vertex data starts at V4F_START of each vector.
 *
 * Effects on the dest vector: VEC_SIZE_3 is OR-ed into V4F_FLAGS, V4F_COUNT
 * is set to the source count and V4F_SIZE is set to 3. When the source
 * count is zero the routine returns before any of these are written.
 * NOTE(review): the dest is tagged as size 3 although all four floats of
 * every dest vertex are written (the fourth being a copy of ow) -- confirm
 * that this is the intended contract for callers.
 *
 * Loop control differs from the other routines in this section: ECX is
 * turned into an end pointer (dest start + count*16) and the loop runs
 * until EDI reaches it.
 *
 * Register use: ESI = source ptr, EDI = dest ptr, EDX = matrix ptr,
 * ECX = dest end ptr, EAX = source stride, XMM0-XMM3 = matrix,
 * XMM4-XMM7 = per-vertex temporaries.
 *
 * NOTE(review): assuming MOVAPS expands to the movaps instruction, the
 * matrix and every dest vertex address must be 16-byte aligned -- confirm
 * that callers guarantee this.
 *
 * In the lane comments below, lanes are listed from highest to lowest.
 */
112 | ALIGNTEXT4 |
||
113 | GLOBL GLNAME( _mesa_sse_transform_points4_3d ) |
||
114 | GLNAME( _mesa_sse_transform_points4_3d ): |
||
115 | |||
116 | PUSH_L( ESI ) /* preserve ESI; restored before RET */ |
||
117 | PUSH_L( EDI ) /* preserve EDI; restored before RET */ |
||
118 | |||
119 | MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ |
||
120 | MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ |
||
121 | |||
122 | MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
||
123 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
124 | |||
125 | TEST_L( ECX, ECX) /* sets ZF when the count is zero */ |
||
126 | JZ( LLBL(K_GTP43P3DR_finish) ) /* count was zero; go to finish */ |
||
127 | |||
128 | MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI each iteration */ |
||
129 | OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
130 | |||
131 | MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
||
132 | MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )/* set dest size */ |
||
133 | |||
134 | SHL_L( CONST(4), ECX ) /* count *= 16 (bytes written per dest vertex) */ |
||
135 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
136 | |||
137 | MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
||
138 | ADD_L( EDI, ECX ) /* count += dest ptr; ECX is now the dest end pointer */ |
||
139 | |||
/* The matrix is loop-invariant: keep all 16 floats in XMM0-XMM3. */
140 | MOVAPS( MAT(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
||
141 | MOVAPS( MAT(4), XMM1 ) /* m7 | m6 | m5 | m4 */ |
||
142 | MOVAPS( MAT(8), XMM2 ) /* m11 | m10 | m9 | m8 */ |
||
143 | MOVAPS( MAT(12), XMM3 ) /* m15 | m14 | m13 | m12 */ |
||
144 | |||
145 | ALIGNTEXT32 |
||
146 | LLBL( K_GTP43P3DR_top ): |
||
/* Broadcast each source component to all four lanes and scale one matrix group by it. */
147 | MOVSS( SRC(0), XMM4 ) /* ox */ |
||
148 | SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ |
||
149 | MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
||
150 | |||
151 | MOVSS( SRC(1), XMM5 ) /* oy */ |
||
152 | SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */ |
||
153 | MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ |
||
154 | |||
155 | MOVSS( SRC(2), XMM6 ) /* oz */ |
||
156 | SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */ |
||
157 | MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ |
||
158 | |||
159 | MOVSS( SRC(3), XMM7 ) /* ow */ |
||
160 | SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */ |
||
161 | MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ |
||
162 | |||
/* Sum the four partial products lane by lane and store the whole vertex. */
163 | ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */ |
||
164 | ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */ |
||
165 | ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ |
||
166 | MOVAPS( XMM4, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ |
||
167 | |||
/* Overwrite the D(3) just stored with the untransformed source w (second load of SRC(3)). */
168 | MOVSS( SRC(3), XMM4 ) /* ow */ |
||
169 | MOVSS( XMM4, DST(3) ) /* ->D(3) */ |
||
170 | |||
/* This label is not the target of any jump visible in this routine. */
171 | LLBL( K_GTP43P3DR_skip ): |
||
172 | ADD_L( CONST(16), EDI ) /* next dest vertex: 16 bytes per vertex */ |
||
173 | ADD_L( EAX, ESI ) /* next source vertex: advance by the source stride */ |
||
174 | CMP_L( ECX, EDI ) /* has the dest pointer reached the end pointer? */ |
||
175 | JNE( LLBL(K_GTP43P3DR_top) ) /* not yet: transform the next vertex */ |
||
176 | |||
177 | LLBL( K_GTP43P3DR_finish ): |
||
178 | POP_L( EDI ) /* restore in reverse push order */ |
||
179 | POP_L( ESI ) |
||
180 | RET |
||
181 | |||
182 | |||
/*
 * _mesa_sse_transform_points4_identity
 *
 * Copies every source vertex to the dest unchanged, 16 bytes (four floats)
 * per vertex; no matrix arithmetic is performed.
 *
 * Inputs (read through the ARG_* macros): ARG_SOURCE, ARG_DEST, ARG_MATRIX.
 * ARG_MATRIX is loaded into EDX, but EDX is not read again in this routine.
 * The vertex count and byte stride are taken from the source vector
 * (V4F_COUNT, V4F_STRIDE); vertex data starts at V4F_START of each vector.
 *
 * Effects on the dest vector: VEC_SIZE_4 is OR-ed into V4F_FLAGS, V4F_COUNT
 * is set to the source count and V4F_SIZE is set to 4. When the source
 * count is zero the routine returns before any of these are written.
 *
 * Register use: ESI = source ptr, EDI = dest ptr, ECX = remaining vertices,
 * EAX = source stride, XMM0 = the vertex being copied.
 *
 * NOTE(review): assuming MOVAPS expands to the movaps instruction, both the
 * source and the dest vertex addresses must be 16-byte aligned on every
 * iteration; for the source that also constrains the start address and the
 * stride -- confirm that callers guarantee this.
 */
183 | ALIGNTEXT16 |
||
184 | GLOBL GLNAME( _mesa_sse_transform_points4_identity ) |
||
185 | GLNAME( _mesa_sse_transform_points4_identity ): |
||
186 | |||
187 | PUSH_L( ESI ) /* preserve ESI; restored before RET */ |
||
188 | PUSH_L( EDI ) /* preserve EDI; restored before RET */ |
||
189 | |||
190 | MOV_L( ARG_SOURCE, ESI ) /* source vector descriptor */ |
||
191 | MOV_L( ARG_DEST, EDI ) /* dest vector descriptor */ |
||
192 | |||
193 | MOV_L( ARG_MATRIX, EDX ) /* loaded, but EDX is not used below */ |
||
194 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source vertex count */ |
||
195 | |||
196 | TEST_L( ECX, ECX ) /* verify non-zero count */ |
||
197 | JE( LLBL( sse_identity_done ) ) /* zero count: leave dest untouched */ |
||
198 | |||
199 | MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI each iteration */ |
||
200 | OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
201 | |||
202 | MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
||
203 | MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */ |
||
204 | |||
/* From here on ESI and EDI no longer point at the descriptors but at vertex data. */
205 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
206 | MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
||
207 | |||
208 | ALIGNTEXT16 |
||
209 | LLBL( sse_identity_loop ): |
||
210 | |||
211 | PREFETCHNTA( REGOFF(32, ESI) ) /* prefetch 32 bytes past the current source vertex */ |
||
212 | |||
213 | MOVAPS( REGIND(ESI), XMM0 ) /* load the four floats of the source vertex */ |
||
214 | ADD_L( EAX, ESI ) /* next source vertex: advance by the source stride */ |
||
215 | |||
216 | MOVAPS( XMM0, REGIND(EDI) ) /* store them unchanged to the dest vertex */ |
||
217 | ADD_L( CONST(16), EDI ) /* next dest vertex: 16 bytes per vertex */ |
||
218 | |||
219 | DEC_L( ECX ) /* one vertex done */ |
||
220 | JNZ( LLBL( sse_identity_loop ) ) /* loop until the count reaches zero */ |
||
221 | |||
222 | LLBL( sse_identity_done ): |
||
223 | |||
224 | POP_L( EDI ) /* restore in reverse push order */ |
||
225 | POP_L( ESI ) |
||
226 | RET |