Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
70 | giacomo | 1 | /* $Id: sse_xform2.s,v 1.1 2003-03-13 12:11:49 giacomo Exp $ */ |
2 | |||
3 | /* |
||
4 | * Mesa 3-D graphics library |
||
5 | * Version: 3.5 |
||
6 | * |
||
7 | * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
||
8 | * |
||
9 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
10 | * copy of this software and associated documentation files (the "Software"), |
||
11 | * to deal in the Software without restriction, including without limitation |
||
12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
13 | * and/or sell copies of the Software, and to permit persons to whom the |
||
14 | * Software is furnished to do so, subject to the following conditions: |
||
15 | * |
||
16 | * The above copyright notice and this permission notice shall be included |
||
17 | * in all copies or substantial portions of the Software. |
||
18 | * |
||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
22 | * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
||
23 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||
24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | */ |
||
26 | |||
27 | /** TODO: |
||
28 | * - insert PREFETCH instructions to avoid cache-misses ! |
||
29 | * - some more optimizations are possible... |
||
30 | * - for 40-50% more performance in the SSE-functions, the |
||
31 | * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
||
32 | */ |
||
33 | |||
34 | #include "matypes.h" |
||
35 | #include "xform_args.h" |
||
36 | |||
37 | SEG_TEXT |
||
38 | |||
/*
 * Operand helpers: the i-th 4-byte element relative to the current
 * source-vertex pointer (ESI), dest-vertex pointer (EDI) and matrix
 * pointer (EDX).  The argument is pasted unparenthesised, so pass a
 * plain integer literal only.
 */
#define S(i)	REGOFF(i * 4, ESI)
#define D(i)	REGOFF(i * 4, EDI)
#define M(i)	REGOFF(i * 4, EDX)
||
42 | |||
43 | |||
/*
 * _mesa_sse_transform_points2_general
 *
 * For every source vertex (ox, oy) computes four output components
 *
 *     D(0..3) = ox * M(0..3) + oy * M(4..7) + M(12..15)
 *
 * and marks the destination vector as size 4.
 *
 * Stack arguments (read relative to ESP after the two pushes):
 *     dest GLvector4f   at OFFSET_DEST   + FRAME_OFFSET
 *     source GLvector4f at OFFSET_SOURCE + FRAME_OFFSET
 *     matrix            via ARG_MATRIX
 *
 * Register roles:
 *     ESI   current source vertex, advanced by the source stride (EAX)
 *     EDI   current dest vertex, advanced by 16 bytes per vertex
 *     ECX   end-of-output pointer (dest start + count * 16)
 *     EDX   matrix pointer
 *     XMM0  M(0..3), XMM1  M(4..7), XMM2  M(12..15)
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM4 and EFLAGS; ESI/EDI are saved and
 * restored.  MOVAPS is used on the matrix and on every dest vertex, so
 * both must be 16-byte aligned.
 *
 * NOTE(review): when the source count is zero the function returns
 * before writing the dest flags/count/size - confirm callers never rely
 * on those being updated for an empty source.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_general)
GLNAME( _mesa_sse_transform_points2_general ):

/* Bytes pushed below; keep in sync with the PUSH_L/POP_L pairs. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP2GR_finish) )			/* nothing to transform */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* source stride in bytes */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */

	SHL_L( CONST(4), ECX )				/* count * 16 = output bytes */
	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* first dest vertex */
	ADD_L( EDI, ECX )				/* ECX = end of output */

ALIGNTEXT32
	/* Loop-invariant matrix lanes, loaded once. */
	MOVAPS( M(0), XMM0 )				/* m3  | m2  | m1  | m0  */
	MOVAPS( M(4), XMM1 )				/* m7  | m6  | m5  | m4  */
	MOVAPS( M(12), XMM2 )				/* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP2GR_top):
	MOVSS( S(0), XMM3 )				/* ox */
	SHUFPS( CONST(0x0), XMM3, XMM3 )		/* ox | ox | ox | ox */
	MULPS( XMM0, XMM3 )				/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
	MOVSS( S(1), XMM4 )				/* oy */
	SHUFPS( CONST(0x0), XMM4, XMM4 )		/* oy | oy | oy | oy */
	MULPS( XMM1, XMM4 )				/* oy*m7 | oy*m6 | oy*m5 | oy*m4 */

	ADDPS( XMM4, XMM3 )				/* ox*M(0..3) + oy*M(4..7) */
	ADDPS( XMM2, XMM3 )				/* + M(12..15) */
	MOVAPS( XMM3, D(0) )				/* -> D(3) | D(2) | D(1) | D(0) */

LLBL(K_GTP2GR_skip):
	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP2GR_top) )			/* until end of output */

LLBL(K_GTP2GR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
||
102 | |||
103 | |||
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies the first two 4-byte components of every source vertex to the
 * corresponding dest vertex (D(0) = S(0), D(1) = S(1)) and marks the
 * destination vector as size 2.  No matrix is read and no SSE
 * instruction is used; the copy goes through EDX.
 *
 * Stack arguments (read relative to ESP after the two pushes):
 *     dest GLvector4f   at OFFSET_DEST   + FRAME_OFFSET
 *     source GLvector4f at OFFSET_SOURCE + FRAME_OFFSET
 *
 * Register roles:
 *     ESI   current source vertex, advanced by the source stride (EAX)
 *     EDI   current dest vertex, advanced by 16 bytes per vertex
 *     ECX   end-of-output pointer (dest start + count * 16)
 *     EDX   scratch for the copy
 *
 * Clobbers EAX, ECX, EDX and EFLAGS; ESI/EDI are saved and restored.
 *
 * If the source and dest data pointers are equal the copy loop is
 * skipped, but the dest flags/count/size have already been updated.
 *
 * NOTE(review): when the source count is zero the function returns
 * before writing the dest flags/count/size - confirm callers never rely
 * on those being updated for an empty source.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_identity)
GLNAME( _mesa_sse_transform_points2_identity ):

/* Bytes pushed below; keep in sync with the PUSH_L/POP_L pairs. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )	/* ptr to dest GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP2IR_finish) )			/* nothing to copy */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* source stride in bytes */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )	/* dest size = 2 */

	SHL_L( CONST(4), ECX )				/* count * 16 = output bytes */
	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* first dest vertex */
	ADD_L( EDI, ECX )				/* ECX = end of output */

	CMP_L( ESI, EDI )
	JE( LLBL(K_GTP2IR_finish) )			/* in place: data already there */


ALIGNTEXT32
LLBL(K_GTP2IR_top):
	MOV_L( S(0), EDX )				/* D(0) = S(0) */
	MOV_L( EDX, D(0) )
	MOV_L( S(1), EDX )				/* D(1) = S(1) */
	MOV_L( EDX, D(1) )

LLBL(K_GTP2IR_skip):
	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP2IR_top) )			/* until end of output */

LLBL(K_GTP2IR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
||
154 | |||
155 | |||
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * For every source vertex (ox, oy) writes three output components
 *
 *     D(0) = ox * M(0) + M(12)
 *     D(1) = oy * M(5) + M(13)
 *     D(2) = M(14)
 *
 * and marks the destination vector as size 3.
 *
 * Stack arguments (read relative to ESP after the two pushes):
 *     dest GLvector4f   at OFFSET_DEST   + FRAME_OFFSET
 *     source GLvector4f at OFFSET_SOURCE + FRAME_OFFSET
 *     matrix            via ARG_MATRIX
 *
 * Register roles:
 *     ESI   current source vertex, advanced by the source stride (EAX)
 *     EDI   current dest vertex, advanced by 16 bytes per vertex
 *     ECX   end-of-output pointer (dest start + count * 16)
 *     EDX   matrix pointer
 *     XMM1  scale (m5 | m0), XMM2  translation (m13 | m12), XMM3  m14
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM3 and EFLAGS; ESI/EDI are saved and
 * restored.  Only MOVSS/MOVLPS touch memory, so no 16-byte alignment is
 * required here.
 *
 * NOTE(review): when the source count is zero the function returns
 * before writing the dest flags/count/size - confirm callers never rely
 * on those being updated for an empty source.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)
GLNAME(_mesa_sse_transform_points2_3d_no_rot):

/* Bytes pushed below; keep in sync with the PUSH_L/POP_L pairs. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23DNRR_finish) )			/* nothing to transform */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* source stride in bytes */
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */

	SHL_L( CONST(4), ECX )				/* count * 16 = output bytes */
	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* first dest vertex */
	ADD_L( EDI, ECX )				/* ECX = end of output */


ALIGNTEXT32
	/* Loop-invariant matrix values, loaded once. */
	MOVSS( M(0), XMM1 )				/* -  | -  | -   | m0  */
	MOVSS( M(5), XMM2 )				/* -  | -  | -   | m5  */
	UNPCKLPS( XMM2, XMM1 )				/* -  | -  | m5  | m0  */
	MOVLPS( M(12), XMM2 )				/* -  | -  | m13 | m12 */
	MOVSS( M(14), XMM3 )				/* -  | -  | -   | m14 */

ALIGNTEXT32
LLBL(K_GTP23DNRR_top):
	MOVLPS( S(0), XMM0 )				/* -  | -  | oy    | ox    */
	MULPS( XMM1, XMM0 )				/* -  | -  | oy*m5 | ox*m0 */
	ADDPS( XMM2, XMM0 )				/* -  | -  | +m13  | +m12  */
	MOVLPS( XMM0, D(0) )				/* -> D(1) | D(0) */

	MOVSS( XMM3, D(2) )				/* D(2) = m14 */

LLBL(K_GTP23DNRR_skip):
	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23DNRR_top) )			/* until end of output */

LLBL(K_GTP23DNRR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
||
213 | |||
214 | |||
/*
 * _mesa_sse_transform_points2_perspective
 *
 * For every source vertex (ox, oy) writes four output components
 *
 *     D(0) = ox * M(0)
 *     D(1) = oy * M(5)
 *     D(2) = M(14)
 *     D(3) = 0
 *
 * and marks the destination vector as size 4.
 *
 * Stack arguments (read relative to ESP after the two pushes):
 *     dest GLvector4f   at OFFSET_DEST   + FRAME_OFFSET
 *     source GLvector4f at OFFSET_SOURCE + FRAME_OFFSET
 *     matrix            via ARG_MATRIX
 *
 * Register roles:
 *     ESI   current source vertex, advanced by the source stride (EAX)
 *     EDI   current dest vertex, advanced by 16 bytes per vertex
 *     ECX   end-of-output pointer (dest start + count * 16)
 *     EDX   matrix pointer
 *     XMM0  all zero, XMM1  scale (m5 | m0), XMM3  m14
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM4 and EFLAGS; ESI/EDI are saved and
 * restored.  Only MOVSS/MOVLPS touch memory, so no 16-byte alignment is
 * required here.
 *
 * NOTE(review): when the source count is zero the function returns
 * before writing the dest flags/count/size - confirm callers never rely
 * on those being updated for an empty source.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_perspective)
GLNAME(_mesa_sse_transform_points2_perspective):

/* Bytes pushed below; keep in sync with the PUSH_L/POP_L pairs. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23PR_finish) )			/* nothing to transform */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* source stride in bytes */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */

	SHL_L( CONST(4), ECX )				/* count * 16 = output bytes */
	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* first dest vertex */
	ADD_L( EDI, ECX )				/* ECX = end of output */

ALIGNTEXT32
	/* Loop-invariant values, set up once. */
	MOVSS( M(0), XMM1 )				/* - | - | -  | m0 */
	MOVSS( M(5), XMM2 )				/* - | - | -  | m5 */
	UNPCKLPS( XMM2, XMM1 )				/* - | - | m5 | m0 */
	MOVSS( M(14), XMM3 )				/* m14 */
	XORPS( XMM0, XMM0 )				/* 0 | 0 | 0 | 0 */

ALIGNTEXT32
LLBL(K_GTP23PR_top):
	MOVLPS( S(0), XMM4 )				/* oy | ox */
	MULPS( XMM1, XMM4 )				/* oy*m5 | ox*m0 */
	MOVLPS( XMM4, D(0) )				/* -> D(1) | D(0) */
	MOVSS( XMM3, D(2) )				/* D(2) = m14 */
	MOVSS( XMM0, D(3) )				/* D(3) = 0 */

LLBL(K_GTP23PR_skip):
	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23PR_top) )			/* until end of output */

LLBL(K_GTP23PR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
||
270 | |||
271 | |||
272 | |||
/*
 * _mesa_sse_transform_points2_2d
 *
 * For every source vertex (ox, oy) writes two output components
 *
 *     D(0) = ox * M(0) + oy * M(4) + M(12)
 *     D(1) = ox * M(1) + oy * M(5) + M(13)
 *
 * and marks the destination vector as size 2.
 *
 * Stack arguments (read relative to ESP after the two pushes):
 *     dest GLvector4f   at OFFSET_DEST   + FRAME_OFFSET
 *     source GLvector4f at OFFSET_SOURCE + FRAME_OFFSET
 *     matrix            via ARG_MATRIX
 *
 * Register roles:
 *     ESI   current source vertex, advanced by the source stride (EAX)
 *     EDI   current dest vertex, advanced by 16 bytes per vertex
 *     ECX   end-of-output pointer (dest start + count * 16)
 *     EDX   matrix pointer
 *     XMM0  (m1 | m0), XMM1  (m5 | m4), XMM2  (m13 | m12) in the low
 *           two lanes; MOVLPS leaves the high lanes unchanged, and the
 *           results computed in those lanes are never stored.
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM4 and EFLAGS; ESI/EDI are saved and
 * restored.  Only MOVSS/MOVLPS touch memory, so no 16-byte alignment is
 * required here.
 *
 * NOTE(review): when the source count is zero the function returns
 * before writing the dest flags/count/size - confirm callers never rely
 * on those being updated for an empty source.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d)
GLNAME(_mesa_sse_transform_points2_2d):

/* Bytes pushed below; keep in sync with the PUSH_L/POP_L pairs. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23P2DR_finish) )			/* nothing to transform */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* source stride in bytes */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )	/* dest size = 2 */

	SHL_L( CONST(4), ECX )				/* count * 16 = output bytes */
	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* first dest vertex */
	ADD_L( EDI, ECX )				/* ECX = end of output */

ALIGNTEXT32
	/* Loop-invariant matrix lanes, loaded once (low two lanes only). */
	MOVLPS( M(0), XMM0 )				/* - | - | m1  | m0  */
	MOVLPS( M(4), XMM1 )				/* - | - | m5  | m4  */
	MOVLPS( M(12), XMM2 )				/* - | - | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DR_top):
	MOVSS( S(0), XMM3 )				/* ox */
	SHUFPS( CONST(0x0), XMM3, XMM3 )		/* ox | ox | ox | ox */
	MULPS( XMM0, XMM3 )				/* - | - | ox*m1 | ox*m0 */

	MOVSS( S(1), XMM4 )				/* oy */
	SHUFPS( CONST(0x0), XMM4, XMM4 )		/* oy | oy | oy | oy */
	MULPS( XMM1, XMM4 )				/* - | - | oy*m5 | oy*m4 */

	ADDPS( XMM4, XMM3 )				/* ox*M(0..1) + oy*M(4..5) */
	ADDPS( XMM2, XMM3 )				/* + M(12..13) */
	MOVLPS( XMM3, D(0) )				/* -> D(1) | D(0) */

LLBL(K_GTP23P2DR_skip):
	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23P2DR_top) )			/* until end of output */

LLBL(K_GTP23P2DR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
||
332 | |||
333 | |||
334 | |||
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * For every source vertex (ox, oy) writes two output components
 *
 *     D(0) = ox * M(0) + M(12)
 *     D(1) = oy * M(5) + M(13)
 *
 * and marks the destination vector as size 2.
 *
 * Stack arguments (read relative to ESP after the two pushes):
 *     dest GLvector4f   at OFFSET_DEST   + FRAME_OFFSET
 *     source GLvector4f at OFFSET_SOURCE + FRAME_OFFSET
 *     matrix            via ARG_MATRIX
 *
 * Register roles:
 *     ESI   current source vertex, advanced by the source stride (EAX)
 *     EDI   current dest vertex, advanced by 16 bytes per vertex
 *     ECX   end-of-output pointer (dest start + count * 16)
 *     EDX   matrix pointer
 *     XMM1  scale (m5 | m0), XMM2  translation (m13 | m12)
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM2 and EFLAGS; ESI/EDI are saved and
 * restored.  Only MOVSS/MOVLPS touch memory, so no 16-byte alignment is
 * required here.
 *
 * NOTE(review): when the source count is zero the function returns
 * before writing the dest flags/count/size - confirm callers never rely
 * on those being updated for an empty source.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
GLNAME(_mesa_sse_transform_points2_2d_no_rot):

/* Bytes pushed below; keep in sync with the PUSH_L/POP_L pairs. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23P2DNRR_finish) )		/* nothing to transform */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* source stride in bytes */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )	/* dest size = 2 */

	SHL_L( CONST(4), ECX )				/* count * 16 = output bytes */
	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* first dest vertex */
	ADD_L( EDI, ECX )				/* ECX = end of output */

ALIGNTEXT32
	/* Loop-invariant matrix values, loaded once. */
	MOVSS( M(0), XMM1 )				/* m0 */
	MOVSS( M(5), XMM2 )				/* m5 */
	UNPCKLPS( XMM2, XMM1 )				/* m5 | m0 */
	MOVLPS( M(12), XMM2 )				/* m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DNRR_top):
	MOVLPS( S(0), XMM0 )				/* oy | ox */
	MULPS( XMM1, XMM0 )				/* oy*m5 | ox*m0 */
	ADDPS( XMM2, XMM0 )				/* +m13 | +m12 */
	MOVLPS( XMM0, D(0) )				/* -> D(1) | D(0) */

LLBL(K_GTP23P2DNRR_skip):
	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23P2DNRR_top) )			/* until end of output */

LLBL(K_GTP23P2DNRR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
||
388 | |||
389 | |||
390 | |||
/*
 * _mesa_sse_transform_points2_3d
 *
 * For every source vertex (ox, oy) writes three output components
 *
 *     D(0..2) = ox * M(0..2) + oy * M(4..6) + M(12..14)
 *
 * and marks the destination vector as size 3.  The fourth lane is
 * computed along with the others but never stored.
 *
 * Stack arguments (read relative to ESP after the two pushes):
 *     dest GLvector4f   at OFFSET_DEST   + FRAME_OFFSET
 *     source GLvector4f at OFFSET_SOURCE + FRAME_OFFSET
 *     matrix            via ARG_MATRIX
 *
 * Register roles:
 *     ESI   current source vertex, advanced by the source stride (EAX)
 *     EDI   current dest vertex, advanced by 16 bytes per vertex
 *     ECX   end-of-output pointer (dest start + count * 16)
 *     EDX   matrix pointer
 *     XMM0  M(0..3), XMM1  M(4..7), XMM2  M(12..15)
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM4 and EFLAGS; ESI/EDI are saved and
 * restored.  The matrix is loaded with MOVAPS and must therefore be
 * 16-byte aligned; the dest stores use MOVLPS/MOVSS and have no such
 * requirement.
 *
 * NOTE(review): when the source count is zero the function returns
 * before writing the dest flags/count/size - confirm callers never rely
 * on those being updated for an empty source.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d)
GLNAME(_mesa_sse_transform_points2_3d):

/* Bytes pushed below; keep in sync with the PUSH_L/POP_L pairs. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23P3DR_finish) )			/* nothing to transform */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* source stride in bytes */
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */

	SHL_L( CONST(4), ECX )				/* count * 16 = output bytes */
	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* first dest vertex */
	ADD_L( EDI, ECX )				/* ECX = end of output */

ALIGNTEXT32
	/* Loop-invariant matrix lanes, loaded once (top lane is unused). */
	MOVAPS( M(0), XMM0 )				/* m3  | m2  | m1  | m0  */
	MOVAPS( M(4), XMM1 )				/* m7  | m6  | m5  | m4  */
	MOVAPS( M(12), XMM2 )				/* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P3DR_top):
	MOVSS( S(0), XMM3 )				/* ox */
	SHUFPS( CONST(0x0), XMM3, XMM3 )		/* ox | ox | ox | ox */
	MULPS( XMM0, XMM3 )				/* - | ox*m2 | ox*m1 | ox*m0 */

	MOVSS( S(1), XMM4 )				/* oy */
	SHUFPS( CONST(0x0), XMM4, XMM4 )		/* oy | oy | oy | oy */
	MULPS( XMM1, XMM4 )				/* - | oy*m6 | oy*m5 | oy*m4 */

	ADDPS( XMM4, XMM3 )				/* ox*M(0..2) + oy*M(4..6) */
	ADDPS( XMM2, XMM3 )				/* + M(12..14) */

	MOVLPS( XMM3, D(0) )				/* -> D(1) | D(0) */
	UNPCKHPS( XMM3, XMM3 )				/* bring lane 2 down to lane 0 */
	MOVSS( XMM3, D(2) )				/* -> D(2) */

LLBL(K_GTP23P3DR_skip):
	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23P3DR_top) )			/* until end of output */

LLBL(K_GTP23P3DR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET