Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
57 | pj | 1 | /* $Id: 3dnow_normal.s,v 1.1 2003-02-28 11:49:38 pj Exp $ */ |
2 | |||
3 | /* |
||
4 | * Mesa 3-D graphics library |
||
5 | * Version: 4.1 |
||
6 | * |
||
7 | * Copyright (C) 1999-2002 Brian Paul All Rights Reserved. |
||
8 | * |
||
9 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
10 | * copy of this software and associated documentation files (the "Software"), |
||
11 | * to deal in the Software without restriction, including without limitation |
||
12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
13 | * and/or sell copies of the Software, and to permit persons to whom the |
||
14 | * Software is furnished to do so, subject to the following conditions: |
||
15 | * |
||
16 | * The above copyright notice and this permission notice shall be included |
||
17 | * in all copies or substantial portions of the Software. |
||
18 | * |
||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
22 | * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
||
23 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||
24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | */ |
||
26 | |||
27 | /* |
||
28 | * 3Dnow assembly code by Holger Waechtler |
||
29 | */ |
||
30 | |||
31 | #include "matypes.h" |
||
32 | #include "norm_args.h" |
||
33 | |||
34 | SEG_TEXT |
||
35 | |||
/* Operand at byte offset 4*i from ECX (ECX is loaded with mat->inv by the
 * routines in this file).  The argument is parenthesized so that an
 * expression such as M(a + b) yields 4*(a + b) rather than a + 4*b. */
#define M(i)    REGOFF((i) * 4, ECX)
/* Operand at byte offset 12 from ESI (ESI holds ARG_IN); the loops add this
 * value to the input pointer to step to the next normal. */
#define STRIDE  REGOFF(12, ESI)
||
38 | |||
39 | |||
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

/*
 * Transform every input normal by mat->inv, then normalize the result.
 *
 * Pass 1 writes, for each input normal (x0, x1, x2):
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * to the destination, 16 bytes apart.  Pass 2 rewrites the destination in
 * place: with a lengths array each result is multiplied by lengths[i];
 * without one, by a computed 1/sqrt(r0*r0 + r1*r1 + r2*r2).
 *
 * Registers: EAX = dest->start, EDX = in->start, ECX = mat->inv,
 *            EDI = lengths, ESI = in, EBP = remaining count.
 *
 * NOTE(review): the matrix is pre-multiplied by ARG_SCALE only when no
 * lengths array is passed -- confirm this matches the C reference path.
 */

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )  /* dest->count = in->count    */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EBP )                /* count == 0: nothing to do  */
    JE         ( LLBL (G3TN_end) )

    FEMMS

    /* Save counter and pointers for the normalize pass.  The three extra
     * pushes move the stack arguments, hence the new FRAME_OFFSET. */
    PUSH_L     ( EBP )
    PUSH_L     ( EAX )
    PUSH_L     ( EDX )

#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )            /* m1              | m0             */
    MOVQ       ( M(4), MM4 )            /* m5              | m4             */

    MOVD       ( M(2), MM5 )            /*                 | m2             */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6              | m2             */

    MOVQ       ( M(8), MM6 )            /* m9              | m8             */
    MOVQ       ( M(10), MM7 )           /* (unused)        | m10            */

    CMP_L      ( CONST(0), EDI )        /* lengths given: skip the scaling  */
    JNE        ( LLBL (G3TN_scale_end ) )

    MOVD       ( ARG_SCALE, MM0 )       /*                 | scale          */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale           | scale          */

    PFMUL      ( MM0, MM3 )             /* scale * m1      | scale * m0     */
    PFMUL      ( MM0, MM4 )             /* scale * m5      | scale * m4     */
    PFMUL      ( MM0, MM5 )             /* scale * m6      | scale * m2     */
    PFMUL      ( MM0, MM6 )             /* scale * m9      | scale * m8     */
    PFMUL      ( MM0, MM7 )             /*                 | scale * m10    */

    ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
    MOVQ       ( REGIND (EDX), MM0 )    /* x1              | x0             */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*                 | x2             */

    MOVQ       ( MM0, MM1 )             /* x1              | x0             */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2              | x2             */

    PFMUL      ( MM3, MM0 )             /* x1*m1           | x0*m0          */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )             /* x1*m5           | x0*m4          */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5     | x0*m0+x1*m1    */

    PFMUL      ( MM5, MM2 )             /* x2*m6           | x2*m2          */
    PFADD      ( MM2, MM0 )             /* r1              | r0             */

    MOVQ       ( REGIND (EDX), MM1 )    /* x1              | x0             */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                    */

    PFMUL      ( MM6, MM1 )             /* x1*m9           | x0*m8          */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*                 | x2             */

    PFMUL      ( MM7, MM2 )             /*                 | x2*m10         */
    PFACC      ( MM1, MM1 )             /* (unused)        | x0*m8+x1*m9    */

    PFADD      ( MM2, MM1 )             /* (unused)        | r2             */
    ADD_L      ( STRIDE, EDX )          /* next normal                      */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                         */
    DEC_L      ( EBP )                  /* decrement normal counter         */
    JNE        ( LLBL (G3TN_transform) ) /* DEC leaves CF alone: test ZF only */


    POP_L      ( EDX )                  /* end of transform ---             */
    POP_L      ( EAX )                  /* now normalizing ...              */
    POP_L      ( EBP )

    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                   */
    JE         ( LLBL (G3TN_norm ) )    /* calculate lengths                */


    ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*                 | x2             */

    MOVD       ( REGIND (EDI), MM3 )    /*                 | lengths[i]     */
    PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized)*/

    PUNPCKLDQ  ( MM3, MM3 )             /* lengths[i]      | lengths[i]     */
    PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized)*/

    ADD_L      ( CONST(4), EDI )        /* next length                      */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                 */
    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                     */

    ADD_L      ( CONST(16), EAX )       /* next r                           */
    DEC_L      ( EBP )                  /* decrement normal counter         */

    JNE        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

    ALIGNTEXT32
LLBL (G3TN_norm):

    PREFETCHW  ( REGIND(EAX) )

    /* Load the current result at the top of the loop, so the first
     * iteration works on dest[0] and nothing is read past the last one. */
    MOVQ       ( REGIND (EAX), MM0 )    /* x1              | x0             */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*                 | x2             */

    MOVQ       ( MM0, MM3 )             /* x1              | x0             */
    MOVQ       ( MM1, MM4 )             /*                 | x2             */

    PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0          */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    PFMUL      ( MM1, MM4 )             /*                 | x2*x2          */
    PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2    */

    PFACC      ( MM3, MM3 )             /* sum of squares in both halves    */
    PFRSQRT    ( MM3, MM5 )             /* estimate of 1/sqrt (sum)         */

    MOVQ       ( MM5, MM4 )
    PUNPCKLDQ  ( MM3, MM3 )

    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )             /* refinement steps for the         */
    PFRCPIT2   ( MM4, MM5 )             /* reciprocal square root estimate  */

    PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized)*/

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                */
    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized)*/

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                     */

    DEC_L      ( EBP )                  /* decrement normal counter         */
    JNE        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
||
214 | |||
215 | |||
216 | |||
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

/*
 * Transform each input normal by the diagonal of mat->inv only
 * (r0 = x0*m0, r1 = x1*m5, r2 = x2*m10) and normalize it in the same loop.
 * With a lengths array the result is multiplied by lengths[i]; without one,
 * by a computed 1/sqrt(r0*r0 + r1*r1 + r2*r2).  Results are written 16 bytes
 * apart to dest->start.
 *
 * Registers: EAX = dest->start, EDX = in->start, ECX = mat->inv,
 *            EDI = lengths, ESI = in, EBP = remaining count.
 *
 * NOTE(review): the diagonal is pre-multiplied by ARG_SCALE only when no
 * lengths array is passed -- confirm this matches the C reference path.
 */

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )  /* dest->count = in->count    */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */

    CMP_L      ( CONST(0), EBP )                /* count == 0: nothing to do  */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )            /*                 | m0             */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5              | m0             */

    MOVD       ( M(10), MM2 )           /*                 | m10            */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10             | m10            */

    CMP_L      ( CONST(0), EDI )        /* lengths given: skip the scaling  */
    JNE        ( LLBL (G3TNNR_scale_end ) )

    MOVD       ( ARG_SCALE, MM7 )       /*                 | scale          */
    PUNPCKLDQ  ( MM7, MM7 )             /* scale           | scale          */

    PFMUL      ( MM7, MM0 )             /* scale * m5      | scale * m0     */
    PFMUL      ( MM7, MM2 )             /* scale * m10     | scale * m10    */

    ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                   */
    JE         ( LLBL (G3TNNR_norm) )   /* need to calculate lengths        */


    ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):           /* use precalculated lengths        */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*                 | x2             */

    /* lengths[i] is fetched inside the iteration that uses it, so the
     * final iteration does not read one float past the array. */
    MOVD       ( REGIND(EDI), MM3 )     /*                 | lengths[i]     */

    PFMUL      ( MM0, MM6 )             /* x1*m5           | x0*m0          */
    ADD_L      ( STRIDE, EDX )          /* next normal                      */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )             /*                 | x2*m10         */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    PFMUL      ( MM3, MM7 )             /*                 | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )             /* lengths[i]      | lengths[i]     */

    ADD_L      ( CONST(4), EDI )        /* next length                      */
    PFMUL      ( MM3, MM6 )             /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                    */
    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                         */

    DEC_L      ( EBP )                  /* decrement normal counter         */
    JNE        ( LLBL (G3TNNR_norm_w_lengths) ) /* DEC leaves CF alone: test ZF only */
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

    ALIGNTEXT32
LLBL (G3TNNR_norm):                     /* need to calculate lengths        */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*                 | x2             */

    PFMUL      ( MM0, MM6 )             /* x1*m5           | x0*m0          */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    PFMUL      ( MM2, MM7 )             /*                 | x2*m10         */
    MOVQ       ( MM6, MM3 )             /* x1 (transformed)| x0 (transformed) */

    MOVQ       ( MM7, MM4 )             /*                 | x2 (transformed) */
    PFMUL      ( MM6, MM3 )             /* x1*x1           | x0*x0          */

    PFMUL      ( MM7, MM4 )             /*                 | x2*x2          */
    PFACC      ( MM3, MM3 )             /* (unused)        | x0*x0+x1*x1    */

    PFADD      ( MM4, MM3 )             /*                 | x0*x0+x1*x1+x2*x2 */
    ADD_L      ( STRIDE, EDX )          /* next normal                      */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )             /* estimate of 1/sqrt (sum)         */
    MOVQ       ( MM5, MM4 )

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )             /* refinement steps for the         */
    PFRCPIT2   ( MM4, MM5 )             /* reciprocal square root estimate  */

    PFMUL      ( MM5, MM6 )             /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                    */
    PFMUL      ( MM5, MM7 )             /*                 | x2 (normalized) */

    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                         */

    DEC_L      ( EBP )                  /* decrement normal counter         */
    JNE        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
||
350 | |||
351 | |||
352 | |||
353 | |||
354 | |||
355 | |||
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

/*
 * Multiply each input normal by the diagonal of mat->inv and by ARG_SCALE:
 *     r0 = x0 * scale*m0,  r1 = x1 * scale*m5,  r2 = x2 * scale*m10
 * and write the results 16 bytes apart to dest->start.
 *
 * Registers: EAX = dest->start, EDX = in->start, ECX = mat->inv,
 *            ESI = in, EBP = remaining count.
 * EDI is saved and restored although this routine does not use it; the
 * three pushes are what FRAME_OFFSET 12 accounts for.
 */

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )  /* dest->count = in->count    */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */

    CMP_L      ( CONST(0), EBP )                /* count == 0: nothing to do  */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )       /*                 | scale          */
    PUNPCKLDQ  ( MM6, MM6 )             /* scale           | scale          */

    MOVD       ( M(0), MM0 )            /*                 | m0             */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5              | m0             */

    PFMUL      ( MM6, MM0 )             /* scale*m5        | scale*m0       */
    MOVD       ( M(10), MM2 )           /*                 | m10            */

    PFMUL      ( MM6, MM2 )             /*                 | scale*m10      */

    ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*                 | x2             */

    PFMUL      ( MM0, MM4 )             /* x1*m5           | x0*m0          */
    ADD_L      ( STRIDE, EDX )          /* next normal                      */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*                 | x2*m10         */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                    */
    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                         */

    DEC_L      ( EBP )                  /* decrement normal counter         */
    JNE        ( LLBL (G3TRNR_rescale) ) /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
||
422 | |||
423 | |||
424 | |||
425 | |||
426 | |||
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

/*
 * Multiply each input normal by mat->inv pre-scaled with ARG_SCALE:
 *     r0 = scale * (x0*m0 + x1*m1 + x2*m2)
 *     r1 = scale * (x0*m4 + x1*m5 + x2*m6)
 *     r2 = scale * (x0*m8 + x1*m9 + x2*m10)
 * (the scale is folded into the matrix registers once, before the loop) and
 * write the results 16 bytes apart to dest->start.
 *
 * Registers: EAX = dest->start, EDX = in->start, ECX = mat->inv,
 *            ESI = in, EDI = remaining count.
 */

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )  /* dest->count = in->count    */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EDI )                /* count == 0: nothing to do  */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVQ       ( M(0), MM3 )            /* m1              | m0             */

    MOVQ       ( M(4), MM4 )            /* m5              | m4             */
    MOVD       ( ARG_SCALE, MM0 )       /*                 | scale          */

    MOVD       ( M(2), MM5 )            /*                 | m2             */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale           | scale          */

    PUNPCKLDQ  ( M(6), MM5 )            /* m6              | m2             */
    PFMUL      ( MM0, MM3 )             /* scale*m1        | scale*m0       */

    MOVQ       ( M(8), MM6 )            /* m9              | m8             */
    PFMUL      ( MM0, MM4 )             /* scale*m5        | scale*m4       */

    MOVD       ( M(10), MM7 )           /*                 | m10            */
    PFMUL      ( MM0, MM5 )             /* scale*m6        | scale*m2       */

    PFMUL      ( MM0, MM6 )             /* scale*m9        | scale*m8       */

    PFMUL      ( MM0, MM7 )             /*                 | scale*m10      */

    ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*                 | x2             */

    MOVQ       ( MM0, MM1 )             /* x1              | x0             */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2              | x2             */

    PFMUL      ( MM3, MM0 )             /* x1*m1           | x0*m0          */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    PFMUL      ( MM4, MM1 )             /* x1*m5           | x0*m4          */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5     | x0*m0+x1*m1    */

    MOVQ       ( REGIND(EDX), MM1 )     /* x1              | x0             */

    PFMUL      ( MM5, MM2 )             /* x2*m6           | x2*m2          */
    PFADD      ( MM2, MM0 )             /* r1              | r0             */

    MOVD       ( REGOFF(8, EDX), MM2 )  /*                 | x2             */
    ADD_L      ( STRIDE, EDX )          /* next normal                      */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                    */
    PFMUL      ( MM6, MM1 )             /* x1*m9           | x0*m8          */

    PFMUL      ( MM7, MM2 )             /*                 | x2*m10         */
    PFACC      ( MM1, MM1 )             /* (unused)        | x0*m8+x1*m9    */

    PFADD      ( MM2, MM1 )             /* (unused)        | r2             */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                         */

    DEC_L      ( EDI )                  /* decrement normal counter         */
    JNE        ( LLBL (G3TR_rescale) )  /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
||
517 | |||
518 | |||
519 | |||
520 | |||
521 | |||
522 | |||
523 | |||
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

/*
 * Multiply each input normal by the diagonal of mat->inv:
 *     r0 = x0*m0,  r1 = x1*m5,  r2 = x2*m10
 * and write the results 16 bytes apart to dest->start.
 *
 * Registers: EAX = dest->start, EDX = in->start, ECX = mat->inv,
 *            ESI = in, EDI = remaining count.
 */

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )  /* dest->count = in->count    */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EDI )                /* count == 0: nothing to do  */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )            /*                 | m0             */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5              | m0             */

    MOVD       ( M(10), MM2 )           /*                 | m10            */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10             | m10            */

    ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*                 | x2             */

    PFMUL      ( MM0, MM4 )             /* x1*m5           | x0*m0          */
    ADD_L      ( STRIDE, EDX )          /* next normal                      */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*                 | x2*m10         */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                    */
    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                         */

    DEC_L      ( EDI )                  /* decrement normal counter         */
    JNE        ( LLBL (G3TNR_transform) ) /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
||
582 | |||
583 | |||
584 | |||
585 | |||
586 | |||
587 | |||
588 | |||
589 | |||
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

/*
 * Multiply each input normal by mat->inv:
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * and write the results 16 bytes apart to dest->start.
 *
 * Registers: EAX = dest->start, EDX = in->start, ECX = mat->inv,
 *            ESI = in, EDI = remaining count.
 */

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )  /* dest->count = in->count    */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EDI )                /* count == 0: nothing to do  */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( M(0), MM3 )            /* m1              | m0             */
    MOVQ       ( M(4), MM4 )            /* m5              | m4             */

    MOVD       ( M(2), MM5 )            /*                 | m2             */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6              | m2             */

    MOVQ       ( M(8), MM6 )            /* m9              | m8             */
    MOVD       ( M(10), MM7 )           /*                 | m10            */

    ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*                 | x2             */

    MOVQ       ( MM0, MM1 )             /* x1              | x0             */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2              | x2             */

    PFMUL      ( MM3, MM0 )             /* x1*m1           | x0*m0          */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    PFMUL      ( MM4, MM1 )             /* x1*m5           | x0*m4          */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5     | x0*m0+x1*m1    */

    PFMUL      ( MM5, MM2 )             /* x2*m6           | x2*m2          */
    PFADD      ( MM2, MM0 )             /* r1              | r0             */

    MOVQ       ( REGIND(EDX), MM1 )     /* x1              | x0             */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                    */

    PFMUL      ( MM6, MM1 )             /* x1*m9           | x0*m8          */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*                 | x2             */

    PFMUL      ( MM7, MM2 )             /*                 | x2*m10         */
    ADD_L      ( STRIDE, EDX )          /* next normal                      */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )             /* (unused)        | x0*m8+x1*m9    */
    PFADD      ( MM2, MM1 )             /* (unused)        | r2             */

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                         */

    DEC_L      ( EDI )                  /* decrement normal counter         */
    JNE        ( LLBL (G3T_transform) ) /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
||
668 | |||
669 | |||
670 | |||
671 | |||
672 | |||
673 | |||
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

/*
 * Normalize each input normal and write it 16 bytes apart to dest->start.
 * With a lengths array each normal is multiplied by lengths[i]; without
 * one, by a computed 1/sqrt(x0*x0 + x1*x1 + x2*x2).
 *
 * Registers: EAX = dest->start, ECX = in->start, EDX = lengths,
 *            ESI = in, EBP = remaining count.
 * EDI is saved and restored although this routine does not use it; the
 * three pushes are what FRAME_OFFSET 12 accounts for.
 */

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )  /* dest->count = in->count    */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )  /* in->start                  */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )                /* count == 0: nothing to do  */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )        /* lengths == 0 ?                   */
    JE         ( LLBL (G3N_norm2) )     /* calculate lengths                */

    ALIGNTEXT32
LLBL (G3N_norm1):                       /* use precalculated lengths        */

    PREFETCH   ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2             */

    MOVD       ( REGIND(EDX), MM3 )     /*                 | lengths[i]     */
    PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )             /* lengths[i]      | lengths[i]     */
    ADD_L      ( STRIDE, ECX )          /* next normal                      */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                 */

    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                     */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    ADD_L      ( CONST(4), EDX )        /* next length                      */
    DEC_L      ( EBP )                  /* decrement normal counter         */

    JNE        ( LLBL (G3N_norm1) )     /* DEC leaves CF alone: test ZF only */

    JMP        ( LLBL (G3N_end1) )

    ALIGNTEXT32
LLBL (G3N_norm2):                       /* need to calculate lengths        */

    PREFETCHW  ( REGIND(EAX) )

    /* Load the current normal before anything uses MM0/MM1 and before the
     * input pointer advances, so in[0] is processed, the squares are taken
     * of one normal only, and nothing is read past the last normal. */
    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2             */

    MOVQ       ( MM0, MM3 )             /* x1              | x0             */
    ADD_L      ( STRIDE, ECX )          /* next normal                      */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0          */
    MOVQ       ( MM1, MM4 )             /*                 | x2             */

    ADD_L      ( CONST(16), EAX )       /* next r                           */
    PFMUL      ( MM1, MM4 )             /*                 | x2*x2          */

    PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2    */
    PFACC      ( MM3, MM3 )             /* sum of squares in both halves    */

    PFRSQRT    ( MM3, MM5 )             /* estimate of 1/sqrt (sum)         */
    MOVQ       ( MM5, MM4 )

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )             /* refinement steps for the         */
    PFRCPIT2   ( MM4, MM5 )             /* reciprocal square root estimate  */

    PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                */

    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized) */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                     */

    DEC_L      ( EBP )                  /* decrement normal counter         */
    JNE        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
||
779 | |||
780 | |||
781 | |||
782 | |||
783 | |||
784 | |||
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

/*
 * Multiply each input normal by ARG_SCALE and write the results 16 bytes
 * apart to dest->start.
 *
 * Registers: EAX = dest->start, ECX = in->start, ESI = in,
 *            EDX = remaining count.
 * EDI is saved and restored although this routine does not use it; the
 * two pushes are what FRAME_OFFSET 8 accounts for.
 */

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDX )  /* dest->count = in->count    */
    MOV_L      ( EDX, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )  /* in->start                  */

    CMP_L      ( CONST(0), EDX )                /* count == 0: nothing to do  */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )       /*                 | scale          */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale           | scale          */

    ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )     /* x1              | x0             */
    MOVD       ( REGOFF(8, ECX), MM2 )  /*                 | x2             */

    PFMUL      ( MM0, MM1 )             /* x1*scale        | x0*scale       */
    ADD_L      ( STRIDE, ECX )          /* next normal                      */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )             /*                 | x2*scale       */
    ADD_L      ( CONST(16), EAX )       /* next r                           */

    MOVQ       ( MM1, REGOFF(-16, EAX) ) /* write r0, r1                    */
    MOVD       ( MM2, REGOFF(-8, EAX) ) /* write r2                         */

    DEC_L      ( EDX )                  /* decrement normal counter         */
    JNE        ( LLBL (G3R_rescale) )   /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET