Subversion Repositories shark

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
57 pj 1
/* $Id: 3dnow_normal.s,v 1.1 2003-02-28 11:49:38 pj Exp $ */
2
 
3
/*
4
 * Mesa 3-D graphics library
5
 * Version:  4.1
6
 *
7
 * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
8
 *
9
 * Permission is hereby granted, free of charge, to any person obtaining a
10
 * copy of this software and associated documentation files (the "Software"),
11
 * to deal in the Software without restriction, including without limitation
12
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13
 * and/or sell copies of the Software, and to permit persons to whom the
14
 * Software is furnished to do so, subject to the following conditions:
15
 *
16
 * The above copyright notice and this permission notice shall be included
17
 * in all copies or substantial portions of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 */
26
 
27
/*
28
 * 3Dnow assembly code by Holger Waechtler
29
 */
30
 
31
#include "matypes.h"
32
#include "norm_args.h"
33
 
34
        SEG_TEXT
35
 
36
/* M(i): memory operand at byte offset i*4 from ECX; the routines that use it
 * load mat->inv into ECX first.  The argument is not parenthesized in the
 * expansion, so pass only plain constants. */
#define M(i)    REGOFF(i * 4, ECX)
37
/* STRIDE: memory operand at byte offset 12 from ESI (every routine in this
 * file loads ARG_IN into ESI); it is added to the input cursor to step to
 * the next normal. */
#define STRIDE  REGOFF(12, ESI)
38
 
39
 
40
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Pass 1: for every input normal x compute
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * with m = mat->inv, and store r0,r1,r2 into dest (16 bytes per normal).
 * Pass 2: walk dest again and normalize in place, either by multiplying
 * with lengths[i] (lengths != 0) or by a computed
 * 1/sqrt(r0*r0 + r1*r1 + r2*r2) (lengths == 0).
 *
 * Arguments are read from the stack through the ARG_* macros:
 *   ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS, ARG_DEST.
 *
 * Register use:
 *   EAX = dest cursor          EDX = input cursor (stepped by STRIDE)
 *   ECX = mat->inv             ESI = in
 *   EDI = lengths              EBP = normals left to process
 *   MM3..MM7 = matrix elements during pass 1
 *
 * EDI, ESI and EBP are saved/restored here; EAX, ECX, EDX and the MMX
 * registers are overwritten.
 *
 * NOTE(review): the scale factor is folded into the matrix only when
 * lengths == 0, i.e. on the path that renormalizes anyway; when
 * lengths != 0, ARG_SCALE is never applied.  Confirm the branch sense
 * against the C reference implementation before changing it.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

 #define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )  /* dest->count = in->count    */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EBP )        /* count == 0: nothing to do         */
    JE         ( LLBL (G3TN_end) )

    FEMMS

    PUSH_L     ( EBP )                  /* keep count, dest->start and       */
    PUSH_L     ( EAX )                  /* in->start for the normalize pass  */
    PUSH_L     ( EDX )

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 24                /* three more dwords on the stack    */

    MOVQ       ( M(0), MM3 )            /* m1            | m0                */
    MOVQ       ( M(4), MM4 )            /* m5            | m4                */

    MOVD       ( M(2), MM5 )            /*               | m2                */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6            | m2                */

    MOVQ       ( M(8), MM6 )            /* m9            | m8                */
    MOVD       ( M(10), MM7 )           /*               | m10               */

    CMP_L      ( CONST(0), EDI )        /* lengths supplied: skip scaling    */
    JNE        ( LLBL (G3TN_scale_end ) )

    MOVD       ( ARG_SCALE, MM0 )       /*               | scale             */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale             */

    PFMUL      ( MM0, MM3 )             /* scale * m1    | scale * m0        */
    PFMUL      ( MM0, MM4 )             /* scale * m5    | scale * m4        */
    PFMUL      ( MM0, MM5 )             /* scale * m6    | scale * m2        */
    PFMUL      ( MM0, MM6 )             /* scale * m9    | scale * m8        */
    PFMUL      ( MM0, MM7 )             /*               | scale * m10       */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
    MOVQ       ( REGIND (EDX), MM0 )    /* x1            | x0                */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*               | x2                */

    MOVQ       ( MM0, MM1 )             /* x1            | x0                */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                */

    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0             */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4             */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1       */

    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2             */
    PFADD      ( MM2, MM0 )             /* r1            | r0                */

    MOVQ       ( REGIND (EDX), MM1 )    /* x1            | x0                */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                     */

    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8             */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*               | x2                */

    PFMUL      ( MM7, MM2 )             /*               | x2*m10            */
    PFACC      ( MM1, MM1 )             /* (unused)      | x0*m8+x1*m9       */

    PFADD      ( MM2, MM1 )             /* (unused)      | r2                */
    ADD_L      ( STRIDE, EDX )          /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                          */
    DEC_L      ( EBP )                  /* one normal done; sets ZF          */
    JNE        ( LLBL (G3TN_transform) ) /* DEC leaves CF alone: test ZF only */


    POP_L      ( EDX )                  /* transform finished, restore       */
    POP_L      ( EAX )                  /* dest->start and the count for     */
    POP_L      ( EBP )                  /* the normalize pass                */

    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                    */
    JE         ( LLBL (G3TN_norm ) )    /* yes: compute the lengths          */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):             /* multiply by supplied lengths[i]   */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )     /* r1            | r0                */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*               | r2                */

    MOVD       ( REGIND (EDI), MM3 )    /*               | lengths[i]        */
    PFMUL      ( MM3, MM1 )             /*               | r2 * length       */

    PUNPCKLDQ  ( MM3, MM3 )             /* length        | length            */
    PFMUL      ( MM3, MM0 )             /* r1 * length   | r0 * length       */

    ADD_L      ( CONST(4), EDI )        /* next length                       */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )     /* write new r0, r1                  */
    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new r2                      */

    ADD_L      ( CONST(16), EAX )       /* next r                            */
    DEC_L      ( EBP )                  /* one normal done; sets ZF          */

    JNE        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                       /* compute 1/length per normal       */

    PREFETCHW  ( REGIND(EAX) )

    /* Load the current result at the top of every iteration so that the
     * first pass works on dest[0] and nothing past the last element is
     * ever read. */
    MOVQ       ( REGIND (EAX), MM0 )    /* r1            | r0                */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*               | r2                */

    MOVQ       ( MM0, MM3 )             /* r1            | r0                */
    MOVQ       ( MM1, MM4 )             /*               | r2                */

    PFMUL      ( MM0, MM3 )             /* r1*r1         | r0*r0             */
    ADD_L      ( CONST(16), EAX )       /* next r                            */

    PFMUL      ( MM1, MM4 )             /*               | r2*r2             */
    PFADD      ( MM4, MM3 )             /* r1*r1         | r0*r0+r2*r2       */

    PFACC      ( MM3, MM3 )             /* sum of squares in both halves     */
    PFRSQRT    ( MM3, MM5 )             /* approx 1/sqrt(sum)                */

    MOVQ       ( MM5, MM4 )
    PUNPCKLDQ  ( MM3, MM3 )

    DEC_L      ( EBP )                  /* sets ZF; the 3DNow! and MOV ops   */
    PFMUL      ( MM5, MM5 )             /* below leave EFLAGS untouched      */

    PFRSQIT1   ( MM3, MM5 )             /* refine the reciprocal sqrt        */
    PFRCPIT2   ( MM4, MM5 )

    PFMUL      ( MM5, MM0 )             /* r1 normalized | r0 normalized     */

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new r0, r1                 */
    PFMUL      ( MM5, MM1 )             /*               | r2 normalized     */

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new r2                      */
    JNE        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
214
 
215
 
216
 
217
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Same job as the routine above but only the diagonal of mat->inv is used:
 *     r0 = x0*m0,  r1 = x1*m5,  r2 = x2*m10
 * followed by normalization, either with lengths[i] (lengths != 0) or with
 * a computed 1/sqrt(r0*r0 + r1*r1 + r2*r2) (lengths == 0).  Transform and
 * normalization happen in a single pass.
 *
 * Arguments are read from the stack through the ARG_* macros:
 *   ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS, ARG_DEST.
 *
 * Register use:
 *   EAX = dest cursor (16 bytes per normal)
 *   EDX = input cursor (stepped by STRIDE)
 *   ECX = mat->inv     ESI = in     EDI = lengths cursor
 *   EBP = normals left to process
 *   MM0 = m5 | m0      MM2 = m10 | m10
 *
 * EDI, ESI and EBP are saved/restored here; EAX, ECX, EDX and the MMX
 * registers are overwritten.
 *
 * NOTE(review): the scale factor is applied only when lengths == 0, the
 * path that renormalizes anyway; with lengths != 0 ARG_SCALE is unused.
 * Confirm the branch sense against the C reference implementation.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

 #undef FRAME_OFFSET
 #define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )  /* dest->count = in->count    */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */

    CMP_L      ( CONST(0), EBP )        /* count == 0: nothing to do          */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )            /*               | m0                 */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */

    MOVD       ( M(10), MM2 )           /*               | m10                */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */

    CMP_L      ( CONST(0), EDI )        /* lengths supplied: skip scaling     */
    JNE        ( LLBL (G3TNNR_scale_end ) )

    MOVD       ( ARG_SCALE, MM7 )       /*               | scale              */
    PUNPCKLDQ  ( MM7, MM7 )             /* scale         | scale              */

    PFMUL      ( MM7, MM0 )             /* scale * m5    | scale * m0         */
    PFMUL      ( MM7, MM2 )             /* scale * m10   | scale * m10        */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                     */
    JE         ( LLBL (G3TNNR_norm) )   /* yes: compute the lengths           */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):           /* use precalculated lengths          */

    PREFETCHW  ( REGIND(EAX) )

    /* lengths[i] is fetched at the top of the iteration so that the element
     * one past the end of the array is never read. */
    MOVD       ( REGIND(EDI), MM3 )     /*               | lengths[i]         */

    MOVQ       ( REGIND(EDX), MM6 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*               | x2                 */

    PFMUL      ( MM0, MM6 )             /* x1*m5         | x0*m0              */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )             /*               | x2*m10             */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM3, MM7 )             /*               | r2 * length        */
    PUNPCKLDQ  ( MM3, MM3 )             /* length        | length             */

    ADD_L      ( CONST(4), EDI )        /* next length                        */
    PFMUL      ( MM3, MM6 )             /* r1 * length   | r0 * length        */

    DEC_L      ( EBP )                  /* sets ZF; the MOVs below keep it    */
    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */

    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */

    JNE        ( LLBL (G3TNNR_norm_w_lengths) ) /* DEC leaves CF alone: ZF only */
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                     /* need to calculate lengths          */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*               | x2                 */

    PFMUL      ( MM0, MM6 )             /* r1 = x1*m5    | r0 = x0*m0         */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM2, MM7 )             /*               | r2 = x2*m10        */
    MOVQ       ( MM6, MM3 )             /* r1            | r0                 */

    MOVQ       ( MM7, MM4 )             /*               | r2                 */
    PFMUL      ( MM6, MM3 )             /* r1*r1         | r0*r0              */


    PFMUL      ( MM7, MM4 )             /*               | r2*r2              */
    PFACC      ( MM3, MM3 )             /* (unused)      | r0*r0+r1*r1        */

    PFADD      ( MM4, MM3 )             /*               | sum of squares     */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )             /* approx 1/sqrt(sum)                 */
    MOVQ       ( MM5, MM4 )

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )             /* refine the reciprocal sqrt         */
    DEC_L      ( EBP )                  /* sets ZF; nothing below changes it  */

    PFRCPIT2   ( MM4, MM5 )
    PFMUL      ( MM5, MM6 )             /* r1 normalized | r0 normalized      */

    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */
    PFMUL      ( MM5, MM7 )             /*               | r2 normalized      */

    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */
    JNE        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
350
 
351
 
352
 
353
 
354
 
355
 
356
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For every input normal x writes
 *     r0 = x0 * scale*m0,  r1 = x1 * scale*m5,  r2 = x2 * scale*m10
 * to dest (16 bytes per normal), with m = mat->inv.
 *
 * Arguments are read from the stack through the ARG_* macros:
 *   ARG_MAT, ARG_SCALE, ARG_IN, ARG_DEST.
 *
 * Register use:
 *   EAX = dest cursor     EDX = input cursor (stepped by STRIDE)
 *   ECX = mat->inv        ESI = in
 *   EBP = normals left to process
 *   MM0 = scale*m5 | scale*m0     MM2 = scale*m10 (low half)
 *
 * EDI, ESI and EBP are saved/restored here (FRAME_OFFSET 12 matches the
 * three pushes); EAX, ECX, EDX and the MMX registers are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

 #undef FRAME_OFFSET
 #define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )  /* dest->count = in->count    */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EBP )        /* count == 0: nothing to do          */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )       /*               | scale              */
    PUNPCKLDQ  ( MM6, MM6 )             /* scale         | scale              */

    MOVD       ( REGIND(ECX), MM0 )     /*               | m0                 */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 ) /* m5            | m0                 */

    PFMUL      ( MM6, MM0 )             /* scale*m5      | scale*m0           */
    MOVD       ( REGOFF(40, ECX), MM2 ) /*               | m10                */

    PFMUL      ( MM6, MM2 )             /*               | scale*m10          */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */

    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    DEC_L      ( EBP )                  /* sets ZF; the MOVs below keep it    */
    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */

    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */
    JNE        ( LLBL (G3TRNR_rescale) ) /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
422
 
423
 
424
 
425
 
426
 
427
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For every input normal x writes
 *     r0 = scale * (x0*m0 + x1*m1 + x2*m2)
 *     r1 = scale * (x0*m4 + x1*m5 + x2*m6)
 *     r2 = scale * (x0*m8 + x1*m9 + x2*m10)
 * to dest (16 bytes per normal), with m = mat->inv.  The scale factor is
 * folded into the matrix registers once, before the loop.
 *
 * Arguments are read from the stack through the ARG_* macros:
 *   ARG_MAT, ARG_SCALE, ARG_IN, ARG_DEST.
 *
 * Register use:
 *   EAX = dest cursor     EDX = input cursor (stepped by STRIDE)
 *   ECX = mat->inv        ESI = in
 *   EDI = normals left to process
 *   MM3..MM7 = scaled matrix elements
 *
 * EDI and ESI are saved/restored here; EAX, ECX, EDX and the MMX registers
 * are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )  /* dest->count = in->count    */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EDI )        /* count == 0: nothing to do          */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )     /* m1            | m0                 */

    MOVQ       ( REGOFF(16,ECX), MM4 )  /* m5            | m4                 */
    MOVD       ( ARG_SCALE, MM0 )       /*               | scale              */

    MOVD       ( REGOFF(8,ECX), MM5 )   /*               | m2                 */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */

    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 ) /* m6            | m2                 */
    PFMUL      ( MM0, MM3 )             /* scale*m1      | scale*m0           */

    MOVQ       ( REGOFF(32, ECX), MM6 ) /* m9            | m8                 */
    PFMUL      ( MM0, MM4 )             /* scale*m5      | scale*m4           */

    MOVD       ( REGOFF(40, ECX), MM7 ) /*               | m10                */
    PFMUL      ( MM0, MM5 )             /* scale*m6      | scale*m2           */

    PFMUL      ( MM0, MM6 )             /* scale*m9      | scale*m8           */

    PFMUL      ( MM0, MM7 )             /*               | scale*m10          */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */

    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */

    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */

    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */

    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
    PFADD      ( MM2, MM0 )             /* r1            | r0                 */

    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                      */
    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */

    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
    PFACC      ( MM1, MM1 )             /* (unused)      | x0*m8+x1*m9        */

    PFADD      ( MM2, MM1 )             /* (unused)      | r2                 */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                           */

    DEC_L      ( EDI )                  /* one normal done; sets ZF           */
    JNE        ( LLBL (G3TR_rescale) )  /* DEC leaves CF alone: test ZF only  */

    FEMMS

LLBL (G3TR_end):
    POP_L       ( ESI )
    POP_L       ( EDI )
    RET
517
 
518
 
519
 
520
 
521
 
522
 
523
 
524
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For every input normal x writes
 *     r0 = x0*m0,  r1 = x1*m5,  r2 = x2*m10
 * to dest (16 bytes per normal), with m = mat->inv.
 *
 * Arguments are read from the stack through the ARG_* macros:
 *   ARG_MAT, ARG_IN, ARG_DEST.
 *
 * Register use:
 *   EAX = dest cursor     EDX = input cursor (stepped by STRIDE)
 *   ECX = mat->inv        ESI = in
 *   EDI = normals left to process
 *   MM0 = m5 | m0         MM2 = m10 | m10
 *
 * EDI and ESI are saved/restored here; EAX, ECX, EDX and the MMX registers
 * are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )  /* dest->count = in->count    */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EDI )        /* count == 0: nothing to do          */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( REGIND(ECX), MM0 )     /*               | m0                 */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 ) /* m5            | m0                 */

    MOVD       ( REGOFF(40, ECX), MM2 ) /*               | m10                */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */

    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
    ADD_L      ( STRIDE, EDX)           /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    DEC_L      ( EDI )                  /* sets ZF; the MOVs below keep it    */
    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */

    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */
    JNE        ( LLBL (G3TNR_transform) ) /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TNR_end):
    POP_L       ( ESI )
    POP_L       ( EDI )
    RET
582
 
583
 
584
 
585
 
586
 
587
 
588
 
589
 
590
/*
 * _mesa_3dnow_transform_normals
 *
 * For every input normal x writes
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * to dest (16 bytes per normal), with m = mat->inv.
 *
 * Arguments are read from the stack through the ARG_* macros:
 *   ARG_MAT, ARG_IN, ARG_DEST.
 *
 * Register use:
 *   EAX = dest cursor     EDX = input cursor (stepped by STRIDE)
 *   ECX = mat->inv        ESI = in
 *   EDI = normals left to process
 *   MM3..MM7 = matrix elements
 *
 * EDI and ESI are saved/restored here; EAX, ECX, EDX and the MMX registers
 * are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )  /* dest->count = in->count    */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )  /* in->start                  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                   */

    CMP_L      ( CONST(0), EDI )        /* count == 0: nothing to do          */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )     /* m1            | m0                 */
    MOVQ       ( REGOFF(16, ECX), MM4 ) /* m5            | m4                 */

    MOVD       ( REGOFF(8, ECX), MM5 )  /*               | m2                 */
    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 ) /* m6            | m2                 */

    MOVQ       ( REGOFF(32, ECX), MM6 ) /* m9            | m8                 */
    MOVD       ( REGOFF(40, ECX), MM7 ) /*               | m10                */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */

    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */

    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */

    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
    PFADD      ( MM2, MM0 )             /* r1            | r0                 */

    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                      */

    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */

    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )             /* (unused)      | x0*m8+x1*m9        */
    PFADD      ( MM2, MM1 )             /* (unused)      | r2                 */

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                           */
    DEC_L      ( EDI )                  /* one normal done; sets ZF           */

    JNE        ( LLBL (G3T_transform) ) /* DEC leaves CF alone: test ZF only  */

    FEMMS

LLBL (G3T_end):
    POP_L  ( ESI )
    POP_L  ( EDI )
    RET
668
 
669
 
670
 
671
 
672
 
673
 
674
/*
 * _mesa_3dnow_normalize_normals
 *
 * Copies every input normal to dest (16 bytes per normal), normalized
 * either by multiplying with lengths[i] (lengths != 0) or by a computed
 * 1/sqrt(x0*x0 + x1*x1 + x2*x2) (lengths == 0).  No matrix is involved.
 *
 * Arguments are read from the stack through the ARG_* macros:
 *   ARG_IN, ARG_LENGTHS, ARG_DEST.
 *
 * Register use:
 *   EAX = dest cursor     ECX = input cursor (stepped by STRIDE)
 *   EDX = lengths cursor  ESI = in
 *   EBP = normals left to process
 *
 * EDI, ESI and EBP are saved/restored here (FRAME_OFFSET 12 matches the
 * three pushes); EAX, ECX, EDX and the MMX registers are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )  /* dest->count = in->count    */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )  /* in->start                  */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )        /* count == 0: nothing to do          */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )        /* lengths == 0 ?                     */
    JE         ( LLBL (G3N_norm2) )     /* yes: compute the lengths           */

ALIGNTEXT32
LLBL (G3N_norm1):                       /* use precalculated lengths          */

    PREFETCHW  ( REGIND(EAX) )          /* dest line is about to be written   */

    MOVQ       ( REGIND(ECX), MM0 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*               | x2                 */

    MOVD       ( REGIND(EDX), MM3 )     /*               | lengths[i]         */
    PFMUL      ( MM3, MM1 )             /*               | x2 * length        */

    PUNPCKLDQ  ( MM3, MM3 )             /* length        | length             */
    ADD_L      ( STRIDE, ECX )          /* next normal                        */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )             /* x1 * length   | x0 * length        */
    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                   */

    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                       */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    ADD_L      ( CONST(4), EDX )        /* next length                        */
    DEC_L      ( EBP )                  /* one normal done; sets ZF           */

    JNE        ( LLBL (G3N_norm1) )     /* DEC leaves CF alone: test ZF only  */

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                       /* need to calculate lengths          */

    PREFETCHW  ( REGIND(EAX) )

    /* Load the current normal BEFORE squaring it and before advancing the
     * input cursor, so in[0] is processed first and nothing past the last
     * input element is read. */
    MOVQ       ( REGIND(ECX), MM0 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*               | x2                 */

    MOVQ       ( MM0, MM3 )             /* x1            | x0                 */
    ADD_L      ( STRIDE, ECX )          /* next normal                        */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM3 )             /* x1*x1         | x0*x0              */
    MOVQ       ( MM1, MM4 )             /*               | x2                 */

    ADD_L      ( CONST(16), EAX )       /* next r                             */
    PFMUL      ( MM1, MM4 )             /*               | x2*x2              */

    PFADD      ( MM4, MM3 )             /* x1*x1         | x0*x0+x2*x2        */
    PFACC      ( MM3, MM3 )             /* sum of squares in both halves      */

    PFRSQRT    ( MM3, MM5 )             /* approx 1/sqrt(sum)                 */
    MOVQ       ( MM5, MM4 )

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )             /* refine the reciprocal sqrt         */
    DEC_L      ( EBP )                  /* sets ZF; nothing below changes it  */

    PFRCPIT2   ( MM4, MM5 )

    PFMUL      ( MM5, MM0 )             /* x1 normalized | x0 normalized      */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                  */

    PFMUL      ( MM5, MM1 )             /*               | x2 normalized      */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                       */

    JNE        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
779
 
780
 
781
 
782
 
783
 
784
 
785
/*
 * _mesa_3dnow_rescale_normals
 *
 * For every input normal x writes x0*scale, x1*scale, x2*scale to dest
 * (16 bytes per normal).  No matrix is involved.
 *
 * Arguments are read from the stack through the ARG_* macros:
 *   ARG_SCALE, ARG_IN, ARG_DEST.
 *
 * Register use:
 *   EAX = dest cursor     ECX = input cursor (stepped by STRIDE)
 *   ESI = in              EDX = normals left to process
 *   MM0 = scale | scale
 *
 * EDI and ESI are saved/restored here; EAX, ECX, EDX and the MMX registers
 * are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDX )  /* dest->count = in->count    */
    MOV_L      ( EDX, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )  /* dest->start                */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )  /* in->start                  */

    CMP_L      ( CONST(0), EDX )        /* count == 0: nothing to do          */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )       /*               | scale              */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, ECX), MM2 )  /*               | x2                 */

    PFMUL      ( MM0, MM1 )             /* x1*scale      | x0*scale           */
    ADD_L      ( STRIDE, ECX )          /* next normal                        */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )             /*               | x2*scale           */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    MOVQ       ( MM1, REGOFF(-16, EAX) ) /* write r0, r1                      */
    MOVD       ( MM2, REGOFF(-8, EAX) ) /* write r2                           */

    DEC_L      ( EDX )                  /* one normal done; sets ZF           */
    JNE        ( LLBL (G3R_rescale) )   /* DEC leaves CF alone: test ZF only  */

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET