Subversion Repositories shark

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
70 giacomo 1
/* $Id: 3dnow_xform4.s,v 1.1 2003-03-13 12:11:48 giacomo Exp $ */

/*
 * Mesa 3-D graphics library
 * Version:  3.5
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26
 
27
#include "matypes.h"
28
#include "xform_args.h"
29
 
30
    SEG_TEXT				/* everything below is emitted via the SEG_TEXT macro */

/*
 * Every routine visible in this file pushes exactly one register (ESI,
 * 4 bytes) before its first ARG_* access.
 * NOTE(review): FRAME_OFFSET is presumably what the ARG_* macros in
 * xform_args.h add to skip that push -- confirm against that header.
 */
#define FRAME_OFFSET	4
33
 
34
 
35
/*
 * _mesa_3dnow_transform_points4_general
 *
 * Multiplies every 4-float source vertex (x0 x1 x2 x3) by all sixteen
 * matrix floats m0..m15 (numbered by byte offset / 4):
 *
 *     r[j] = x0*m[j] + x1*m[4+j] + x2*m[8+j] + x3*m[12+j]      j = 0..3
 *
 * The operands are obtained through the ARG_DEST, ARG_MATRIX and
 * ARG_SOURCE macros.  The destination vector receives size 4, the
 * VEC_SIZE_4 flag bits and the source's vertex count.  Results are
 * stored 16 bytes apart; inputs are read V4F_STRIDE bytes apart.
 *
 * Register roles once the set-up is finished:
 *     EAX  current source vertex       EDX  current destination vertex
 *     ECX  matrix                      ESI  vertices still to process
 *     EDI  source stride (bytes)
 *
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
GLNAME( _mesa_3dnow_transform_points4_general ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )				/* ECX = destination vector        */
    MOV_L     ( ARG_SOURCE, EAX )			/* EAX = source vector             */
    MOV_L     ( ARG_MATRIX, ESI )			/* ESI = matrix, parked for now    */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4                   */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count       */

    /* Second push is issued only after the last ARG_* access. */
    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )		/* EDX = destination data          */
    MOV_L     ( ESI, ECX )				/* ECX = matrix                    */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )		/* EDI = source stride             */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )		/* ESI = vertex count              */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )		/* EAX = source data               */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_DONE ) )			/* count == 0: skip the loop       */

    PREFETCHW ( REGIND(EDX) )				/* first output vertex             */

ALIGNTEXT16
LLBL( G3TPGR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )	/* output, 32 bytes (two results) ahead */

    MOVQ      ( REGIND(EAX), MM0 )	/* MM0 = x1 | x0                        */
    MOVQ      ( REGOFF(8, EAX), MM4 )	/* MM4 = x3 | x2                        */

    ADD_L     ( EDI, EAX )		/* source += stride                     */
    PREFETCH  ( REGIND(EAX) )		/* next input vertex                    */

    MOVQ      ( MM0, MM2 )		/* MM2 = x1 | x0                        */
    MOVQ      ( MM4, MM6 )		/* MM6 = x3 | x2                        */

    PUNPCKLDQ ( MM0, MM0 )		/* MM0 = x0 | x0                        */
    PUNPCKHDQ ( MM2, MM2 )		/* MM2 = x1 | x1                        */

    MOVQ      ( MM0, MM1 )		/* MM1 = x0 | x0                        */
    ADD_L     ( CONST(16), EDX )	/* dest += 16; stores use -16 / -8      */

    PFMUL     ( REGIND(ECX), MM0 )	/* MM0 = x0*m1  | x0*m0                 */
    MOVQ      ( MM2, MM3 )		/* MM3 = x1 | x1                        */

    PFMUL     ( REGOFF(8, ECX), MM1 )	/* MM1 = x0*m3  | x0*m2                 */
    PUNPCKLDQ ( MM4, MM4 )		/* MM4 = x2 | x2                        */

    PFMUL     ( REGOFF(16, ECX), MM2 )	/* MM2 = x1*m5  | x1*m4                 */
    MOVQ      ( MM4, MM5 )		/* MM5 = x2 | x2                        */

    PFMUL     ( REGOFF(24, ECX), MM3 )	/* MM3 = x1*m7  | x1*m6                 */
    PUNPCKHDQ ( MM6, MM6 )		/* MM6 = x3 | x3                        */

    PFMUL     ( REGOFF(32, ECX), MM4 )	/* MM4 = x2*m9  | x2*m8                 */
    MOVQ      ( MM6, MM7 )		/* MM7 = x3 | x3                        */

    PFMUL     ( REGOFF(40, ECX), MM5 )	/* MM5 = x2*m11 | x2*m10                */
    PFADD     ( MM0, MM2 )		/* MM2 = x0, x1 terms of r1 | r0        */

    PFMUL     ( REGOFF(48, ECX), MM6 )	/* MM6 = x3*m13 | x3*m12                */
    PFADD     ( MM1, MM3 )		/* MM3 = x0, x1 terms of r3 | r2        */

    PFMUL     ( REGOFF(56, ECX), MM7 )	/* MM7 = x3*m15 | x3*m14                */
    PFADD     ( MM4, MM6 )		/* MM6 = x2, x3 terms of r1 | r0        */

    PFADD     ( MM5, MM7 )		/* MM7 = x2, x3 terms of r3 | r2        */
    PFADD     ( MM2, MM6 )		/* MM6 = r1 | r0                        */

    PFADD     ( MM3, MM7 )		/* MM7 = r3 | r2                        */
    MOVQ      ( MM6, REGOFF(-16, EDX) )	/* store r0, r1                         */

    MOVQ      ( MM7, REGOFF(-8, EDX) )	/* store r2, r3                         */

    DEC_L     ( ESI )			/* one vertex done                      */
    JNZ       ( LLBL( G3TPGR_LOOP ) )	/* loop while vertices remain           */

LLBL( G3TPGR_DONE ):

    FEMMS				/* leave MMX/3DNow! state               */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
123
 
124
 
125
 
126
 
127
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Per source vertex (x0 x1 x2 x3), using only matrix floats m0, m5, m8,
 * m9, m10 and m14 (numbered by byte offset / 4):
 *
 *     r0 = x0*m0  + x2*m8
 *     r1 = x1*m5  + x2*m9
 *     r2 = x2*m10 + x3*m14
 *     r3 = -x2
 *
 * The operands are obtained through the ARG_DEST, ARG_MATRIX and
 * ARG_SOURCE macros.  The destination vector receives size 4, the
 * VEC_SIZE_4 flag bits and the source's vertex count.  Results are
 * stored 16 bytes apart; inputs are read V4F_STRIDE bytes apart.
 *
 * Register roles once the set-up is finished:
 *     EAX  current source vertex       EDX  current destination vertex
 *     ECX  matrix                      ESI  vertices still to process
 *     EDI  source stride (bytes)
 *     MM0 = m5 | m0    MM1 = m14 | m10    MM2 = m9 | m8    MM7 = 0 | 0
 *
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
GLNAME( _mesa_3dnow_transform_points4_perspective ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )				/* ECX = destination vector        */
    MOV_L     ( ARG_SOURCE, EAX )			/* EAX = source vector             */
    MOV_L     ( ARG_MATRIX, ESI )			/* ESI = matrix, parked for now    */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4                   */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count       */

    /* Second push is issued only after the last ARG_* access. */
    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )		/* EDX = destination data          */
    MOV_L     ( ESI, ECX )				/* ECX = matrix                    */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )		/* EDI = source stride             */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )		/* ESI = vertex count              */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )		/* EAX = source data               */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_DONE ) )			/* count == 0: skip the loop       */

    PREFETCH  ( REGIND(EAX) )				/* first input vertex              */
    PREFETCHW ( REGIND(EDX) )				/* first output vertex             */

    /* Matrix terms stay in registers for the whole loop. */
    MOVD      ( REGIND(ECX), MM0 )	/* MM0 = 0   | m0                       */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* MM0 = m5  | m0                       */

    MOVD      ( REGOFF(40, ECX), MM1 )	/* MM1 = 0   | m10                      */
    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* MM1 = m14 | m10                      */

    MOVQ      ( REGOFF(32, ECX), MM2 )	/* MM2 = m9  | m8                       */
    PXOR      ( MM7, MM7 )		/* MM7 = 0   | 0                        */

ALIGNTEXT16
LLBL( G3TPPR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )	/* output, 32 bytes (two results) ahead */

    MOVQ      ( REGIND(EAX), MM4 )	/* MM4 = x1 | x0                        */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* MM5 = x3 | x2                        */
    MOVD      ( REGOFF(8, EAX), MM3 )	/* MM3 = 0  | x2                        */

    ADD_L     ( EDI, EAX )		/* source += stride                     */
    PREFETCH  ( REGOFF(32, EAX) )	/* 32 bytes past the next input vertex  */

    MOVQ      ( MM5, MM6 )		/* MM6 = x3 | x2                        */
    PFMUL     ( MM0, MM4 )		/* MM4 = x1*m5 | x0*m0                  */

    PUNPCKLDQ ( MM5, MM5 )		/* MM5 = x2 | x2                        */
    ADD_L     ( CONST(16), EDX )	/* dest += 16; stores use -16 / -8      */

    PFMUL     ( MM2, MM5 )		/* MM5 = x2*m9 | x2*m8                  */
    PFSUBR    ( MM7, MM3 )		/* MM3 = MM7 - MM3 = 0 | -x2            */

    PFMUL     ( MM1, MM6 )		/* MM6 = x3*m14 | x2*m10                */
    PFADD     ( MM4, MM5 )		/* MM5 = r1 | r0                        */

    PFACC     ( MM3, MM6 )		/* MM6 = -x2 | x2*m10+x3*m14 = r3 | r2  */
    MOVQ      ( MM5, REGOFF(-16, EDX) )	/* store r0, r1                         */

    MOVQ      ( MM6, REGOFF(-8, EDX) )	/* store r2, r3                         */
    DEC_L     ( ESI )			/* one vertex done                      */

    JNZ       ( LLBL( G3TPPR_LOOP ) )	/* loop while vertices remain           */

LLBL( G3TPPR_DONE ):

    FEMMS				/* leave MMX/3DNow! state               */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
202
 
203
 
204
 
205
 
206
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Per source vertex (x0 x1 x2 x3), with matrix floats numbered by byte
 * offset / 4 (m3, m7, m11 and m15 are never read):
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *     r3 = x3
 *
 * The operands are obtained through the ARG_DEST, ARG_MATRIX and
 * ARG_SOURCE macros.  The destination vector receives size 4, the
 * VEC_SIZE_4 flag bits and the source's vertex count.  Results are
 * stored 16 bytes apart; inputs are read V4F_STRIDE bytes apart.
 *
 * Register roles once the set-up is finished:
 *     EAX  current source vertex       EDX  current destination vertex
 *     ECX  matrix                      ESI  vertices still to process
 *     EDI  source stride (bytes)
 *     MM6 = m6 | m2                    MM7 = m14 | m10
 *
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
GLNAME( _mesa_3dnow_transform_points4_3d ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )				/* ECX = destination vector        */
    MOV_L     ( ARG_SOURCE, EAX )			/* EAX = source vector             */
    MOV_L     ( ARG_MATRIX, ESI )			/* ESI = matrix, parked for now    */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4                   */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count       */

    /* Second push is issued only after the last ARG_* access. */
    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )		/* EDX = destination data          */
    MOV_L     ( ESI, ECX )				/* ECX = matrix                    */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )		/* EDI = source stride             */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )		/* ESI = vertex count              */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )		/* EAX = source data               */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_DONE ) )			/* count == 0: skip the loop       */

    /* Third-column terms stay in registers for the whole loop. */
    MOVD      ( REGOFF(8, ECX), MM6 )	/* MM6 = 0   | m2                       */
    PUNPCKLDQ ( REGOFF(24, ECX), MM6 )	/* MM6 = m6  | m2                       */

    MOVD      ( REGOFF(40, ECX), MM7 )	/* MM7 = 0   | m10                      */
    PUNPCKLDQ ( REGOFF(56, ECX), MM7 )	/* MM7 = m14 | m10                      */

ALIGNTEXT16
LLBL( G3TP3R_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )	/* output, 32 bytes (two results) ahead */
    PREFETCH  ( REGOFF(32, EAX) )	/* input, 32 bytes past current vertex  */

    MOVQ      ( REGIND(EAX), MM2 )	/* MM2 = x1 | x0                        */
    MOVQ      ( REGOFF(8, EAX), MM3 )	/* MM3 = x3 | x2                        */

    MOVQ      ( MM2, MM0 )		/* MM0 = x1 | x0                        */
    MOVQ      ( MM3, MM4 )		/* MM4 = x3 | x2                        */

    MOVQ      ( MM0, MM1 )		/* MM1 = x1 | x0                        */
    MOVQ      ( MM4, MM5 )		/* MM5 = x3 | x2                        */

    PUNPCKLDQ ( MM0, MM0 )		/* MM0 = x0 | x0                        */
    PUNPCKHDQ ( MM1, MM1 )		/* MM1 = x1 | x1                        */

    PFMUL     ( REGIND(ECX), MM0 )	/* MM0 = x0*m1 | x0*m0                  */
    PUNPCKLDQ ( MM3, MM3 )		/* MM3 = x2 | x2                        */

    PFMUL     ( REGOFF(16, ECX), MM1 )	/* MM1 = x1*m5 | x1*m4                  */
    PUNPCKHDQ ( MM4, MM4 )		/* MM4 = x3 | x3                        */

    PFMUL     ( MM6, MM2 )		/* MM2 = x1*m6 | x0*m2                  */
    PFADD     ( MM0, MM1 )		/* MM1 = x0, x1 terms of r1 | r0        */

    PFMUL     ( REGOFF(32, ECX), MM3 )	/* MM3 = x2*m9 | x2*m8                  */
    ADD_L     ( CONST(16), EDX )	/* dest += 16; stores use -16 / -8      */

    PFMUL     ( REGOFF(48, ECX), MM4 )	/* MM4 = x3*m13 | x3*m12                */
    PFADD     ( MM1, MM3 )		/* MM3 = x0..x2 terms of r1 | r0        */

    PFMUL     ( MM7, MM5 )		/* MM5 = x3*m14 | x2*m10                */
    PFADD     ( MM3, MM4 )		/* MM4 = r1 | r0                        */

    PFACC     ( MM2, MM5 )		/* MM5 = x0*m2+x1*m6 | x2*m10+x3*m14    */
    MOVD      ( REGOFF(12, EAX), MM0 )	/* MM0 = 0 | x3                         */

    ADD_L     ( EDI, EAX )		/* source += stride                     */
    PFACC     ( MM0, MM5 )		/* MM5 = x3+0 | sum of halves = r3 | r2 */

    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* store r0, r1                         */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* store r2, r3                         */

    DEC_L     ( ESI )			/* one vertex done                      */
    JNZ       ( LLBL( G3TP3R_LOOP ) )	/* loop while vertices remain           */

LLBL( G3TP3R_DONE ):

    FEMMS				/* leave MMX/3DNow! state               */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
291
 
292
 
293
 
294
 
295
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Per source vertex (x0 x1 x2 x3), using only matrix floats m0, m5, m10,
 * m12, m13 and m14 (numbered by byte offset / 4):
 *
 *     r0 = x0*m0  + x3*m12
 *     r1 = x1*m5  + x3*m13
 *     r2 = x2*m10 + x3*m14
 *     r3 = x3
 *
 * The operands are obtained through the ARG_DEST, ARG_MATRIX and
 * ARG_SOURCE macros.  The destination vector receives size 4, the
 * VEC_SIZE_4 flag bits and the source's vertex count.  Results are
 * stored 16 bytes apart; inputs are read V4F_STRIDE bytes apart.
 *
 * Register roles once the set-up is finished:
 *     EAX  current source vertex       EDX  current destination vertex
 *     ECX  matrix                      ESI  vertices still to process
 *     EDI  source stride (bytes)
 *     MM0 = m5 | m0    MM1 = m13 | m12    MM2 = m14 | m10
 *
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )				/* ECX = destination vector        */
    MOV_L     ( ARG_SOURCE, EAX )			/* EAX = source vector             */
    MOV_L     ( ARG_MATRIX, ESI )			/* ESI = matrix, parked for now    */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4                   */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count       */

    /* Second push is issued only after the last ARG_* access. */
    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )		/* EDX = destination data          */
    MOV_L     ( ESI, ECX )				/* ECX = matrix                    */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )		/* EDI = source stride             */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )		/* ESI = vertex count              */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )		/* EAX = source data               */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_DONE ) )			/* count == 0: skip the loop       */

    /* Matrix terms stay in registers for the whole loop. */
    MOVD      ( REGIND(ECX), MM0 )	/* MM0 = 0   | m0                       */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* MM0 = m5  | m0                       */

    MOVD      ( REGOFF(40, ECX), MM2 )	/* MM2 = 0   | m10                      */
    PUNPCKLDQ ( REGOFF(56, ECX), MM2 )	/* MM2 = m14 | m10                      */

    MOVQ      ( REGOFF(48, ECX), MM1 )	/* MM1 = m13 | m12                      */

ALIGNTEXT16
LLBL( G3TP3NRR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )	/* output, 32 bytes (two results) ahead */

    MOVQ      ( REGIND(EAX), MM4 )	/* MM4 = x1 | x0                        */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* MM5 = x3 | x2                        */
    MOVD      ( REGOFF(12, EAX), MM7 )	/* MM7 = 0  | x3                        */

    ADD_L     ( EDI, EAX )		/* source += stride                     */
    PREFETCH  ( REGOFF(32, EAX) )	/* 32 bytes past the next input vertex  */

    MOVQ      ( MM5, MM6 )		/* MM6 = x3 | x2                        */
    PFMUL     ( MM0, MM4 )		/* MM4 = x1*m5 | x0*m0                  */

    PUNPCKHDQ ( MM6, MM6 )		/* MM6 = x3 | x3                        */
    PFMUL     ( MM2, MM5 )		/* MM5 = x3*m14 | x2*m10                */

    PFMUL     ( MM1, MM6 )		/* MM6 = x3*m13 | x3*m12                */
    PFACC     ( MM7, MM5 )		/* MM5 = x3+0 | x2*m10+x3*m14 = r3 | r2 */

    PFADD     ( MM6, MM4 )		/* MM4 = r1 | r0                        */
    ADD_L     ( CONST(16), EDX )	/* dest += 16; stores use -16 / -8      */

    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* store r0, r1                         */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* store r2, r3                         */

    DEC_L     ( ESI )			/* one vertex done                      */
    JNZ       ( LLBL( G3TP3NRR_LOOP ) )	/* loop while vertices remain           */

LLBL( G3TP3NRR_DONE ):

    FEMMS				/* leave MMX/3DNow! state               */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
363
 
364
 
365
 
366
 
367
/*
 * _mesa_3dnow_transform_points4_2d
 *
 * Per source vertex (x0 x1 x2 x3), using only matrix floats m0, m1, m4,
 * m5, m12 and m13 (numbered by byte offset / 4):
 *
 *     r0 = x0*m0 + x1*m4 + x3*m12
 *     r1 = x0*m1 + x1*m5 + x3*m13
 *     r2 = x2            (copied unchanged)
 *     r3 = x3            (copied unchanged)
 *
 * The operands are obtained through the ARG_DEST, ARG_MATRIX and
 * ARG_SOURCE macros.  The destination vector receives size 4, the
 * VEC_SIZE_4 flag bits and the source's vertex count.  Results are
 * stored 16 bytes apart; inputs are read V4F_STRIDE bytes apart.
 *
 * Register roles once the set-up is finished:
 *     EAX  current source vertex       EDX  current destination vertex
 *     ECX  matrix                      ESI  vertices still to process
 *     EDI  source stride (bytes)
 *     MM0 = m4 | m0    MM1 = m5 | m1    MM2 = m13 | m12
 *
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
GLNAME( _mesa_3dnow_transform_points4_2d ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )				/* ECX = destination vector        */
    MOV_L     ( ARG_SOURCE, EAX )			/* EAX = source vector             */
    MOV_L     ( ARG_MATRIX, ESI )			/* ESI = matrix, parked for now    */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4                   */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count       */

    /* Second push is issued only after the last ARG_* access. */
    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )		/* EDX = destination data          */
    MOV_L     ( ESI, ECX )				/* ECX = matrix                    */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )		/* EDI = source stride             */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )		/* ESI = vertex count              */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )		/* EAX = source data               */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_DONE ) )			/* count == 0: skip the loop       */

    /* Matrix terms stay in registers for the whole loop. */
    MOVD      ( REGIND(ECX), MM0 )	/* MM0 = 0   | m0                       */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* MM0 = m4  | m0                       */

    MOVD      ( REGOFF(4, ECX), MM1 )	/* MM1 = 0   | m1                       */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* MM1 = m5  | m1                       */

    MOVQ      ( REGOFF(48, ECX), MM2 )	/* MM2 = m13 | m12                      */

ALIGNTEXT16
LLBL( G3TP2R_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )	/* output, 32 bytes (two results) ahead */

    MOVQ      ( REGIND(EAX), MM3 )	/* MM3 = x1 | x0                        */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* MM5 = x3 | x2                        */

    ADD_L     ( EDI, EAX )		/* source += stride                     */
    PREFETCH  ( REGIND(EAX) )		/* next input vertex                    */

    MOVQ      ( MM3, MM4 )		/* MM4 = x1 | x0                        */
    MOVQ      ( MM5, MM6 )		/* MM6 = x3 | x2                        */

    PFMUL     ( MM1, MM4 )		/* MM4 = x1*m5 | x0*m1                  */
    PUNPCKHDQ ( MM6, MM6 )		/* MM6 = x3 | x3                        */

    PFMUL     ( MM0, MM3 )		/* MM3 = x1*m4 | x0*m0                  */
    ADD_L     ( CONST(16), EDX )	/* dest += 16; stores use -16 / -8      */

    PFACC     ( MM4, MM3 )		/* MM3 = x0*m1+x1*m5 | x0*m0+x1*m4      */
    PFMUL     ( MM2, MM6 )		/* MM6 = x3*m13 | x3*m12                */

    PFADD     ( MM6, MM3 )		/* MM3 = r1 | r0                        */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* store r2, r3 (x2, x3 untouched)      */

    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* store r0, r1                         */

    DEC_L     ( ESI )			/* one vertex done                      */
    JNZ       ( LLBL( G3TP2R_LOOP ) )	/* loop while vertices remain           */

LLBL( G3TP2R_DONE ):

    FEMMS				/* leave MMX/3DNow! state               */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
437
 
438
 
439
 
440
 
441
/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * Per source vertex (x0 x1 x2 x3), using only matrix floats m0, m5, m12
 * and m13 (numbered by byte offset / 4):
 *
 *     r0 = x0*m0 + x3*m12
 *     r1 = x1*m5 + x3*m13
 *     r2 = x2            (copied unchanged)
 *     r3 = x3            (copied unchanged)
 *
 * The operands are obtained through the ARG_DEST, ARG_MATRIX and
 * ARG_SOURCE macros.  The destination vector receives size 4, the
 * VEC_SIZE_4 flag bits and the source's vertex count.  Results are
 * stored 16 bytes apart; inputs are read V4F_STRIDE bytes apart.
 *
 * Register roles once the set-up is finished:
 *     EAX  current source vertex       EDX  current destination vertex
 *     ECX  matrix                      ESI  vertices still to process
 *     EDI  source stride (bytes)
 *     MM0 = m5 | m0                    MM1 = m13 | m12
 *
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )				/* ECX = destination vector        */
    MOV_L     ( ARG_SOURCE, EAX )			/* EAX = source vector             */
    MOV_L     ( ARG_MATRIX, ESI )			/* ESI = matrix, parked for now    */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4                   */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count       */

    /* Second push is issued only after the last ARG_* access. */
    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )		/* EDX = destination data          */
    MOV_L     ( ESI, ECX )				/* ECX = matrix                    */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )		/* EDI = source stride             */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )		/* ESI = vertex count              */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )		/* EAX = source data               */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_DONE ) )			/* count == 0: skip the loop       */

    /* Matrix terms stay in registers for the whole loop. */
    MOVD      ( REGIND(ECX), MM0 )	/* MM0 = 0   | m0                       */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* MM0 = m5  | m0                       */

    MOVQ      ( REGOFF(48, ECX), MM1 )	/* MM1 = m13 | m12                      */

ALIGNTEXT16
LLBL( G3TP2NRR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )	/* output, 32 bytes (two results) ahead */

    MOVQ      ( REGIND(EAX), MM4 )	/* MM4 = x1 | x0                        */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* MM5 = x3 | x2                        */

    ADD_L     ( EDI, EAX )		/* source += stride                     */
    PREFETCH  ( REGIND(EAX) )		/* next input vertex                    */

    PFMUL     ( MM0, MM4 )		/* MM4 = x1*m5 | x0*m0                  */
    MOVQ      ( MM5, MM6 )		/* MM6 = x3 | x2                        */

    ADD_L     ( CONST(16), EDX )	/* dest += 16; stores use -16 / -8      */
    PUNPCKHDQ ( MM6, MM6 )		/* MM6 = x3 | x3                        */

    PFMUL     ( MM1, MM6 )		/* MM6 = x3*m13 | x3*m12                */
    PFADD     ( MM4, MM6 )		/* MM6 = r1 | r0                        */

    MOVQ      ( MM6, REGOFF(-16, EDX) )	/* store r0, r1                         */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* store r2, r3 (x2, x3 untouched)      */

    DEC_L     ( ESI )			/* one vertex done                      */

    JNZ       ( LLBL( G3TP2NRR_LOOP ) )	/* loop while vertices remain           */

LLBL( G3TP2NRR_DONE ):

    FEMMS				/* leave MMX/3DNow! state               */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
504
 
505
 
506
 
507
 
508
/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies each 4-float source vertex to the destination unchanged
 * (r0..r3 = x0..x3).  The matrix pointer is loaded into ECX but never
 * dereferenced.
 *
 * The operands are obtained through the ARG_DEST, ARG_MATRIX and
 * ARG_SOURCE macros.  The destination vector receives size 4, the
 * VEC_SIZE_4 flag bits and the source's vertex count.  Results are
 * stored 16 bytes apart; inputs are read V4F_STRIDE bytes apart.
 *
 * Register roles once the set-up is finished:
 *     EAX  current source vertex       EDX  current destination vertex
 *     ECX  matrix (unused)             ESI  vertices still to process
 *     EDI  source stride (bytes)
 *
 * Lane comments are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
GLNAME( _mesa_3dnow_transform_points4_identity ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )				/* ECX = destination vector        */
    MOV_L     ( ARG_SOURCE, EAX )			/* EAX = source vector             */
    MOV_L     ( ARG_MATRIX, ESI )			/* ESI = matrix, parked for now    */
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4                   */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= VEC_SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )		/* dest count = source count       */

    /* Second push is issued only after the last ARG_* access. */
    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )		/* EDX = destination data          */
    MOV_L     ( ESI, ECX )				/* ECX = matrix (not read below)   */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )		/* EDI = source stride             */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )		/* ESI = vertex count              */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )		/* EAX = source data               */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_DONE ) )			/* count == 0: skip the loop       */

ALIGNTEXT16
LLBL( G3TPIR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )	/* output, 32 bytes (two results) ahead */

    MOVQ      ( REGIND(EAX), MM0 )	/* MM0 = x1 | x0                        */
    MOVQ      ( REGOFF(8, EAX), MM1 )	/* MM1 = x3 | x2                        */

    ADD_L     ( EDI, EAX )		/* source += stride                     */
    PREFETCH  ( REGIND(EAX) )		/* next input vertex                    */

    ADD_L     ( CONST(16), EDX )	/* dest += 16; stores use -16 / -8      */
    MOVQ      ( MM0, REGOFF(-16, EDX) )	/* store r0, r1 = x0, x1                */

    MOVQ      ( MM1, REGOFF(-8, EDX) )	/* store r2, r3 = x2, x3                */

    DEC_L     ( ESI )			/* one vertex done                      */
    JNZ       ( LLBL( G3TPIR_LOOP ) )	/* loop while vertices remain           */

LLBL( G3TPIR_DONE ):

    FEMMS				/* leave MMX/3DNow! state               */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET