Subversion Repositories shark

Rev

Rev 134 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
134 giacomo 1
/* $Id: xform4_x86.s,v 1.1 2003-04-24 13:36:03 giacomo Exp $ */
2
 
3
/*
4
 * Mesa 3-D graphics library
5
 * Version:  3.5
6
 *
7
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
8
 *
9
 * Permission is hereby granted, free of charge, to any person obtaining a
10
 * copy of this software and associated documentation files (the "Software"),
11
 * to deal in the Software without restriction, including without limitation
12
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13
 * and/or sell copies of the Software, and to permit persons to whom the
14
 * Software is furnished to do so, subject to the following conditions:
15
 *
16
 * The above copyright notice and this permission notice shall be included
17
 * in all copies or substantial portions of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 */
26
 
27
/*
28
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
29
 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
30
 * in there will break the build on some platforms.
31
 */
32
 
33
#include "matypes.h"
34
#include "xform_args.h"
35
 
36
	SEG_TEXT			/* macro from the included headers; presumably selects the code section -- TODO confirm */
37
 
38
/*
 * Integer constants.  1065353216 is 0x3F800000, the IEEE-754
 * single-precision bit pattern of 1.0; 0 is the bit pattern of +0.0.
 * Neither name is referenced in the portion of the file visible here.
 */
#define FP_ONE		1065353216
39
#define FP_ZERO		0
40
 
41
/*
 * Memory-operand shorthands used by every routine below.
 *   SRCn  : 32-bit word n of the current source point,      base ESI
 *   DSTn  : 32-bit word n of the current destination point, base EDI
 *   MATn  : 32-bit word n (n = 0..15) of the matrix,        base EDX
 * Offsets are in bytes (4 per word).
 */
#define SRC0		REGOFF(0, ESI)
42
#define SRC1		REGOFF(4, ESI)
43
#define SRC2		REGOFF(8, ESI)
44
#define SRC3		REGOFF(12, ESI)
45
#define DST0		REGOFF(0, EDI)
46
#define DST1		REGOFF(4, EDI)
47
#define DST2		REGOFF(8, EDI)
48
#define DST3		REGOFF(12, EDI)
49
#define MAT0		REGOFF(0, EDX)
50
#define MAT1		REGOFF(4, EDX)
51
#define MAT2		REGOFF(8, EDX)
52
#define MAT3		REGOFF(12, EDX)
53
#define MAT4		REGOFF(16, EDX)
54
#define MAT5		REGOFF(20, EDX)
55
#define MAT6		REGOFF(24, EDX)
56
#define MAT7		REGOFF(28, EDX)
57
#define MAT8		REGOFF(32, EDX)
58
#define MAT9		REGOFF(36, EDX)
59
#define MAT10		REGOFF(40, EDX)
60
#define MAT11		REGOFF(44, EDX)
61
#define MAT12		REGOFF(48, EDX)
62
#define MAT13		REGOFF(52, EDX)
63
#define MAT14		REGOFF(56, EDX)
64
#define MAT15		REGOFF(60, EDX)
65
 
66
 
67
/*
 * _mesa_x86_transform_points4_general
 *
 * Multiplies every 4-component source point by all 16 matrix entries.
 * With s0..s3 = SRC0..SRC3 and mN = MATN, each point produces
 *   DSTj = s0*m(j) + s1*m(4+j) + s2*m(8+j) + s3*m(12+j)     for j = 0..3
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX,
 * which are defined in xform_args.h (not shown here).  The V4F_* field
 * offsets and VEC_SIZE_4 come from matypes.h (not shown here).
 *
 * Register roles inside the loop:
 *   ESI  current source point, advanced by EAX each iteration
 *   EDI  current destination point, advanced by 16 each iteration
 *   EAX  source stride, read from the source header
 *   ECX  destination end pointer = destination start + 16 * count
 *   EDX  matrix base
 * ESI and EDI are saved on entry and restored before returning.
 *
 * Before the loop the destination header gets VEC_SIZE_4 OR-ed into its
 * flags, the source count copied into its count, and 4 stored as its
 * size.  If the source count is zero the routine jumps straight to the
 * exit and none of those header fields are written.
 *
 * The stack comments list the x87 register stack with ST(0) first.
 * F4..F7 are the running sums for DST0..DST3; F0..F3 are the products
 * just loaded and about to be folded into them.
 */
ALIGNTEXT16
68
GLOBL GLNAME( _mesa_x86_transform_points4_general )
69
GLNAME( _mesa_x86_transform_points4_general ):
70
 
71
/* Two registers (8 bytes) are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros -- confirm in xform_args.h. */
#define FRAME_OFFSET 8
72
	PUSH_L( ESI )
73
	PUSH_L( EDI )
74
 
75
	MOV_L( ARG_SOURCE, ESI )
76
	MOV_L( ARG_DEST, EDI )
77
 
78
	MOV_L( ARG_MATRIX, EDX )
79
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
80
 
81
	/* Empty source: leave without touching the destination header. */
	TEST_L( ECX, ECX )
82
	JZ( LLBL(x86_p4_gr_done) )
83
 
84
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
85
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
86
 
87
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
88
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
89
 
90
	/* ECX = count * 16; the header pointers are replaced by the data
	 * pointers; then ECX += destination start, giving the end pointer. */
	SHL_L( CONST(4), ECX )
91
	MOV_L( REGOFF(V4F_START, ESI), ESI )
92
 
93
	MOV_L( REGOFF(V4F_START, EDI), EDI )
94
	ADD_L( EDI, ECX )
95
 
96
ALIGNTEXT16
97
LLBL(x86_p4_gr_loop):
98
 
99
	/* s0 times matrix words 0..3 start the four running sums. */
	FLD_S( SRC0 )			/* F4 */
100
	FMUL_S( MAT0 )
101
	FLD_S( SRC0 )			/* F5 F4 */
102
	FMUL_S( MAT1 )
103
	FLD_S( SRC0 )			/* F6 F5 F4 */
104
	FMUL_S( MAT2 )
105
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
106
	FMUL_S( MAT3 )
107
 
108
	/* s1 times matrix words 4..7. */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
109
	FMUL_S( MAT4 )
110
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
111
	FMUL_S( MAT5 )
112
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
113
	FMUL_S( MAT6 )
114
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
115
	FMUL_S( MAT7 )
116
 
117
	/* Fold: F4 += F0, F5 += F1, F6 += F2, F7 += F3. */
	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
118
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
119
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
120
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
121
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
122
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
123
 
124
	/* s2 times matrix words 8..11. */
	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
125
	FMUL_S( MAT8 )
126
	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
127
	FMUL_S( MAT9 )
128
	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
129
	FMUL_S( MAT10 )
130
	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
131
	FMUL_S( MAT11 )
132
 
133
	/* Fold: F4 += F0, F5 += F1, F6 += F2, F7 += F3. */
	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
134
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
135
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
136
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
137
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
138
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
139
 
140
	/* s3 times matrix words 12..15. */
	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
141
	FMUL_S( MAT12 )
142
	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
143
	FMUL_S( MAT13 )
144
	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
145
	FMUL_S( MAT14 )
146
	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
147
	FMUL_S( MAT15 )
148
 
149
	/* Fold: F4 += F0, F5 += F1, F6 += F2, F7 += F3. */
	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
150
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
151
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
152
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
153
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
154
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
155
 
156
	/* Bring the sums to the top in output order and store them.  All
	 * loads of this point are already done, so source == destination
	 * storage is not a hazard within one point. */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
157
	FSTP_S( DST0 )		/* F6 F5 F7 */
158
	FXCH( ST(1) )			/* F5 F6 F7 */
159
	FSTP_S( DST1 )		/* F6 F7 */
160
	FSTP_S( DST2 )		/* F7 */
161
	FSTP_S( DST3 )		/* */
162
 
163
/* This label is not the target of any jump in the visible code. */
LLBL(x86_p4_gr_skip):
164
 
165
	/* Next point: destination += 16 bytes, source += stride; repeat
	 * until the destination pointer equals the end pointer in ECX. */
	ADD_L( CONST(16), EDI )
166
	ADD_L( EAX, ESI )
167
	CMP_L( ECX, EDI )
168
	JNE( LLBL(x86_p4_gr_loop) )
169
 
170
LLBL(x86_p4_gr_done):
171
 
172
	POP_L( EDI )
173
	POP_L( ESI )
174
	RET
175
#undef FRAME_OFFSET
176
 
177
 
178
 
179
 
180
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transforms every 4-component source point using only matrix words
 * 0, 5, 8, 9, 10 and 14.  With s0..s3 = SRC0..SRC3 and mN = MATN:
 *   DST0 = s0*m0  + s2*m8
 *   DST1 = s1*m5  + s2*m9
 *   DST2 = s2*m10 + s3*m14
 *   DST3 = s2 with its top bit inverted (an integer XOR with 0x80000000,
 *          i.e. the sign bit of a single-precision value)
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX,
 * which are defined in xform_args.h (not shown here).  The V4F_* field
 * offsets and VEC_SIZE_4 come from matypes.h (not shown here).
 *
 * Register roles inside the loop:
 *   ESI  current source point, advanced by EAX each iteration
 *   EDI  current destination point, advanced by 16 each iteration
 *   EAX  source stride, read from the source header
 *   ECX  destination end pointer = destination start + 16 * count
 *   EDX  matrix base
 *   EBX  scratch for the sign-flipped copy of s2
 * ESI, EDI and EBX are saved on entry and restored before returning.
 *
 * Before the loop the destination header gets VEC_SIZE_4 OR-ed into its
 * flags, the source count copied into its count, and 4 stored as its
 * size.  If the source count is zero the routine jumps straight to the
 * exit and none of those header fields are written.
 *
 * The stack comments list the x87 register stack with ST(0) first.
 * F4, F5, F6 are the running sums for DST0, DST1, DST2; F0..F2 are
 * products about to be folded into them.
 */
ALIGNTEXT16
181
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
182
GLNAME( _mesa_x86_transform_points4_perspective ):
183
 
184
/* Three registers (12 bytes) are pushed below.  FRAME_OFFSET is
 * presumably consumed by the ARG_* macros -- confirm in xform_args.h. */
#define FRAME_OFFSET 12
185
	PUSH_L( ESI )
186
	PUSH_L( EDI )
187
	PUSH_L( EBX )
188
 
189
	MOV_L( ARG_SOURCE, ESI )
190
	MOV_L( ARG_DEST, EDI )
191
 
192
	MOV_L( ARG_MATRIX, EDX )
193
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
194
 
195
	/* Empty source: leave without touching the destination header. */
	TEST_L( ECX, ECX )
196
	JZ( LLBL(x86_p4_pr_done) )
197
 
198
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
199
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
200
 
201
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
202
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
203
 
204
	/* ECX = count * 16; the header pointers are replaced by the data
	 * pointers; then ECX += destination start, giving the end pointer. */
	SHL_L( CONST(4), ECX )
205
	MOV_L( REGOFF(V4F_START, ESI), ESI )
206
 
207
	MOV_L( REGOFF(V4F_START, EDI), EDI )
208
	ADD_L( EDI, ECX )
209
 
210
ALIGNTEXT16
211
LLBL(x86_p4_pr_loop):
212
 
213
	FLD_S( SRC0 )			/* F4 */
214
	FMUL_S( MAT0 )
215
 
216
	FLD_S( SRC1 )			/* F5 F4 */
217
	FMUL_S( MAT5 )
218
 
219
	/* s2 contributes to all three computed outputs. */
	FLD_S( SRC2 )			/* F0 F5 F4 */
220
	FMUL_S( MAT8 )
221
	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
222
	FMUL_S( MAT9 )
223
	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
224
	FMUL_S( MAT10 )
225
 
226
	/* Fold: F4 += F0, F5 += F1; F6 stays on top. */
	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
227
	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
228
	FADDP( ST0, ST(2) )		/* F6 F5 F4 */
229
 
230
	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
231
	FMUL_S( MAT14 )
232
 
233
	/* F6 += F2. */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
234
 
235
	/* Fetch s2 as a raw 32-bit word (before any store to the
	 * destination) and invert bit 31. */
	MOV_L( SRC2, EBX )
236
	XOR_L( CONST(-2147483648), EBX )/* change sign */
237
 
238
	FXCH( ST(2) )			/* F4 F5 F6 */
239
	FSTP_S( DST0 )		/* F5 F6 */
240
	FSTP_S( DST1 )		/* F6 */
241
	FSTP_S( DST2 )		/* */
242
	MOV_L( EBX, DST3 )
243
 
244
/* This label is not the target of any jump in the visible code. */
LLBL(x86_p4_pr_skip):
245
 
246
	/* Next point: destination += 16 bytes, source += stride; repeat
	 * until the destination pointer equals the end pointer in ECX. */
	ADD_L( CONST(16), EDI )
247
	ADD_L( EAX, ESI )
248
	CMP_L( ECX, EDI )
249
	JNE( LLBL(x86_p4_pr_loop) )
250
 
251
LLBL(x86_p4_pr_done):
252
 
253
	POP_L( EBX )
254
	POP_L( EDI )
255
	POP_L( ESI )
256
	RET
257
#undef FRAME_OFFSET
258
 
259
 
260
 
261
 
262
/*
 * _mesa_x86_transform_points4_3d
 *
 * Transforms every 4-component source point using matrix words
 * 0-2, 4-6, 8-10 and 12-14; words 3, 7, 11 and 15 are never read.
 * With s0..s3 = SRC0..SRC3 and mN = MATN:
 *   DSTj = s0*m(j) + s1*m(4+j) + s2*m(8+j) + s3*m(12+j)     for j = 0..2
 *   DST3 = s3, copied as a raw 32-bit word through EBX
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX,
 * which are defined in xform_args.h (not shown here).  The V4F_* field
 * offsets and VEC_SIZE_4 come from matypes.h (not shown here).
 *
 * Register roles inside the loop:
 *   ESI  current source point, advanced by EAX each iteration
 *   EDI  current destination point, advanced by 16 each iteration
 *   EAX  source stride, read from the source header
 *   ECX  destination end pointer = destination start + 16 * count
 *   EDX  matrix base
 *   EBX  scratch for the copy of s3
 * ESI, EDI and EBX are saved on entry and restored before returning.
 *
 * Before the loop the destination header gets VEC_SIZE_4 OR-ed into its
 * flags, the source count copied into its count, and 4 stored as its
 * size.  If the source count is zero the routine jumps straight to the
 * exit and none of those header fields are written.
 *
 * The stack comments list the x87 register stack with ST(0) first.
 * F4, F5, F6 are the running sums for DST0, DST1, DST2; F0..F2 are
 * products about to be folded into them.
 */
ALIGNTEXT16
263
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
264
GLNAME( _mesa_x86_transform_points4_3d ):
265
 
266
/* Three registers (12 bytes) are pushed below.  FRAME_OFFSET is
 * presumably consumed by the ARG_* macros -- confirm in xform_args.h. */
#define FRAME_OFFSET 12
267
	PUSH_L( ESI )
268
	PUSH_L( EDI )
269
	PUSH_L( EBX )
270
 
271
	MOV_L( ARG_SOURCE, ESI )
272
	MOV_L( ARG_DEST, EDI )
273
 
274
	MOV_L( ARG_MATRIX, EDX )
275
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
276
 
277
	/* Empty source: leave without touching the destination header. */
	TEST_L( ECX, ECX )
278
	JZ( LLBL(x86_p4_3dr_done) )
279
 
280
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
281
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
282
 
283
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
284
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
285
 
286
	/* ECX = count * 16; the header pointers are replaced by the data
	 * pointers; then ECX += destination start, giving the end pointer. */
	SHL_L( CONST(4), ECX )
287
	MOV_L( REGOFF(V4F_START, ESI), ESI )
288
 
289
	MOV_L( REGOFF(V4F_START, EDI), EDI )
290
	ADD_L( EDI, ECX )
291
 
292
ALIGNTEXT16
293
LLBL(x86_p4_3dr_loop):
294
 
295
	/* s0 times matrix words 0..2 start the three running sums. */
	FLD_S( SRC0 )			/* F4 */
296
	FMUL_S( MAT0 )
297
	FLD_S( SRC0 )			/* F5 F4 */
298
	FMUL_S( MAT1 )
299
	FLD_S( SRC0 )			/* F6 F5 F4 */
300
	FMUL_S( MAT2 )
301
 
302
	/* s1 times matrix words 4..6. */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
303
	FMUL_S( MAT4 )
304
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
305
	FMUL_S( MAT5 )
306
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
307
	FMUL_S( MAT6 )
308
 
309
	/* Fold: F4 += F0, F5 += F1, F6 += F2. */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
310
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
311
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
312
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
313
 
314
	/* s2 times matrix words 8..10. */
	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
315
	FMUL_S( MAT8 )
316
	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
317
	FMUL_S( MAT9 )
318
	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
319
	FMUL_S( MAT10 )
320
 
321
	/* Fold: F4 += F0, F5 += F1, F6 += F2. */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
322
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
323
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
324
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
325
 
326
	/* s3 times matrix words 12..14. */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
327
	FMUL_S( MAT12 )
328
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
329
	FMUL_S( MAT13 )
330
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
331
	FMUL_S( MAT14 )
332
 
333
	/* Fold: F4 += F0, F5 += F1, F6 += F2. */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
334
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
335
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
336
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
337
 
338
	/* Fetch s3 as a raw 32-bit word before any store to the destination. */
	MOV_L( SRC3, EBX )
339
 
340
	FXCH( ST(2) )			/* F4 F5 F6 */
341
	FSTP_S( DST0 )		/* F5 F6 */
342
	FSTP_S( DST1 )		/* F6 */
343
	FSTP_S( DST2 )		/* */
344
	MOV_L( EBX, DST3 )
345
 
346
/* This label is not the target of any jump in the visible code. */
LLBL(x86_p4_3dr_skip):
347
 
348
	/* Next point: destination += 16 bytes, source += stride; repeat
	 * until the destination pointer equals the end pointer in ECX. */
	ADD_L( CONST(16), EDI )
349
	ADD_L( EAX, ESI )
350
	CMP_L( ECX, EDI )
351
	JNE( LLBL(x86_p4_3dr_loop) )
352
 
353
LLBL(x86_p4_3dr_done):
354
 
355
	POP_L( EBX )
356
	POP_L( EDI )
357
	POP_L( ESI )
358
	RET
359
#undef FRAME_OFFSET
360
 
361
 
362
 
363
 
364
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Transforms every 4-component source point using only matrix words
 * 0, 5, 10, 12, 13 and 14.  With s0..s3 = SRC0..SRC3 and mN = MATN:
 *   DST0 = s0*m0  + s3*m12
 *   DST1 = s1*m5  + s3*m13
 *   DST2 = s2*m10 + s3*m14
 *   DST3 = s3, copied as a raw 32-bit word through EBX
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX,
 * which are defined in xform_args.h (not shown here).  The V4F_* field
 * offsets and VEC_SIZE_4 come from matypes.h (not shown here).
 *
 * Register roles inside the loop:
 *   ESI  current source point, advanced by EAX each iteration
 *   EDI  current destination point, advanced by 16 each iteration
 *   EAX  source stride, read from the source header
 *   ECX  destination end pointer = destination start + 16 * count
 *   EDX  matrix base
 *   EBX  scratch for the copy of s3
 * ESI, EDI and EBX are saved on entry and restored before returning.
 *
 * Before the loop the destination header gets VEC_SIZE_4 OR-ed into its
 * flags, the source count copied into its count, and 4 stored as its
 * size.  If the source count is zero the routine jumps straight to the
 * exit and none of those header fields are written.
 *
 * The stack comments list the x87 register stack with ST(0) first.
 * F4, F5, F6 are the running sums for DST0, DST1, DST2; F0..F2 are
 * products about to be folded into them.
 */
ALIGNTEXT16
365
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
366
GLNAME(_mesa_x86_transform_points4_3d_no_rot):
367
 
368
/* Three registers (12 bytes) are pushed below.  FRAME_OFFSET is
 * presumably consumed by the ARG_* macros -- confirm in xform_args.h. */
#define FRAME_OFFSET 12
369
	PUSH_L( ESI )
370
	PUSH_L( EDI )
371
	PUSH_L( EBX )
372
 
373
	MOV_L( ARG_SOURCE, ESI )
374
	MOV_L( ARG_DEST, EDI )
375
 
376
	MOV_L( ARG_MATRIX, EDX )
377
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
378
 
379
	/* Empty source: leave without touching the destination header. */
	TEST_L( ECX, ECX )
380
	JZ( LLBL(x86_p4_3dnrr_done) )
381
 
382
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
383
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
384
 
385
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
386
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
387
 
388
	/* ECX = count * 16; the header pointers are replaced by the data
	 * pointers; then ECX += destination start, giving the end pointer. */
	SHL_L( CONST(4), ECX )
389
	MOV_L( REGOFF(V4F_START, ESI), ESI )
390
 
391
	MOV_L( REGOFF(V4F_START, EDI), EDI )
392
	ADD_L( EDI, ECX )
393
 
394
ALIGNTEXT16
395
LLBL(x86_p4_3dnrr_loop):
396
 
397
	/* One product per output: s0*m0, s1*m5, s2*m10. */
	FLD_S( SRC0 )			/* F4 */
398
	FMUL_S( MAT0 )
399
 
400
	FLD_S( SRC1 )			/* F5 F4 */
401
	FMUL_S( MAT5 )
402
 
403
	FLD_S( SRC2 )			/* F6 F5 F4 */
404
	FMUL_S( MAT10 )
405
 
406
	/* s3 times matrix words 12..14. */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
407
	FMUL_S( MAT12 )
408
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
409
	FMUL_S( MAT13 )
410
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
411
	FMUL_S( MAT14 )
412
 
413
	/* Fold: F4 += F0, F5 += F1, F6 += F2. */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
414
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
415
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
416
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
417
 
418
	/* Fetch s3 as a raw 32-bit word before any store to the destination. */
	MOV_L( SRC3, EBX )
419
 
420
	FXCH( ST(2) )			/* F4 F5 F6 */
421
	FSTP_S( DST0   )		/* F5 F6 */
422
	FSTP_S( DST1   )		/* F6 */
423
	FSTP_S( DST2   )		/* */
424
	MOV_L( EBX, DST3 )
425
 
426
/* This label is not the target of any jump in the visible code. */
LLBL(x86_p4_3dnrr_skip):
427
 
428
	/* Next point: destination += 16 bytes, source += stride; repeat
	 * until the destination pointer equals the end pointer in ECX. */
	ADD_L( CONST(16), EDI )
429
	ADD_L( EAX, ESI )
430
	CMP_L( ECX, EDI )
431
	JNE( LLBL(x86_p4_3dnrr_loop) )
432
 
433
LLBL(x86_p4_3dnrr_done):
434
 
435
	POP_L( EBX )
436
	POP_L( EDI )
437
	POP_L( ESI )
438
	RET
439
#undef FRAME_OFFSET
440
 
441
 
442
 
443
 
444
/*
 * _mesa_x86_transform_points4_2d
 *
 * Transforms every 4-component source point using only matrix words
 * 0, 1, 4, 5, 12 and 13.  With s0..s3 = SRC0..SRC3 and mN = MATN:
 *   DST0 = s0*m0 + s1*m4 + s3*m12
 *   DST1 = s0*m1 + s1*m5 + s3*m13
 *   DST2 = s2, copied as a raw 32-bit word through EBX
 *   DST3 = s3, copied as a raw 32-bit word through EBP
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX,
 * which are defined in xform_args.h (not shown here).  The V4F_* field
 * offsets and VEC_SIZE_4 come from matypes.h (not shown here).
 *
 * Register roles inside the loop:
 *   ESI  current source point, advanced by EAX each iteration
 *   EDI  current destination point, advanced by 16 each iteration
 *   EAX  source stride, read from the source header
 *   ECX  destination end pointer = destination start + 16 * count
 *   EDX  matrix base
 *   EBX, EBP  scratch for the copies of s2 and s3
 * ESI, EDI, EBX and EBP are saved on entry and restored before
 * returning.
 *
 * Before the loop the destination header gets VEC_SIZE_4 OR-ed into its
 * flags, the source count copied into its count, and 4 stored as its
 * size.  If the source count is zero the routine jumps straight to the
 * exit and none of those header fields are written.
 *
 * The stack comments list the x87 register stack with ST(0) first.
 * F4 and F5 are the running sums for DST0 and DST1; F0 and F1 are
 * products about to be folded into them.
 */
ALIGNTEXT16
445
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
446
GLNAME( _mesa_x86_transform_points4_2d ):
447
 
448
/* Four registers (16 bytes) are pushed below.  FRAME_OFFSET is
 * presumably consumed by the ARG_* macros -- confirm in xform_args.h. */
#define FRAME_OFFSET 16
449
	PUSH_L( ESI )
450
	PUSH_L( EDI )
451
	PUSH_L( EBX )
452
	PUSH_L( EBP )
453
 
454
	MOV_L( ARG_SOURCE, ESI )
455
	MOV_L( ARG_DEST, EDI )
456
 
457
	MOV_L( ARG_MATRIX, EDX )
458
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
459
 
460
	/* Empty source: leave without touching the destination header. */
	TEST_L( ECX, ECX )
461
	JZ( LLBL(x86_p4_2dr_done) )
462
 
463
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
464
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
465
 
466
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
467
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
468
 
469
	/* ECX = count * 16; the header pointers are replaced by the data
	 * pointers; then ECX += destination start, giving the end pointer. */
	SHL_L( CONST(4), ECX )
470
	MOV_L( REGOFF(V4F_START, ESI), ESI )
471
 
472
	MOV_L( REGOFF(V4F_START, EDI), EDI )
473
	ADD_L( EDI, ECX )
474
 
475
ALIGNTEXT16
476
LLBL(x86_p4_2dr_loop):
477
 
478
	/* s0 times matrix words 0 and 1 start the two running sums. */
	FLD_S( SRC0 )			/* F4 */
479
	FMUL_S( MAT0 )
480
	FLD_S( SRC0 )			/* F5 F4 */
481
	FMUL_S( MAT1 )
482
 
483
	/* s1 times matrix words 4 and 5. */
	FLD_S( SRC1 )			/* F0 F5 F4 */
484
	FMUL_S( MAT4 )
485
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
486
	FMUL_S( MAT5 )
487
 
488
	/* Fold: F4 += F0, F5 += F1. */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
489
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
490
	FADDP( ST0, ST(1) )		/* F5 F4 */
491
 
492
	/* s3 times matrix words 12 and 13. */
	FLD_S( SRC3 )			/* F0 F5 F4 */
493
	FMUL_S( MAT12 )
494
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
495
	FMUL_S( MAT13 )
496
 
497
	/* Fold: F4 += F0, F5 += F1. */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
498
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
499
	FADDP( ST0, ST(1) )		/* F5 F4 */
500
 
501
	/* Fetch s2 and s3 as raw 32-bit words before any store to the
	 * destination. */
	MOV_L( SRC2, EBX )
502
	MOV_L( SRC3, EBP )
503
 
504
	FXCH( ST(1) )			/* F4 F5 */
505
	FSTP_S( DST0 )		/* F5 */
506
	FSTP_S( DST1 )		/* */
507
	MOV_L( EBX, DST2 )
508
	MOV_L( EBP, DST3 )
509
 
510
/* This label is not the target of any jump in the visible code. */
LLBL(x86_p4_2dr_skip):
511
 
512
	/* Next point: destination += 16 bytes, source += stride; repeat
	 * until the destination pointer equals the end pointer in ECX. */
	ADD_L( CONST(16), EDI )
513
	ADD_L( EAX, ESI )
514
	CMP_L( ECX, EDI )
515
	JNE( LLBL(x86_p4_2dr_loop) )
516
 
517
LLBL(x86_p4_2dr_done):
518
 
519
	POP_L( EBP )
520
	POP_L( EBX )
521
	POP_L( EDI )
522
	POP_L( ESI )
523
	RET
524
#undef FRAME_OFFSET
525
 
526
 
527
 
528
 
529
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Transforms every 4-component source point using only matrix words
 * 0, 5, 12 and 13.  With s0..s3 = SRC0..SRC3 and mN = MATN:
 *   DST0 = s0*m0 + s3*m12
 *   DST1 = s1*m5 + s3*m13
 *   DST2 = s2, copied as a raw 32-bit word through EBX
 *   DST3 = s3, copied as a raw 32-bit word through EBP
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX,
 * which are defined in xform_args.h (not shown here).  The V4F_* field
 * offsets and VEC_SIZE_4 come from matypes.h (not shown here).
 *
 * Register roles inside the loop:
 *   ESI  current source point, advanced by EAX each iteration
 *   EDI  current destination point, advanced by 16 each iteration
 *   EAX  source stride, read from the source header
 *   ECX  destination end pointer = destination start + 16 * count
 *   EDX  matrix base
 *   EBX, EBP  scratch for the copies of s2 and s3
 * ESI, EDI, EBX and EBP are saved on entry and restored before
 * returning.
 *
 * Before the loop the destination header gets VEC_SIZE_4 OR-ed into its
 * flags, the source count copied into its count, and 4 stored as its
 * size.  If the source count is zero the routine jumps straight to the
 * exit and none of those header fields are written.
 *
 * The stack comments list the x87 register stack with ST(0) first.
 * F4 and F5 are the running sums for DST0 and DST1; F0 and F1 are
 * products about to be folded into them.
 */
ALIGNTEXT16
530
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
531
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
532
 
533
/* Four registers (16 bytes) are pushed below.  FRAME_OFFSET is
 * presumably consumed by the ARG_* macros -- confirm in xform_args.h. */
#define FRAME_OFFSET 16
534
	PUSH_L( ESI )
535
	PUSH_L( EDI )
536
	PUSH_L( EBX )
537
	PUSH_L( EBP )
538
 
539
	MOV_L( ARG_SOURCE, ESI )
540
	MOV_L( ARG_DEST, EDI )
541
 
542
	MOV_L( ARG_MATRIX, EDX )
543
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
544
 
545
	/* Empty source: leave without touching the destination header. */
	TEST_L( ECX, ECX )
546
	JZ( LLBL(x86_p4_2dnrr_done) )
547
 
548
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
549
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
550
 
551
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
552
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
553
 
554
	/* ECX = count * 16; the header pointers are replaced by the data
	 * pointers; then ECX += destination start, giving the end pointer. */
	SHL_L( CONST(4), ECX )
555
	MOV_L( REGOFF(V4F_START, ESI), ESI )
556
 
557
	MOV_L( REGOFF(V4F_START, EDI), EDI )
558
	ADD_L( EDI, ECX )
559
 
560
ALIGNTEXT16
561
LLBL(x86_p4_2dnrr_loop):
562
 
563
	/* One product per computed output: s0*m0 and s1*m5. */
	FLD_S( SRC0 )			/* F4 */
564
	FMUL_S( MAT0 )
565
 
566
	FLD_S( SRC1 )			/* F5 F4 */
567
	FMUL_S( MAT5 )
568
 
569
	/* s3 times matrix words 12 and 13. */
	FLD_S( SRC3 )			/* F0 F5 F4 */
570
	FMUL_S( MAT12 )
571
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
572
	FMUL_S( MAT13 )
573
 
574
	/* Fold: F4 += F0, F5 += F1. */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
575
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
576
	FADDP( ST0, ST(1) )		/* F5 F4 */
577
 
578
	/* Fetch s2 and s3 as raw 32-bit words before any store to the
	 * destination. */
	MOV_L( SRC2, EBX )
579
	MOV_L( SRC3, EBP )
580
 
581
	FXCH( ST(1) )			/* F4 F5 */
582
	FSTP_S( DST0   )		/* F5 */
583
	FSTP_S( DST1   )		/* */
584
	MOV_L( EBX, DST2 )
585
	MOV_L( EBP, DST3 )
586
 
587
/* This label is not the target of any jump in the visible code. */
LLBL(x86_p4_2dnrr_skip):
588
 
589
	/* Next point: destination += 16 bytes, source += stride; repeat
	 * until the destination pointer equals the end pointer in ECX. */
	ADD_L( CONST(16), EDI )
590
	ADD_L( EAX, ESI )
591
	CMP_L( ECX, EDI )
592
	JNE( LLBL(x86_p4_2dnrr_loop) )
593
 
594
LLBL(x86_p4_2dnrr_done):
595
 
596
	POP_L( EBP )
597
	POP_L( EBX )
598
	POP_L( EDI )
599
	POP_L( ESI )
600
	RET
601
#undef FRAME_OFFSET
602
 
603
 
604
 
605
 
606
/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies every 4-component source point to the destination unchanged,
 * as four raw 32-bit words; no floating-point instructions are used.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX,
 * which are defined in xform_args.h (not shown here).  The V4F_* field
 * offsets and VEC_SIZE_4 come from matypes.h (not shown here).
 *
 * Register roles inside the loop:
 *   ESI  current source point, advanced by EAX each iteration
 *   EDI  current destination point, advanced by 16 each iteration
 *   EAX  source stride, read from the source header
 *   ECX  destination end pointer = destination start + 16 * count
 *   EBX, EDX  scratch for the words being copied
 * EDX is loaded from ARG_MATRIX on entry, but the matrix is never read;
 * the loop overwrites EDX as a copy register.
 * ESI, EDI and EBX are saved on entry and restored before returning.
 *
 * Before the loop the destination header gets VEC_SIZE_4 OR-ed into its
 * flags, the source count copied into its count, and 4 stored as its
 * size.  If the source count is zero the routine jumps straight to the
 * exit and none of those header fields are written.  If the source and
 * destination data pointers are equal the copy loop is skipped, after
 * the header fields have been written.
 *
 * NOTE(review): FRAME_OFFSET is defined below but no matching #undef is
 * visible after the RET, unlike the sibling routines -- confirm against
 * the rest of the file.
 */
ALIGNTEXT16
607
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
608
GLNAME( _mesa_x86_transform_points4_identity ):
609
 
610
/* Three registers (12 bytes) are pushed below.  FRAME_OFFSET is
 * presumably consumed by the ARG_* macros -- confirm in xform_args.h. */
#define FRAME_OFFSET 12
611
	PUSH_L( ESI )
612
	PUSH_L( EDI )
613
	PUSH_L( EBX )
614
 
615
	MOV_L( ARG_SOURCE, ESI )
616
	MOV_L( ARG_DEST, EDI )
617
 
618
	MOV_L( ARG_MATRIX, EDX )
619
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
620
 
621
	/* Empty source: leave without touching the destination header. */
	TEST_L( ECX, ECX )
622
	JZ( LLBL(x86_p4_ir_done) )
623
 
624
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
625
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
626
 
627
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
628
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
629
 
630
	/* ECX = count * 16; the header pointers are replaced by the data
	 * pointers; then ECX += destination start, giving the end pointer. */
	SHL_L( CONST(4), ECX )
631
	MOV_L( REGOFF(V4F_START, ESI), ESI )
632
 
633
	MOV_L( REGOFF(V4F_START, EDI), EDI )
634
	ADD_L( EDI, ECX )
635
 
636
	/* Same data pointer on both sides: nothing to copy. */
	CMP_L( ESI, EDI )
637
	JE( LLBL(x86_p4_ir_done) )
638
 
639
ALIGNTEXT16
640
LLBL(x86_p4_ir_loop):
641
 
642
	/* Copy words 0 and 1, then words 2 and 3, two at a time. */
	MOV_L( SRC0, EBX )
643
	MOV_L( SRC1, EDX )
644
 
645
	MOV_L( EBX, DST0 )
646
	MOV_L( EDX, DST1 )
647
 
648
	MOV_L( SRC2, EBX )
649
	MOV_L( SRC3, EDX )
650
 
651
	MOV_L( EBX, DST2 )
652
	MOV_L( EDX, DST3 )
653
 
654
/* This label is not the target of any jump in the visible code. */
LLBL(x86_p4_ir_skip):
655
 
656
	/* Next point: destination += 16 bytes, source += stride; repeat
	 * until the destination pointer equals the end pointer in ECX. */
	ADD_L( CONST(16), EDI )
657
	ADD_L( EAX, ESI )
658
	CMP_L( ECX, EDI )
659
	JNE( LLBL(x86_p4_ir_loop) )
660
 
661
LLBL(x86_p4_ir_done):
662
 
663
	POP_L( EBX )
664
	POP_L( EDI )
665
	POP_L( ESI )
666
	RET