/*
 * Retrieved from the "shark" Subversion repository web view:
 * Rev 134, author giacomo.
 */
/* $Id: xform2_x86.s,v 1.1 2003-04-24 13:36:03 giacomo Exp $ */

/*
 * Mesa 3-D graphics library
 * Version:  3.5
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
 * in there will break the build on some platforms.
 */

#include "matypes.h"
#include "xform_args.h"

	SEG_TEXT

/*
 * Raw 32-bit patterns that are written to memory with integer moves.
 * 1065353216 is 0x3F800000, the IEEE-754 single-precision encoding of 1.0;
 * 0 is the encoding of +0.0.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/*
 * SRCn: 32-bit component n of the current source point.  The routines in
 * this file keep the current source pointer in ESI.
 */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)

/*
 * DSTn: 32-bit component n of the current destination point.  The routines
 * in this file keep the current destination pointer in EDI.
 */
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)

/*
 * MATn: 32-bit matrix element n, i.e. the word at byte offset 4*n from EDX.
 * The routines in this file load EDX from ARG_MATRIX.
 */
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points2_general
 *
 * Transforms 2-component points with the x87 FPU using matrix elements
 * 0-7 and 12-15.  For every source point (x, y) = (SRC0, SRC1) the loop
 * stores four results:
 *
 *	DST0 = x*MAT0 + y*MAT4 + MAT12
 *	DST1 = x*MAT1 + y*MAT5 + MAT13
 *	DST2 = x*MAT2 + y*MAT6 + MAT14
 *	DST3 = x*MAT3 + y*MAT7 + MAT15
 *
 * The arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (presumably defined in xform_args.h in terms of FRAME_OFFSET -- confirm).
 * Source and destination are descriptors accessed through the V4F_*
 * offsets; the destination gets the source count, size 4 and the
 * VEC_SIZE_4 flag bits.
 *
 * Register roles inside the loop:
 *	ESI = current source point, advanced by the source stride (EAX)
 *	EDI = current destination point, advanced by 16 bytes
 *	ECX = destination end address (start + count*16)
 *	EDX = matrix base
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 * When the source count is zero the routine returns without writing to
 * the destination descriptor.
 *
 * The F0..F7 tags in the trailing comments name the live x87 stack
 * entries, ST(0) first: F4..F7 are the x products (and later the
 * accumulated results), F0..F3 are the y products.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
GLNAME( _mesa_x86_transform_points2_general ):

/* FRAME_OFFSET equals the bytes pushed by the prologue (2 registers). */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to transform when the source count is zero. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_gr_done) )

	/* EAX = source stride; mark the destination as holding 4 components. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 (bytes per destination point), then made absolute. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_gr_loop):

	/* x * first matrix group (MAT0..MAT3) */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* y * second matrix group (MAT4..MAT7) */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	/* Fold each y product into the matching x product. */
	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* Add the constant terms MAT12..MAT15. */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* F5 F6 F4 F7 */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* F6 F5 F4 F7 */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* F7 F5 F4 F6 */
	FADD_S( MAT15 )

	/* Store the four results and leave the x87 stack as it was found. */
	FXCH( ST(2) )			/* F4 F5 F7 F6 */
	FSTP_S( DST0 )			/* F5 F7 F6 */
	FSTP_S( DST1 )			/* F7 F6 */
	FXCH( ST(1) )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

LLBL(x86_p2_gr_skip):

	/* Next point; stop when EDI reaches the destination end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_gr_loop) )

LLBL(x86_p2_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points2_perspective
 *
 * Transforms 2-component points using only matrix elements 0, 5 and 14.
 * For every source point (x, y) = (SRC0, SRC1) the loop stores:
 *
 *	DST0 = x*MAT0
 *	DST1 = y*MAT5
 *	DST2 = MAT14	(copied as a raw 32-bit word through EBX)
 *	DST3 = FP_ZERO
 *
 * The arguments are read through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (presumably defined in xform_args.h in terms of FRAME_OFFSET -- confirm).
 * The destination descriptor gets the source count, size 4 and the
 * VEC_SIZE_4 flag bits.
 *
 * Register roles inside the loop:
 *	ESI = current source point, advanced by the source stride (EAX)
 *	EDI = current destination point, advanced by 16 bytes
 *	ECX = destination end address (start + count*16)
 *	EDX = matrix base
 *	EBX = MAT14, loaded once before the loop
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  When the source count is zero the routine returns
 * without writing to the destination descriptor.
 *
 * The F1/F4 tags in the trailing comments name the live x87 stack
 * entries, ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
GLNAME( _mesa_x86_transform_points2_perspective ):

/* FRAME_OFFSET equals the bytes pushed by the prologue (3 registers). */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to transform when the source count is zero. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_pr_done) )

	/* EAX = source stride; mark the destination as holding 4 components. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 (bytes per destination point), then made absolute. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* The third output component is the same for every point. */
	MOV_L( MAT14, EBX )

ALIGNTEXT16
LLBL(x86_p2_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FSTP_S( DST0   )		/* F1 */
	FSTP_S( DST1   )		/* */
	MOV_L( EBX, DST2 )
	MOV_L( CONST(FP_ZERO), DST3 )

LLBL(x86_p2_pr_skip):

	/* Next point; stop when EDI reaches the destination end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_pr_loop) )

LLBL(x86_p2_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points2_3d
 *
 * Transforms 2-component points with the x87 FPU using matrix elements
 * 0-2, 4-6 and 12-14.  For every source point (x, y) = (SRC0, SRC1) the
 * loop stores three results:
 *
 *	DST0 = x*MAT0 + y*MAT4 + MAT12
 *	DST1 = x*MAT1 + y*MAT5 + MAT13
 *	DST2 = x*MAT2 + y*MAT6 + MAT14
 *
 * DST3 is not written.  The arguments are read through ARG_SOURCE,
 * ARG_DEST and ARG_MATRIX (presumably defined in xform_args.h in terms of
 * FRAME_OFFSET -- confirm).  The destination descriptor gets the source
 * count, size 3 and the VEC_SIZE_3 flag bits.
 *
 * Register roles inside the loop:
 *	ESI = current source point, advanced by the source stride (EAX)
 *	EDI = current destination point, advanced by 16 bytes
 *	ECX = destination end address (start + count*16)
 *	EDX = matrix base
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 * When the source count is zero the routine returns without writing to
 * the destination descriptor.
 *
 * The F tags in the trailing comments name the live x87 stack entries,
 * ST(0) first: F4..F6 are the x products (and later the accumulated
 * results), F0..F2 are the y products.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
GLNAME( _mesa_x86_transform_points2_3d ):

/* FRAME_OFFSET equals the bytes pushed by the prologue (2 registers). */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to transform when the source count is zero. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_3dr_done) )

	/* EAX = source stride; mark the destination as holding 3 components. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 (bytes per destination point), then made absolute. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_3dr_loop):

	/* x * (MAT0, MAT1, MAT2) */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* y * (MAT4, MAT5, MAT6) */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	/* Fold each y product into the matching x product. */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* Add the constant terms MAT12..MAT14. */
	FXCH( ST(2) )			/* F4 F5 F6 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 F6 */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* F6 F4 F5 */
	FADD_S( MAT14 )

	/* Store the three results and leave the x87 stack as it was found. */
	FXCH( ST(1) )			/* F4 F6 F5 */
	FSTP_S( DST0 )			/* F6 F5 */
	FXCH( ST(1) )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */

LLBL(x86_p2_3dr_skip):

	/* Next point; stop when EDI reaches the destination end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dr_loop) )

LLBL(x86_p2_3dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Transforms 2-component points using only matrix elements 0, 5, 12, 13
 * and 14.  For every source point (x, y) = (SRC0, SRC1) the loop stores:
 *
 *	DST0 = x*MAT0 + MAT12
 *	DST1 = y*MAT5 + MAT13
 *	DST2 = MAT14	(copied as a raw 32-bit word through EBX)
 *
 * DST3 is not written.  The arguments are read through ARG_SOURCE,
 * ARG_DEST and ARG_MATRIX (presumably defined in xform_args.h in terms of
 * FRAME_OFFSET -- confirm).  The destination descriptor gets the source
 * count, size 3 and the VEC_SIZE_3 flag bits.
 *
 * Register roles inside the loop:
 *	ESI = current source point, advanced by the source stride (EAX)
 *	EDI = current destination point, advanced by 16 bytes
 *	ECX = destination end address (start + count*16)
 *	EDX = matrix base
 *	EBX = MAT14, loaded once before the loop
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  When the source count is zero the routine returns
 * without writing to the destination descriptor.
 *
 * The F tags in the trailing comments name the live x87 stack entries,
 * ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):

/* FRAME_OFFSET equals the bytes pushed by the prologue (3 registers). */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to transform when the source count is zero. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_3dnrr_done) )

	/* EAX = source stride; mark the destination as holding 3 components. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 (bytes per destination point), then made absolute. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* The third output component is the same for every point. */
	MOV_L( MAT14, EBX )

ALIGNTEXT16
LLBL(x86_p2_3dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	/* F4 += MAT12; F5 starts as MAT13 and receives the y product F1. */
	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */
	MOV_L( EBX, DST2 )

LLBL(x86_p2_3dnrr_skip):

	/* Next point; stop when EDI reaches the destination end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dnrr_loop) )

LLBL(x86_p2_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points2_2d
 *
 * Transforms 2-component points with the x87 FPU using matrix elements
 * 0, 1, 4, 5, 12 and 13.  For every source point (x, y) = (SRC0, SRC1)
 * the loop stores two results:
 *
 *	DST0 = x*MAT0 + y*MAT4 + MAT12
 *	DST1 = x*MAT1 + y*MAT5 + MAT13
 *
 * DST2 and DST3 are not written.  The arguments are read through
 * ARG_SOURCE, ARG_DEST and ARG_MATRIX (presumably defined in xform_args.h
 * in terms of FRAME_OFFSET -- confirm).  The destination descriptor gets
 * the source count, size 2 and the VEC_SIZE_2 flag bits.
 *
 * Register roles inside the loop:
 *	ESI = current source point, advanced by the source stride (EAX)
 *	EDI = current destination point, advanced by 16 bytes
 *	ECX = destination end address (start + count*16)
 *	EDX = matrix base
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 * When the source count is zero the routine returns without writing to
 * the destination descriptor.
 *
 * The F tags in the trailing comments name the live x87 stack entries,
 * ST(0) first: F4/F5 are the x products (and later the accumulated
 * results), F0/F1 are the y products.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
GLNAME( _mesa_x86_transform_points2_2d ):

/* FRAME_OFFSET equals the bytes pushed by the prologue (2 registers). */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to transform when the source count is zero. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_2dr_done) )

	/* EAX = source stride; mark the destination as holding 2 components. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 (bytes per destination point), then made absolute. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_2dr_loop):

	/* x * (MAT0, MAT1) */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	/* y * (MAT4, MAT5) */
	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	/* Fold each y product into the matching x product. */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* Add the constant terms MAT12 and MAT13. */
	FXCH( ST(1) )			/* F4 F5 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 */
	FADD_S( MAT13 )

	/* Store the two results and leave the x87 stack as it was found. */
	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */

LLBL(x86_p2_2dr_skip):

	/* Next point; stop when EDI reaches the destination end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dr_loop) )

LLBL(x86_p2_2dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * Transforms 2-component points using only matrix elements 0, 5, 12 and
 * 13.  For every source point (x, y) = (SRC0, SRC1) the loop stores:
 *
 *	DST0 = x*MAT0 + MAT12
 *	DST1 = y*MAT5 + MAT13
 *
 * DST2 and DST3 are not written.  The arguments are read through
 * ARG_SOURCE, ARG_DEST and ARG_MATRIX (presumably defined in xform_args.h
 * in terms of FRAME_OFFSET -- confirm).  The destination descriptor gets
 * the source count, size 2 and the VEC_SIZE_2 flag bits.
 *
 * Register roles inside the loop:
 *	ESI = current source point, advanced by the source stride (EAX)
 *	EDI = current destination point, advanced by 16 bytes
 *	ECX = destination end address (start + count*16)
 *	EDX = matrix base
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 * When the source count is zero the routine returns without writing to
 * the destination descriptor.
 *
 * The entry point uses ALIGNTEXT16, the same alignment macro as the loop
 * label below.  The F tags in the trailing comments name the live x87
 * stack entries, ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):

/* FRAME_OFFSET equals the bytes pushed by the prologue (2 registers). */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to transform when the source count is zero. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_2dnrr_done) )

	/* EAX = source stride; mark the destination as holding 2 components. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 (bytes per destination point), then made absolute. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_2dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	/* F4 += MAT12; F5 starts as MAT13 and receives the y product F1. */
	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0   )		/* F5 */
	FSTP_S( DST1   )		/* */

LLBL(x86_p2_2dnrr_skip):

	/* Next point; stop when EDI reaches the destination end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dnrr_loop) )

LLBL(x86_p2_2dnrr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points2_identity
 *
 * Copies the first two 32-bit components of every source point to the
 * destination unchanged:
 *
 *	DST0 = SRC0
 *	DST1 = SRC1
 *
 * DST2 and DST3 are not written.  The copy uses integer moves, so the
 * values are transferred as raw bit patterns.  The arguments are read
 * through ARG_SOURCE and ARG_DEST (presumably defined in xform_args.h in
 * terms of FRAME_OFFSET -- confirm).  The matrix argument is never read:
 * no matrix element is used, and EDX serves as copy scratch in the loop.
 *
 * The destination descriptor gets the source count, size 2 and the
 * VEC_SIZE_2 flag bits.  If the source and destination data start at the
 * same address, the copy loop is skipped after the descriptor update.
 *
 * Register roles inside the loop:
 *	ESI = current source point, advanced by the source stride (EAX)
 *	EDI = current destination point, advanced by 16 bytes
 *	ECX = destination end address (start + count*16)
 *	EBX, EDX = scratch for the two copied words
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  When the source count is zero the routine returns
 * without writing to the destination descriptor.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
GLNAME( _mesa_x86_transform_points2_identity ):

/* FRAME_OFFSET equals the bytes pushed by the prologue (3 registers). */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Nothing to copy when the source count is zero. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_ir_done) )

	/* EAX = source stride; mark the destination as holding 2 components. */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16 (bytes per destination point), then made absolute. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* Source and destination data share a start address: skip the copy. */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p2_ir_done) )

ALIGNTEXT16
LLBL(x86_p2_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

LLBL(x86_p2_ir_skip):

	/* Next point; stop when EDI reaches the destination end address. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_ir_loop) )

LLBL(x86_p2_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET