Subversion Repositories shark

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
107 pj 1
/*
2
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
3
 *
4
 * This program is free software; you can redistribute it and/or modify
5
 * it under the terms of the GNU General Public License as published by
6
 * the Free Software Foundation; either version 2 of the License, or
7
 * (at your option) any later version.
8
 *
9
 * This program is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
 * GNU General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 *
18
 */
19
 
20
/* fftw-int.h -- internal (private) definitions used inside the FFTW library */
21
/* $Id: fftw-int.h,v 1.1 2003-03-24 11:14:54 pj Exp $ */
22
 
23
#ifndef FFTW_INT_H
24
#define FFTW_INT_H
25
#include <conffftw.h>
26
#include <fftw.h>
27
 
28
#ifdef __cplusplus
29
extern "C" {
30
#else
31
#endif                          /* __cplusplus */
32
 
33
/****************************************************************************/
34
/*                            Private Functions                             */
35
/****************************************************************************/
36
 
37
extern fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d);
38
extern void fftw_destroy_twiddle(fftw_twiddle *tw);
39
 
40
extern void fftw_strided_copy(int, fftw_complex *, int, fftw_complex *);
41
extern void fftw_executor_simple(int, const fftw_complex *, fftw_complex *,
42
                                 fftw_plan_node *, int, int);
43
 
44
extern fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n,
45
                                          fftw_direction dir, int flags);
46
extern fftw_plan *fftwnd_new_plan_array(int rank);
47
extern fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans,
48
                                              int rank, const int *n,
49
                                              fftw_direction dir, int flags);
50
extern fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans,
51
                                               int rank, const int *n,
52
                                               const int *n_after,
53
                                               fftw_direction dir, int flags,
54
                                               fftw_complex *in, int istride,
55
                                               fftw_complex *out, int ostride);
56
extern int fftwnd_work_size(int rank, const int *n, int flags, int ncopies);
57
 
58
extern void fftwnd_aux(fftwnd_plan p, int cur_dim,
59
                       fftw_complex *in, int istride,
60
                       fftw_complex *out, int ostride,
61
                       fftw_complex *work);
62
extern void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
63
                               int howmany,
64
                               fftw_complex *in, int istride, int idist,
65
                               fftw_complex *out, int ostride, int odist,
66
                               fftw_complex *work);
67
 
68
/* wisdom prototypes */
69
enum fftw_wisdom_category {
70
     FFTW_WISDOM, RFFTW_WISDOM
71
};
72
 
73
extern int fftw_wisdom_lookup(int n, int flags, fftw_direction dir,
74
                              enum fftw_wisdom_category category,
75
                              int istride, int ostride,
76
                              enum fftw_node_type *type,
77
                              int *signature, int replace_p);
78
extern void fftw_wisdom_add(int n, int flags, fftw_direction dir,
79
                            enum fftw_wisdom_category cat,
80
                            int istride, int ostride,
81
                            enum fftw_node_type type,
82
                            int signature);
83
 
84
/* Private planner functions: */
85
extern double fftw_estimate_node(fftw_plan_node *p);
86
extern fftw_plan_node *fftw_make_node_notw(int size,
87
                                        const fftw_codelet_desc *config);
88
extern fftw_plan_node *fftw_make_node_real2hc(int size,
89
                                        const fftw_codelet_desc *config);
90
extern fftw_plan_node *fftw_make_node_hc2real(int size,
91
                                        const fftw_codelet_desc *config);
92
extern fftw_plan_node *fftw_make_node_twiddle(int n,
93
                                         const fftw_codelet_desc *config,
94
                                              fftw_plan_node *recurse,
95
                                              int flags);
96
extern fftw_plan_node *fftw_make_node_hc2hc(int n,
97
                                            fftw_direction dir,
98
                                         const fftw_codelet_desc *config,
99
                                            fftw_plan_node *recurse,
100
                                            int flags);
101
extern fftw_plan_node *fftw_make_node_generic(int n, int size,
102
                                              fftw_generic_codelet *codelet,
103
                                              fftw_plan_node *recurse,
104
                                              int flags);
105
extern fftw_plan_node *fftw_make_node_rgeneric(int n, int size,
106
                                               fftw_direction dir,
107
                                               fftw_rgeneric_codelet * codelet,
108
                                               fftw_plan_node *recurse,
109
                                               int flags);
110
extern int fftw_factor(int n);
111
extern fftw_plan_node *fftw_make_node(void);
112
extern fftw_plan fftw_make_plan(int n, fftw_direction dir,
113
                                fftw_plan_node *root, int flags,
114
                                enum fftw_node_type wisdom_type,
115
                                int wisdom_signature);
116
extern void fftw_use_plan(fftw_plan p);
117
extern void fftw_use_node(fftw_plan_node *p);
118
extern void fftw_destroy_plan_internal(fftw_plan p);
119
extern fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2);
120
extern fftw_plan fftw_lookup(fftw_plan *table, int n, int flags);
121
extern void fftw_insert(fftw_plan *table, fftw_plan this_plan, int n);
122
extern void fftw_make_empty_table(fftw_plan *table);
123
extern void fftw_destroy_table(fftw_plan *table);
124
extern void fftw_complete_twiddle(fftw_plan_node *p, int n);
125
 
126
extern fftw_plan_node *fftw_make_node_rader(int n, int size,
127
                                            fftw_direction dir,
128
                                            fftw_plan_node *recurse,
129
                                            int flags);
130
extern fftw_rader_data *fftw_rader_top;
131
 
132
/****************************************************************************/
133
/*                           Floating Point Types                           */
134
/****************************************************************************/
135
 
136
/*
137
 * We use these definitions to make it easier for people to change
138
 * FFTW to use long double and similar types. You shouldn't have to
139
 * change this just to use float or double.
140
 */
141
 
142
/*
143
 * Change this if your floating-point constants need to be expressed
144
 * in a special way.  For example, if fftw_real is long double, you
145
 * will need to append L to your fp constants to make them of the
146
 * same precision.  Do this by changing "x" below to "x##L".
147
 */
148
/*
 * FFTW_KONST(x): express the floating-point constant x as an fftw_real.
 * The argument is parenthesized so that the cast applies to the whole
 * argument expression rather than only to its first operand.
 */
#define FFTW_KONST(x) ((fftw_real) (x))
149
 
150
#define FFTW_TRIG_SIN sin
151
#define FFTW_TRIG_COS cos
152
typedef double FFTW_TRIG_REAL;  /* the argument type for sin and cos */
153
 
154
#define FFTW_K2PI FFTW_KONST(6.2831853071795864769252867665590057683943388)
155
 
156
/****************************************************************************/
157
/*                               gcc/x86 hacks                              */
158
/****************************************************************************/
159
 
160
/*
161
 * gcc 2.[78].x and x86 specific hacks.  These macros align the stack
162
 * pointer so that the double precision temporary variables in the
163
 * codelets will be aligned to a multiple of 8 bytes (*way* faster on
164
 * pentium and pentiumpro)
165
 */
166
#ifdef __GNUC__
167
#ifdef __i386__
168
#ifdef FFTW_ENABLE_I386_HACKS
169
#ifndef FFTW_ENABLE_FLOAT
170
#define FFTW_USING_I386_HACKS
171
#define HACK_ALIGN_STACK_EVEN() {                        \
172
     if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4);    \
173
}
174
 
175
#define HACK_ALIGN_STACK_ODD() {                         \
176
     if (!(((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4);   \
177
}
178
 
179
#ifdef FFTW_DEBUG_ALIGNMENT
180
#define ASSERT_ALIGNED_DOUBLE() {                        \
181
     double __foo;                                       \
182
     if ((((long) &__foo) & 0x7)) abort();                \
183
}
184
#endif
185
 
186
#endif
187
#endif
188
#endif
189
#endif
190
 
191
#ifndef HACK_ALIGN_STACK_EVEN
192
#define HACK_ALIGN_STACK_EVEN()
193
#endif
194
#ifndef HACK_ALIGN_STACK_ODD
195
#define HACK_ALIGN_STACK_ODD()
196
#endif
197
#ifndef ASSERT_ALIGNED_DOUBLE
198
#define ASSERT_ALIGNED_DOUBLE()
199
#endif
200
 
201
/****************************************************************************/
202
/*                                  Timers                                  */
203
/****************************************************************************/
204
 
205
/*
206
 * Here, you can use all the nice timers available in your machine.
207
 */
208
 
209
/*
210
 *
211
 Things you should define to include your own clock:
212
 
213
 fftw_time -- the data type used to store a time
214
 
215
 extern fftw_time fftw_get_time(void);
216
 -- a function returning the current time.  (We have
217
 implemented this as a macro in most cases.)
218
 
219
 extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
220
 -- returns the time difference (t1 - t2).
221
 If t1 < t2, it may simply return zero (although this
222
 is not required).  (We have implemented this as a macro
223
 in most cases.)
224
 
225
 extern double fftw_time_to_sec(fftw_time t);
226
 -- returns the time t expressed in seconds, as a double.
227
 (Implemented as a macro in most cases.)
228
 
229
 FFTW_TIME_MIN -- a double-precision macro holding the minimum
230
 time interval (in seconds) for accurate time measurements.
231
 This should probably be at least 100 times the precision of
232
 your clock (we use even longer intervals, to be conservative).
233
 This will determine how long the planner takes to measure
234
 the speeds of different possible plans.
235
 
236
 Bracket all of your definitions with an appropriate #ifdef so that
237
 they will be enabled on your machine.  If you do add your own
238
 high-precision timer code, let us know (at fftw@theory.lcs.mit.edu).
239
 
240
 Only declarations should go in this file.  Any function definitions
241
 that you need should go into timer.c.
242
 */
243
 
244
/*
245
 * define a symbol so that we know that we have the fftw_time_diff
246
 * function/macro (it did not exist prior to FFTW 1.2)
247
 */
248
#define FFTW_HAS_TIME_DIFF
249
 
250
/**********************************************
251
 *              SOLARIS
252
 **********************************************/
253
#if defined(HAVE_GETHRTIME)
254
 
255
/* we use the nanosecond virtual timer */
256
#ifdef HAVE_SYS_TIME_H
257
#include <sys/time.h>
258
#endif
259
 
260
typedef hrtime_t fftw_time;
261
 
262
#define fftw_get_time() gethrtime()
263
#define fftw_time_diff(t1,t2) ((t1) - (t2))
264
/*
 * Convert an fftw_time value to seconds (as a double) by dividing by 1.0e9.
 * The argument is parenthesized so that an expression such as (t1 - t2) is
 * converted as a whole instead of having the cast bind to t1 only.
 */
#define fftw_time_to_sec(t) (((double) (t)) / 1.0e9)
265
 
266
/*
267
 * a measurement is valid if it runs for at least
268
 * FFTW_TIME_MIN seconds.
269
 */
270
#define FFTW_TIME_MIN (1.0e-4)  /* for Solaris nanosecond timer */
271
#define FFTW_TIME_REPEAT 8
272
 
273
/**********************************************
274
 *        Pentium time stamp counter
275
 **********************************************/
276
#elif defined(__GNUC__) && defined(__i386__) && defined(FFTW_ENABLE_PENTIUM_TIMER)
277
 
278
/*
279
 * Use internal Pentium register (time stamp counter). Resolution
280
 * is 1/FFTW_CYCLES_PER_SEC seconds (e.g. 5 ns for Pentium 200 MHz).
281
 * (This code was contributed by Wolfgang Reimer)
282
 */
283
 
284
#ifndef FFTW_CYCLES_PER_SEC
285
#error "Must define FFTW_CYCLES_PER_SEC in fftw/config.h to use the Pentium cycle counter"
286
#endif
287
 
288
typedef unsigned long long fftw_time;

/*
 * Read the processor time stamp counter and return it as one 64-bit value.
 *
 * The rdtsc instruction leaves the low 32 bits of the counter in EAX and
 * the high 32 bits in EDX.  Both registers are declared as outputs of a
 * single asm statement, so the compiler does the register bookkeeping
 * itself and no manual save/restore of EAX/EDX is needed.  The asm is
 * marked volatile so that repeated reads are not merged or removed.
 *
 * The two halves are combined with a shift and an OR instead of
 * reinterpreting a struct through a pointer cast; the result therefore
 * does not depend on the width of long or on aliasing assumptions.
 */
static __inline__ fftw_time read_tsc(void)
{
     unsigned int lo, hi;

     __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
     return (((fftw_time) hi) << 32) | (fftw_time) lo;
}
305
 
306
#define fftw_get_time()  read_tsc()
307
#define fftw_time_diff(t1,t2) ((t1) - (t2))
308
#define fftw_time_to_sec(t) (((double) (t)) / FFTW_CYCLES_PER_SEC)
309
#define FFTW_TIME_MIN (1.0e-4)  /* for Pentium TSC register */
310
 
311
/************* generic systems having gettimeofday ************/
312
#elif defined(HAVE_GETTIMEOFDAY) || defined(HAVE_BSDGETTIMEOFDAY)
313
#ifdef HAVE_SYS_TIME_H
314
#include <sys/time.h>
315
#endif
316
#ifdef HAVE_UNISTD_H
317
#include <unistd.h>
318
#endif
319
#define FFTW_USE_GETTIMEOFDAY
320
 
321
typedef struct timeval fftw_time;
322
 
323
extern fftw_time fftw_gettimeofday_get_time(void);
324
extern fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2);
325
#define fftw_get_time() fftw_gettimeofday_get_time()
326
#define fftw_time_diff(t1, t2) fftw_gettimeofday_time_diff(t1, t2)
327
#define fftw_time_to_sec(t) ((double)(t).tv_sec + (double)(t).tv_usec * 1.0E-6)
328
 
329
#ifndef FFTW_TIME_MIN
330
/* this should be fine on any system claiming a microsecond timer */
331
#define FFTW_TIME_MIN (1.0e-2)
332
#endif
333
 
334
/**********************************************
335
 *              MACINTOSH
336
 **********************************************/
337
#elif defined(HAVE_MAC_TIMER)
338
 
339
/*
340
 * By default, use the microsecond-timer in the Mac Time Manager.
341
 * Alternatively, by defining HAVE_MAC_PCI_TIMER, you
342
 * can use the nanosecond timer available *only* on PCI PowerMacs.
343
 */
344
#ifndef HAVE_MAC_PCI_TIMER      /* use time manager */
345
 
346
/*
347
 * Use Macintosh Time Manager routines (maximum resolution is about 20
348
 * microseconds).
349
 */
350
typedef struct fftw_time_struct {
351
     unsigned long hi, lo;
352
} fftw_time;
353
 
354
extern fftw_time get_Mac_microseconds(void);
355
 
356
#define fftw_get_time() get_Mac_microseconds()
357
 
358
/* define as a function instead of a macro: */
359
extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
360
 
361
/*
 * Convert a {hi, lo} fftw_time to seconds (as a double).  lo is scaled by
 * 1.0e-6; hi is taken to be the upper 32-bit word of a 64-bit microsecond
 * count, so each unit of hi is worth 2^32 = 4294967296 microseconds.
 */
#define fftw_time_to_sec(t) ((t).lo * 1.0e-6 + 4294967296.0e-6 * (t).hi)
362
 
363
/* very conservative, since timer should be accurate to 20e-6: */
364
/* (although this seems not to be the case in practice) */
365
#define FFTW_TIME_MIN (5.0e-2)  /* for MacOS Time Manager timer */
366
 
367
#else                           /* use nanosecond timer */
368
 
369
/* Use the nanosecond timer available on PCI PowerMacs. */
370
 
371
#include <DriverServices.h>
372
 
373
typedef AbsoluteTime fftw_time;
374
#define fftw_get_time() UpTime()
375
#define fftw_time_diff(t1,t2) SubAbsoluteFromAbsolute(t1,t2)
376
/*
 * Convert an fftw_time to seconds: take the .lo member of the
 * AbsoluteToNanoseconds() result and scale it by 1.0e-9.
 * NOTE(review): only .lo is used; if the result also carries a high word,
 * it is ignored here and long intervals would presumably wrap -- confirm.
 */
#define fftw_time_to_sec(t) (AbsoluteToNanoseconds(t).lo * 1.0e-9)
377
 
378
/* Extremely conservative minimum time: */
379
/* for MacOS PCI PowerMac nanosecond timer */
380
#define FFTW_TIME_MIN (5.0e-3)
381
 
382
#endif                          /* use nanosecond timer */
383
 
384
/**********************************************
385
 *              WINDOWS
386
 **********************************************/
387
#elif defined(HAVE_WIN32_TIMER)
388
 
389
#include <time.h>
390
 
391
typedef unsigned long fftw_time;
392
extern unsigned long GetPerfTime(void);
393
extern double GetPerfSec(double ticks);
394
 
395
#define fftw_get_time() GetPerfTime()
396
#define fftw_time_diff(t1,t2) ((t1) - (t2))
397
#define fftw_time_to_sec(t) GetPerfSec(t)
398
 
399
#define FFTW_TIME_MIN (5.0e-2)  /* for Win32 timer */
400
 
401
/**********************************************
402
 *              CRAY
403
 **********************************************/
404
#elif defined(_CRAYMPP)         /* Cray MPP system */
405
 
406
double SECONDR(void);           /*
407
                                 * I think you have to link with -lsci to
408
                                 * get this
409
                                 */
410
 
411
typedef double fftw_time;
412
#define fftw_get_time() SECONDR()
413
#define fftw_time_diff(t1,t2) ((t1) - (t2))
414
#define fftw_time_to_sec(t) (t)
415
 
416
#define FFTW_TIME_MIN (1.0e-1)  /* for Cray MPP SECONDR timer */
417
 
418
 
419
 
420
/**********************************************
421
 *              HARTIK
422
 **********************************************/
423
#elif defined(_HARTIK)          /* Hartik RT system */
424
 
425
#include <kernel/kern.h>
426
 
427
typedef  TIME fftw_time;
428
 
429
#define fftw_get_time() sys_gettime(NULL)
430
#define fftw_time_diff(t1,t2) ((t1) - (t2))
431
/*
 * Convert an fftw_time value to seconds (as a double) by dividing by 1.0e6.
 * The argument is parenthesized so that an expression such as (t1 - t2) is
 * converted as a whole instead of having the cast bind to t1 only.
 */
#define fftw_time_to_sec(t) (((double) (t)) / 1.0e6)
432
 
433
/*
434
 * a measurement is valid if it runs for at least
435
 * FFTW_TIME_MIN seconds.
436
 */
437
/*
 * NOTE(review): fftw_time_to_sec in this branch divides by 1.0e6, so this
 * minimum equals a single tick of that scale.  The timer notes earlier in
 * this file suggest using at least 100 times the clock precision --
 * confirm that such a small value is intended here.
 */
#define FFTW_TIME_MIN (1.0e-6)
438
#define FFTW_TIME_REPEAT 8
439
 
440
/**********************************************
441
 *          VANILLA UNIX/ISO C SYSTEMS
442
 **********************************************/
443
/* last resort: use good old Unix clock() */
444
#else
445
 
446
#include <time.h>
447
 
448
typedef clock_t fftw_time;
449
 
450
#ifndef CLOCKS_PER_SEC
451
#ifdef sun
452
/* stupid sunos4 prototypes */
453
#define CLOCKS_PER_SEC 1000000
454
extern long clock(void);
455
#else                           /* not sun, we don't know CLOCKS_PER_SEC */
456
#error Please define CLOCKS_PER_SEC
457
#endif
458
#endif
459
 
460
#define fftw_get_time() clock()
461
#define fftw_time_diff(t1,t2) ((t1) - (t2))
462
#define fftw_time_to_sec(t) (((double) (t)) / CLOCKS_PER_SEC)
463
 
464
/*
465
 * ***VERY*** conservative constant: this says that a
466
 * measurement must run for 200ms in order to be valid.
467
 * You had better check the manual of your machine
468
 * to discover if it can do better than this
469
 */
470
#define FFTW_TIME_MIN (2.0e-1)  /* for default clock() timer */
471
 
472
#endif                          /* UNIX clock() */
473
 
474
/* take FFTW_TIME_REPEAT measurements... */
475
#ifndef FFTW_TIME_REPEAT
476
#define FFTW_TIME_REPEAT 4
477
#endif
478
 
479
/* but do not run for more than TIME_LIMIT seconds while measuring one FFT */
480
#ifndef FFTW_TIME_LIMIT
481
#define FFTW_TIME_LIMIT 2.0
482
#endif
483
 
484
#ifdef __cplusplus
485
}                               /* extern "C" */
486
 
487
#endif                          /* __cplusplus */
488
 
489
#endif                          /* FFTW_INT_H */