Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | pj | 1 | /* |
2 | * Copyright (c) 1997-1999 Massachusetts Institute of Technology |
||
3 | * |
||
4 | * This program is free software; you can redistribute it and/or modify |
||
5 | * it under the terms of the GNU General Public License as published by |
||
6 | * the Free Software Foundation; either version 2 of the License, or |
||
7 | * (at your option) any later version. |
||
8 | * |
||
9 | * This program is distributed in the hope that it will be useful, |
||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
12 | * GNU General Public License for more details. |
||
13 | * |
||
14 | * You should have received a copy of the GNU General Public License |
||
15 | * along with this program; if not, write to the Free Software |
||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||
17 | * |
||
18 | */ |
||
19 | |||
20 | /* fftw-int.h -- internal (private) definitions shared by the FFTW sources */ |
||
21 | /* $Id: fftw-int.h,v 1.1.1.1 2002-03-29 14:12:51 pj Exp $ */ |
||
22 | |||
23 | #ifndef FFTW_INT_H |
||
24 | #define FFTW_INT_H |
||
25 | #include <ports/conffftw.h> |
||
26 | #include <ports/fftw.h> |
||
27 | |||
28 | #ifdef __cplusplus |
||
29 | extern "C" { |
||
30 | #else |
||
31 | #endif /* __cplusplus */ |
||
32 | |||
33 | /****************************************************************************/ |
||
34 | /* Private Functions */ |
||
35 | /****************************************************************************/ |
||
36 | |||
37 | extern fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d); |
||
38 | extern void fftw_destroy_twiddle(fftw_twiddle *tw); |
||
39 | |||
40 | extern void fftw_strided_copy(int, fftw_complex *, int, fftw_complex *); |
||
41 | extern void fftw_executor_simple(int, const fftw_complex *, fftw_complex *, |
||
42 | fftw_plan_node *, int, int); |
||
43 | |||
44 | extern fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n, |
||
45 | fftw_direction dir, int flags); |
||
46 | extern fftw_plan *fftwnd_new_plan_array(int rank); |
||
47 | extern fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans, |
||
48 | int rank, const int *n, |
||
49 | fftw_direction dir, int flags); |
||
50 | extern fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans, |
||
51 | int rank, const int *n, |
||
52 | const int *n_after, |
||
53 | fftw_direction dir, int flags, |
||
54 | fftw_complex *in, int istride, |
||
55 | fftw_complex *out, int ostride); |
||
56 | extern int fftwnd_work_size(int rank, const int *n, int flags, int ncopies); |
||
57 | |||
58 | extern void fftwnd_aux(fftwnd_plan p, int cur_dim, |
||
59 | fftw_complex *in, int istride, |
||
60 | fftw_complex *out, int ostride, |
||
61 | fftw_complex *work); |
||
62 | extern void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim, |
||
63 | int howmany, |
||
64 | fftw_complex *in, int istride, int idist, |
||
65 | fftw_complex *out, int ostride, int odist, |
||
66 | fftw_complex *work); |
||
67 | |||
68 | /* wisdom prototypes */ |
||
/*
 * Category tag taken by fftw_wisdom_lookup() and fftw_wisdom_add().
 * The enumerators keep their implicit values (0 and 1).
 */
enum fftw_wisdom_category {
     FFTW_WISDOM,
     RFFTW_WISDOM
};
||
72 | |||
73 | extern int fftw_wisdom_lookup(int n, int flags, fftw_direction dir, |
||
74 | enum fftw_wisdom_category category, |
||
75 | int istride, int ostride, |
||
76 | enum fftw_node_type *type, |
||
77 | int *signature, int replace_p); |
||
78 | extern void fftw_wisdom_add(int n, int flags, fftw_direction dir, |
||
79 | enum fftw_wisdom_category cat, |
||
80 | int istride, int ostride, |
||
81 | enum fftw_node_type type, |
||
82 | int signature); |
||
83 | |||
84 | /* Private planner functions: */ |
||
85 | extern double fftw_estimate_node(fftw_plan_node *p); |
||
86 | extern fftw_plan_node *fftw_make_node_notw(int size, |
||
87 | const fftw_codelet_desc *config); |
||
88 | extern fftw_plan_node *fftw_make_node_real2hc(int size, |
||
89 | const fftw_codelet_desc *config); |
||
90 | extern fftw_plan_node *fftw_make_node_hc2real(int size, |
||
91 | const fftw_codelet_desc *config); |
||
92 | extern fftw_plan_node *fftw_make_node_twiddle(int n, |
||
93 | const fftw_codelet_desc *config, |
||
94 | fftw_plan_node *recurse, |
||
95 | int flags); |
||
96 | extern fftw_plan_node *fftw_make_node_hc2hc(int n, |
||
97 | fftw_direction dir, |
||
98 | const fftw_codelet_desc *config, |
||
99 | fftw_plan_node *recurse, |
||
100 | int flags); |
||
101 | extern fftw_plan_node *fftw_make_node_generic(int n, int size, |
||
102 | fftw_generic_codelet *codelet, |
||
103 | fftw_plan_node *recurse, |
||
104 | int flags); |
||
105 | extern fftw_plan_node *fftw_make_node_rgeneric(int n, int size, |
||
106 | fftw_direction dir, |
||
107 | fftw_rgeneric_codelet * codelet, |
||
108 | fftw_plan_node *recurse, |
||
109 | int flags); |
||
110 | extern int fftw_factor(int n); |
||
111 | extern fftw_plan_node *fftw_make_node(void); |
||
112 | extern fftw_plan fftw_make_plan(int n, fftw_direction dir, |
||
113 | fftw_plan_node *root, int flags, |
||
114 | enum fftw_node_type wisdom_type, |
||
115 | int wisdom_signature); |
||
116 | extern void fftw_use_plan(fftw_plan p); |
||
117 | extern void fftw_use_node(fftw_plan_node *p); |
||
118 | extern void fftw_destroy_plan_internal(fftw_plan p); |
||
119 | extern fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2); |
||
120 | extern fftw_plan fftw_lookup(fftw_plan *table, int n, int flags); |
||
121 | extern void fftw_insert(fftw_plan *table, fftw_plan this_plan, int n); |
||
122 | extern void fftw_make_empty_table(fftw_plan *table); |
||
123 | extern void fftw_destroy_table(fftw_plan *table); |
||
124 | extern void fftw_complete_twiddle(fftw_plan_node *p, int n); |
||
125 | |||
126 | extern fftw_plan_node *fftw_make_node_rader(int n, int size, |
||
127 | fftw_direction dir, |
||
128 | fftw_plan_node *recurse, |
||
129 | int flags); |
||
130 | extern fftw_rader_data *fftw_rader_top; |
||
131 | |||
132 | /****************************************************************************/ |
||
133 | /* Floating Point Types */ |
||
134 | /****************************************************************************/ |
||
135 | |||
136 | /* |
||
137 | * We use these definitions to make it easier for people to change |
||
138 | * FFTW to use long double and similar types. You shouldn't have to |
||
139 | * change this just to use float or double. |
||
140 | */ |
||
141 | |||
142 | /* |
||
143 | * Change this if your floating-point constants need to be expressed |
||
144 | * in a special way. For example, if fftw_real is long double, you |
||
145 | * will need to append L to your fp constants to make them of the |
||
146 | * same precision. Do this by changing "x" below to "x##L". |
||
147 | */ |
||
148 | #define FFTW_KONST(x) ((fftw_real) x) |
||
149 | |||
150 | #define FFTW_TRIG_SIN sin |
||
151 | #define FFTW_TRIG_COS cos |
||
152 | typedef double FFTW_TRIG_REAL; /* the argument type for sin and cos */ |
||
153 | |||
154 | #define FFTW_K2PI FFTW_KONST(6.2831853071795864769252867665590057683943388) |
||
155 | |||
156 | /****************************************************************************/ |
||
157 | /* gcc/x86 hacks */ |
||
158 | /****************************************************************************/ |
||
159 | |||
160 | /* |
||
161 | * gcc 2.[78].x and x86 specific hacks. These macros align the stack |
||
162 | * pointer so that the double precision temporary variables in the |
||
163 | * codelets will be aligned to a multiple of 8 bytes (*way* faster on |
||
164 | * pentium and pentiumpro) |
||
165 | */ |
||
166 | #ifdef __GNUC__ |
||
167 | #ifdef __i386__ |
||
168 | #ifdef FFTW_ENABLE_I386_HACKS |
||
169 | #ifndef FFTW_ENABLE_FLOAT |
||
170 | #define FFTW_USING_I386_HACKS |
||
171 | #define HACK_ALIGN_STACK_EVEN() { \ |
||
172 | if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); \ |
||
173 | } |
||
174 | |||
175 | #define HACK_ALIGN_STACK_ODD() { \ |
||
176 | if (!(((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); \ |
||
177 | } |
||
178 | |||
179 | #ifdef FFTW_DEBUG_ALIGNMENT |
||
180 | #define ASSERT_ALIGNED_DOUBLE() { \ |
||
181 | double __foo; \ |
||
182 | if ((((long) &__foo) & 0x7)) abort(); \ |
||
183 | } |
||
184 | #endif |
||
185 | |||
186 | #endif |
||
187 | #endif |
||
188 | #endif |
||
189 | #endif |
||
190 | |||
191 | #ifndef HACK_ALIGN_STACK_EVEN |
||
192 | #define HACK_ALIGN_STACK_EVEN() |
||
193 | #endif |
||
194 | #ifndef HACK_ALIGN_STACK_ODD |
||
195 | #define HACK_ALIGN_STACK_ODD() |
||
196 | #endif |
||
197 | #ifndef ASSERT_ALIGNED_DOUBLE |
||
198 | #define ASSERT_ALIGNED_DOUBLE() |
||
199 | #endif |
||
200 | |||
201 | /****************************************************************************/ |
||
202 | /* Timers */ |
||
203 | /****************************************************************************/ |
||
204 | |||
205 | /* |
||
206 | * Here, you can use all the nice timers available in your machine. |
||
207 | */ |
||
208 | |||
209 | /* |
||
210 | * |
||
211 | Things you should define to include your own clock: |
||
212 | |||
213 | fftw_time -- the data type used to store a time |
||
214 | |||
215 | extern fftw_time fftw_get_time(void); |
||
216 | -- a function returning the current time. (We have |
||
217 | implemented this as a macro in most cases.) |
||
218 | |||
219 | extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2); |
||
220 | -- returns the time difference (t1 - t2). |
||
221 | If t1 < t2, it may simply return zero (although this |
||
222 | is not required). (We have implemented this as a macro |
||
223 | in most cases.) |
||
224 | |||
225 | extern double fftw_time_to_sec(fftw_time t); |
||
226 | -- returns the time t expressed in seconds, as a double. |
||
227 | (Implemented as a macro in most cases.) |
||
228 | |||
229 | FFTW_TIME_MIN -- a double-precision macro holding the minimum |
||
230 | time interval (in seconds) for accurate time measurements. |
||
231 | This should probably be at least 100 times the precision of |
||
232 | your clock (we use even longer intervals, to be conservative). |
||
233 | This will determine how long the planner takes to measure |
||
234 | the speeds of different possible plans. |
||
235 | |||
236 | Bracket all of your definitions with an appropriate #ifdef so that |
||
237 | they will be enabled on your machine. If you do add your own |
||
238 | high-precision timer code, let us know (at fftw@theory.lcs.mit.edu). |
||
239 | |||
240 | Only declarations should go in this file. Any function definitions |
||
241 | that you need should go into timer.c. |
||
242 | */ |
||
243 | |||
244 | /* |
||
245 | * define a symbol so that we know that we have the fftw_time_diff |
||
246 | * function/macro (it did not exist prior to FFTW 1.2) |
||
247 | */ |
||
248 | #define FFTW_HAS_TIME_DIFF |
||
249 | |||
250 | /********************************************** |
||
251 | * SOLARIS |
||
252 | **********************************************/ |
||
253 | #if defined(HAVE_GETHRTIME) |
||
254 | |||
255 | /* we use the nanosecond virtual timer */ |
||
256 | #ifdef HAVE_SYS_TIME_H |
||
257 | #include <sys/time.h> |
||
258 | #endif |
||
259 | |||
260 | typedef hrtime_t fftw_time; |
||
261 | |||
/* current time, read from the nanosecond timer */
#define fftw_get_time() gethrtime()
/* elapsed time t1 - t2, in the same units as fftw_get_time() */
#define fftw_time_diff(t1,t2) ((t1) - (t2))
/*
 * Convert a nanosecond count to seconds.  The argument is parenthesized
 * so that an expression argument (e.g. a - b) is converted as a whole
 * instead of having the cast bind to its first operand only.
 */
#define fftw_time_to_sec(t) (((double) (t)) / 1.0e9)
||
265 | |||
266 | /* |
||
267 | * a measurement is valid if it runs for at least |
||
268 | * FFTW_TIME_MIN seconds. |
||
269 | */ |
||
270 | #define FFTW_TIME_MIN (1.0e-4) /* for Solaris nanosecond timer */ |
||
271 | #define FFTW_TIME_REPEAT 8 |
||
272 | |||
273 | /********************************************** |
||
274 | * Pentium time stamp counter |
||
275 | **********************************************/ |
||
276 | #elif defined(__GNUC__) && defined(__i386__) && defined(FFTW_ENABLE_PENTIUM_TIMER) |
||
277 | |||
278 | /* |
||
279 | * Use internal Pentium register (time stamp counter). Resolution |
||
280 | * is 1/FFTW_CYCLES_PER_SEC seconds (e.g. 5 ns for Pentium 200 MHz). |
||
281 | * (This code was contributed by Wolfgang Reimer) |
||
282 | */ |
||
283 | |||
284 | #ifndef FFTW_CYCLES_PER_SEC |
||
285 | #error "Must define FFTW_CYCLES_PER_SEC in fftw/config.h to use the Pentium cycle counter" |
||
286 | #endif |
||
287 | |||
typedef unsigned long long fftw_time;

/*
 * Read the processor's time stamp counter.
 *
 * RDTSC leaves the 64-bit cycle count in EDX:EAX.  The output
 * constraints tell the compiler exactly which registers the
 * instruction writes, so no manual save/restore of EAX/EDX is needed
 * and the read cannot be separated from the stores of its results.
 * The asm is volatile so that successive reads are neither merged nor
 * discarded.  The two halves are combined arithmetically instead of
 * reinterpreting a struct in memory, which avoids depending on the
 * width of long and on struct layout.
 *
 * Returns the current cycle count.
 */
static __inline__ fftw_time read_tsc(void)
{
     unsigned int lo, hi;

     __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
     return (((fftw_time) hi) << 32) | (fftw_time) lo;
}
||
305 | |||
306 | #define fftw_get_time() read_tsc() |
||
307 | #define fftw_time_diff(t1,t2) ((t1) - (t2)) |
||
308 | #define fftw_time_to_sec(t) (((double) (t)) / FFTW_CYCLES_PER_SEC) |
||
309 | #define FFTW_TIME_MIN (1.0e-4) /* for Pentium TSC register */ |
||
310 | |||
311 | /************* generic systems having gettimeofday ************/ |
||
312 | #elif defined(HAVE_GETTIMEOFDAY) || defined(HAVE_BSDGETTIMEOFDAY) |
||
313 | #ifdef HAVE_SYS_TIME_H |
||
314 | #include <sys/time.h> |
||
315 | #endif |
||
316 | #ifdef HAVE_UNISTD_H |
||
317 | #include <unistd.h> |
||
318 | #endif |
||
319 | #define FFTW_USE_GETTIMEOFDAY |
||
320 | |||
321 | typedef struct timeval fftw_time; |
||
322 | |||
323 | extern fftw_time fftw_gettimeofday_get_time(void); |
||
324 | extern fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2); |
||
325 | #define fftw_get_time() fftw_gettimeofday_get_time() |
||
326 | #define fftw_time_diff(t1, t2) fftw_gettimeofday_time_diff(t1, t2) |
||
327 | #define fftw_time_to_sec(t) ((double)(t).tv_sec + (double)(t).tv_usec * 1.0E-6) |
||
328 | |||
329 | #ifndef FFTW_TIME_MIN |
||
330 | /* this should be fine on any system claiming a microsecond timer */ |
||
331 | #define FFTW_TIME_MIN (1.0e-2) |
||
332 | #endif |
||
333 | |||
334 | /********************************************** |
||
335 | * MACINTOSH |
||
336 | **********************************************/ |
||
337 | #elif defined(HAVE_MAC_TIMER) |
||
338 | |||
339 | /* |
||
340 | * By default, use the microsecond-timer in the Mac Time Manager. |
||
341 | * Alternatively, by defining HAVE_MAC_PCI_TIMER, you |
||
342 | * can use the nanosecond timer available *only* on PCI PowerMacs. |
||
343 | */ |
||
344 | #ifndef HAVE_MAC_PCI_TIMER /* use time manager */ |
||
345 | |||
346 | /* |
||
347 | * Use Macintosh Time Manager routines (maximum resolution is about 20 |
||
348 | * microseconds). |
||
349 | */ |
||
350 | typedef struct fftw_time_struct { |
||
351 | unsigned long hi, lo; |
||
352 | } fftw_time; |
||
353 | |||
354 | extern fftw_time get_Mac_microseconds(void); |
||
355 | |||
356 | #define fftw_get_time() get_Mac_microseconds() |
||
357 | |||
358 | /* define as a function instead of a macro: */ |
||
359 | extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2); |
||
360 | |||
/*
 * Convert a {hi, lo} microsecond count to seconds.  hi is the
 * high-order word, so each unit of hi is worth 2^32 = 4294967296
 * microseconds (not 2^32 - 1).
 */
#define fftw_time_to_sec(t) ((t).lo * 1.0e-6 + 4294967296.0e-6 * (t).hi)
||
362 | |||
363 | /* very conservative, since timer should be accurate to 20e-6: */ |
||
364 | /* (although this seems not to be the case in practice) */ |
||
365 | #define FFTW_TIME_MIN (5.0e-2) /* for MacOS Time Manager timer */ |
||
366 | |||
367 | #else /* use nanosecond timer */ |
||
368 | |||
369 | /* Use the nanosecond timer available on PCI PowerMacs. */ |
||
370 | |||
371 | #include <DriverServices.h> |
||
372 | |||
373 | typedef AbsoluteTime fftw_time; |
||
374 | #define fftw_get_time() UpTime() |
||
375 | #define fftw_time_diff(t1,t2) SubAbsoluteFromAbsolute(t1,t2) |
||
376 | #define fftw_time_to_sec(t) (AbsoluteToNanoseconds(t).lo * 1.0e-9) |
||
377 | |||
378 | /* Extremely conservative minimum time: */ |
||
379 | /* for MacOS PCI PowerMac nanosecond timer */ |
||
380 | #define FFTW_TIME_MIN (5.0e-3) |
||
381 | |||
382 | #endif /* use nanosecond timer */ |
||
383 | |||
384 | /********************************************** |
||
385 | * WINDOWS |
||
386 | **********************************************/ |
||
387 | #elif defined(HAVE_WIN32_TIMER) |
||
388 | |||
389 | #include <time.h> |
||
390 | |||
391 | typedef unsigned long fftw_time; |
||
392 | extern unsigned long GetPerfTime(void); |
||
393 | extern double GetPerfSec(double ticks); |
||
394 | |||
395 | #define fftw_get_time() GetPerfTime() |
||
396 | #define fftw_time_diff(t1,t2) ((t1) - (t2)) |
||
397 | #define fftw_time_to_sec(t) GetPerfSec(t) |
||
398 | |||
399 | #define FFTW_TIME_MIN (5.0e-2) /* for Win32 timer */ |
||
400 | |||
401 | /********************************************** |
||
402 | * CRAY |
||
403 | **********************************************/ |
||
404 | #elif defined(_CRAYMPP) /* Cray MPP system */ |
||
405 | |||
406 | double SECONDR(void); /* |
||
407 | * I think you have to link with -lsci to |
||
408 | * get this |
||
409 | */ |
||
410 | |||
411 | typedef double fftw_time; |
||
412 | #define fftw_get_time() SECONDR() |
||
413 | #define fftw_time_diff(t1,t2) ((t1) - (t2)) |
||
414 | #define fftw_time_to_sec(t) (t) |
||
415 | |||
416 | #define FFTW_TIME_MIN (1.0e-1) /* for Cray MPP SECONDR timer */ |
||
417 | |||
418 | |||
419 | |||
420 | /********************************************** |
||
421 | * HARTIK |
||
422 | **********************************************/ |
||
423 | #elif defined(_HARTIK) /* Hartik RT system */ |
||
424 | |||
425 | #include <kernel/kern.h> |
||
426 | |||
427 | typedef TIME fftw_time; |
||
428 | |||
/* current time as returned by the kernel's sys_gettime() */
#define fftw_get_time() sys_gettime(NULL)
/* elapsed time t1 - t2, in the same units as fftw_get_time() */
#define fftw_time_diff(t1,t2) ((t1) - (t2))
/*
 * Convert to seconds by dividing by 1.0e6.  The argument is
 * parenthesized so that an expression argument (e.g. a - b) is
 * converted as a whole instead of having the cast bind to its first
 * operand only.
 */
#define fftw_time_to_sec(t) (((double) (t)) / 1.0e6)
||
432 | |||
433 | /* |
||
434 | * a measurement is valid if it runs for at least |
||
435 | * FFTW_TIME_MIN seconds. |
||
436 | */ |
||
437 | #define FFTW_TIME_MIN (1.0e-6) |
||
438 | #define FFTW_TIME_REPEAT 8 |
||
439 | |||
440 | /********************************************** |
||
441 | * VANILLA UNIX/ISO C SYSTEMS |
||
442 | **********************************************/ |
||
443 | /* last resort: use good old Unix clock() */ |
||
444 | #else |
||
445 | |||
446 | #include <time.h> |
||
447 | |||
448 | typedef clock_t fftw_time; |
||
449 | |||
450 | #ifndef CLOCKS_PER_SEC |
||
451 | #ifdef sun |
||
452 | /* stupid sunos4 prototypes */ |
||
453 | #define CLOCKS_PER_SEC 1000000 |
||
454 | extern long clock(void); |
||
455 | #else /* not sun, we don't know CLOCKS_PER_SEC */ |
||
456 | #error Please define CLOCKS_PER_SEC |
||
457 | #endif |
||
458 | #endif |
||
459 | |||
460 | #define fftw_get_time() clock() |
||
461 | #define fftw_time_diff(t1,t2) ((t1) - (t2)) |
||
462 | #define fftw_time_to_sec(t) (((double) (t)) / CLOCKS_PER_SEC) |
||
463 | |||
464 | /* |
||
465 | * ***VERY*** conservative constant: this says that a |
||
466 | * measurement must run for 200ms in order to be valid. |
||
467 | * You had better check the manual of your machine |
||
468 | * to discover if it can do better than this |
||
469 | */ |
||
470 | #define FFTW_TIME_MIN (2.0e-1) /* for default clock() timer */ |
||
471 | |||
472 | #endif /* UNIX clock() */ |
||
473 | |||
/* number of timing measurements to take, unless configured earlier */
#if !defined(FFTW_TIME_REPEAT)
#define FFTW_TIME_REPEAT 4
#endif

/* upper bound, in seconds, on the time spent measuring a single FFT */
#if !defined(FFTW_TIME_LIMIT)
#define FFTW_TIME_LIMIT 2.0
#endif
||
483 | |||
484 | #ifdef __cplusplus |
||
485 | } /* extern "C" */ |
||
486 | |||
487 | #endif /* __cplusplus */ |
||
488 | |||
489 | #endif /* FFTW_INT_H */ |