Rev 107 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | pj | 1 | /* |
2 | * Copyright (c) 1997-1999 Massachusetts Institute of Technology |
||
3 | * |
||
4 | * This program is free software; you can redistribute it and/or modify |
||
5 | * it under the terms of the GNU General Public License as published by |
||
6 | * the Free Software Foundation; either version 2 of the License, or |
||
7 | * (at your option) any later version. |
||
8 | * |
||
9 | * This program is distributed in the hope that it will be useful, |
||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
12 | * GNU General Public License for more details. |
||
13 | * |
||
14 | * You should have received a copy of the GNU General Public License |
||
15 | * along with this program; if not, write to the Free Software |
||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||
17 | * |
||
18 | */ |
||
19 | |||
20 | /* |
||
21 | * planner.c -- find the optimal plan |
||
22 | */ |
||
23 | |||
107 | pj | 24 | /* $Id: planner.c,v 1.2 2003-03-24 11:14:54 pj Exp $ */ |
2 | pj | 25 | #ifdef FFTW_USING_CILK |
107 | pj | 26 | #include <cilk.h> |
27 | #include <cilk-compat.h> |
||
2 | pj | 28 | #endif |
29 | |||
107 | pj | 30 | #include <fftw-int.h> |
2 | pj | 31 | #include <stdlib.h> |
32 | //#include <ports/stdio.h> |
||
33 | |||
34 | extern fftw_generic_codelet fftw_twiddle_generic; |
||
35 | extern fftw_generic_codelet fftwi_twiddle_generic; |
||
36 | extern fftw_codelet_desc *fftw_config[]; |
||
37 | |||
38 | /* undocumented debugging hook */ |
||
39 | void (*fftw_plan_hook)(fftw_plan plan) = (void (*)(fftw_plan)) 0; |
||
40 | |||
41 | static void init_test_array(fftw_complex *arr, int stride, int n) |
||
42 | { |
||
43 | int j; |
||
44 | |||
45 | for (j = 0; j < n; ++j) { |
||
46 | c_re(arr[stride * j]) = 0.0; |
||
47 | c_im(arr[stride * j]) = 0.0; |
||
48 | } |
||
49 | } |
||
50 | |||
51 | /* |
||
52 | * The timer keeps doubling the number of iterations |
||
53 | * until the program runs for more than FFTW_TIME_MIN |
||
54 | */ |
||
55 | static double fftw_measure_runtime(fftw_plan plan, |
||
56 | fftw_complex *in, int istride, |
||
57 | fftw_complex *out, int ostride) |
||
58 | { |
||
59 | fftw_time begin, end, start; |
||
60 | double t, tmax, tmin; |
||
61 | int i, iter; |
||
62 | int n; |
||
63 | int repeat; |
||
64 | |||
65 | n = plan->n; |
||
66 | |||
67 | iter = 1; |
||
68 | |||
69 | for (;;) { |
||
70 | tmin = 1.0E10; |
||
71 | tmax = -1.0E10; |
||
72 | init_test_array(in, istride, n); |
||
73 | |||
74 | start = fftw_get_time(); |
||
75 | /* repeat the measurement FFTW_TIME_REPEAT times */ |
||
76 | for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) { |
||
77 | begin = fftw_get_time(); |
||
78 | for (i = 0; i < iter; ++i) { |
||
79 | fftw(plan, 1, in, istride, 0, out, ostride, 0); |
||
80 | } |
||
81 | end = fftw_get_time(); |
||
82 | |||
83 | t = fftw_time_to_sec(fftw_time_diff(end, begin)); |
||
84 | if (t < tmin) |
||
85 | tmin = t; |
||
86 | if (t > tmax) |
||
87 | tmax = t; |
||
88 | |||
89 | /* do not run for too long */ |
||
90 | t = fftw_time_to_sec(fftw_time_diff(end, start)); |
||
91 | if (t > FFTW_TIME_LIMIT) |
||
92 | break; |
||
93 | } |
||
94 | |||
95 | if (tmin >= FFTW_TIME_MIN) |
||
96 | break; |
||
97 | |||
98 | iter *= 2; |
||
99 | } |
||
100 | |||
101 | tmin /= (double) iter; |
||
102 | tmax /= (double) iter; |
||
103 | |||
104 | return tmin; |
||
105 | } |
||
106 | |||
107 | /* auxiliary functions */ |
||
108 | static void compute_cost(fftw_plan plan, |
||
109 | fftw_complex *in, int istride, |
||
110 | fftw_complex *out, int ostride) |
||
111 | { |
||
112 | if (plan->flags & FFTW_MEASURE) |
||
113 | plan->cost = fftw_measure_runtime(plan, in, istride, out, ostride); |
||
114 | else { |
||
115 | double c; |
||
116 | c = plan->n * fftw_estimate_node(plan->root); |
||
117 | plan->cost = c; |
||
118 | } |
||
119 | } |
||
120 | |||
121 | static void run_plan_hooks(fftw_plan p) |
||
122 | { |
||
123 | if (fftw_plan_hook && p) { |
||
124 | fftw_complete_twiddle(p->root, p->n); |
||
125 | fftw_plan_hook(p); |
||
126 | } |
||
127 | } |
||
128 | |||
129 | |||
130 | /* macrology */ |
||
131 | #define FOR_ALL_CODELETS(p) \ |
||
132 | fftw_codelet_desc **__q, *p; \ |
||
133 | for (__q = &fftw_config[0]; (p = (*__q)); ++__q) |
||
134 | |||
135 | /****************************************** |
||
136 | * Recursive planner * |
||
137 | ******************************************/ |
||
138 | static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, int flags, |
||
139 | fftw_complex *, int, fftw_complex *, int); |
||
140 | |||
141 | /* |
||
142 | * the planner consists of two parts: one that tries to |
||
143 | * use accumulated wisdom, and one that does not. |
||
144 | * A small driver invokes both parts in sequence |
||
145 | */ |
||
146 | |||
147 | /* planner with wisdom: look up the codelet suggested by the wisdom */ |
||
148 | static fftw_plan planner_wisdom(fftw_plan *table, int n, |
||
149 | fftw_direction dir, int flags, |
||
150 | fftw_complex *in, int istride, |
||
151 | fftw_complex *out, int ostride) |
||
152 | { |
||
153 | fftw_plan best = (fftw_plan) 0; |
||
154 | fftw_plan_node *node; |
||
155 | int have_wisdom; |
||
156 | enum fftw_node_type wisdom_type; |
||
157 | int wisdom_signature; |
||
158 | |||
159 | /* see if we remember any wisdom for this case */ |
||
160 | have_wisdom = fftw_wisdom_lookup(n, flags, dir, FFTW_WISDOM, |
||
161 | istride, ostride, |
||
162 | &wisdom_type, &wisdom_signature, 0); |
||
163 | |||
164 | if (!have_wisdom) |
||
165 | return best; |
||
166 | |||
167 | if (wisdom_type == FFTW_NOTW) { |
||
168 | FOR_ALL_CODELETS(p) { |
||
169 | if (p->dir == dir && p->type == wisdom_type) { |
||
170 | /* see if wisdom applies */ |
||
171 | if (wisdom_signature == p->signature && |
||
172 | p->size == n) { |
||
173 | node = fftw_make_node_notw(n, p); |
||
174 | best = fftw_make_plan(n, dir, node, flags, |
||
175 | p->type, p->signature); |
||
176 | fftw_use_plan(best); |
||
177 | run_plan_hooks(best); |
||
178 | return best; |
||
179 | } |
||
180 | } |
||
181 | } |
||
182 | } |
||
183 | if (wisdom_type == FFTW_TWIDDLE) { |
||
184 | FOR_ALL_CODELETS(p) { |
||
185 | if (p->dir == dir && p->type == wisdom_type) { |
||
186 | |||
187 | /* see if wisdom applies */ |
||
188 | if (wisdom_signature == p->signature && |
||
189 | p->size > 1 && |
||
190 | (n % p->size) == 0) { |
||
191 | fftw_plan r = planner(table, n / p->size, dir, flags, |
||
192 | in, istride, out, ostride); |
||
193 | node = fftw_make_node_twiddle(n, p, |
||
194 | r->root, flags); |
||
195 | best = fftw_make_plan(n, dir, node, flags, |
||
196 | p->type, p->signature); |
||
197 | fftw_use_plan(best); |
||
198 | run_plan_hooks(best); |
||
199 | fftw_destroy_plan_internal(r); |
||
200 | return best; |
||
201 | } |
||
202 | } |
||
203 | } |
||
204 | } |
||
205 | /* |
||
206 | * BUG (or: TODO) Can we have generic wisdom? This is probably |
||
207 | * an academic question |
||
208 | */ |
||
209 | |||
210 | return best; |
||
211 | } |
||
212 | |||
213 | /* |
||
214 | * planner with no wisdom: try all combinations and pick |
||
215 | * the best |
||
216 | */ |
||
217 | static fftw_plan planner_normal(fftw_plan *table, int n, fftw_direction dir, |
||
218 | int flags, |
||
219 | fftw_complex *in, int istride, |
||
220 | fftw_complex *out, int ostride) |
||
221 | { |
||
222 | fftw_plan best = (fftw_plan) 0; |
||
223 | fftw_plan newplan; |
||
224 | fftw_plan_node *node; |
||
225 | |||
226 | /* see if we have any codelet that solves the problem */ |
||
227 | { |
||
228 | FOR_ALL_CODELETS(p) { |
||
229 | if (p->dir == dir && p->type == FFTW_NOTW) { |
||
230 | if (p->size == n) { |
||
231 | node = fftw_make_node_notw(n, p); |
||
232 | newplan = fftw_make_plan(n, dir, node, flags, |
||
233 | p->type, p->signature); |
||
234 | fftw_use_plan(newplan); |
||
235 | compute_cost(newplan, in, istride, out, ostride); |
||
236 | run_plan_hooks(newplan); |
||
237 | best = fftw_pick_better(newplan, best); |
||
238 | } |
||
239 | } |
||
240 | } |
||
241 | } |
||
242 | |||
243 | /* Then, try all available twiddle codelets */ |
||
244 | { |
||
245 | FOR_ALL_CODELETS(p) { |
||
246 | if (p->dir == dir && p->type == FFTW_TWIDDLE) { |
||
247 | if ((n % p->size) == 0 && |
||
248 | p->size > 1 && |
||
249 | (!best || n != p->size)) { |
||
250 | fftw_plan r = planner(table, n / p->size, dir, flags, |
||
251 | in, istride, out, ostride); |
||
252 | node = fftw_make_node_twiddle(n, p, |
||
253 | r->root, flags); |
||
254 | newplan = fftw_make_plan(n, dir, node, flags, |
||
255 | p->type, p->signature); |
||
256 | fftw_use_plan(newplan); |
||
257 | fftw_destroy_plan_internal(r); |
||
258 | compute_cost(newplan, in, istride, out, ostride); |
||
259 | run_plan_hooks(newplan); |
||
260 | best = fftw_pick_better(newplan, best); |
||
261 | } |
||
262 | } |
||
263 | } |
||
264 | } |
||
265 | |||
266 | /* |
||
267 | * resort to generic or rader codelets for unknown factors |
||
268 | */ |
||
269 | { |
||
270 | fftw_generic_codelet *codelet = (dir == FFTW_FORWARD ? |
||
271 | fftw_twiddle_generic : |
||
272 | fftwi_twiddle_generic); |
||
273 | int size, prev_size = 0, remaining_factors = n; |
||
274 | fftw_plan r; |
||
275 | |||
276 | while (remaining_factors > 1) { |
||
277 | size = fftw_factor(remaining_factors); |
||
278 | remaining_factors /= size; |
||
279 | |||
280 | /* don't try the same factor more than once */ |
||
281 | if (size == prev_size) |
||
282 | continue; |
||
283 | prev_size = size; |
||
284 | |||
285 | /* Look for codelets corresponding to this factor. */ |
||
286 | { |
||
287 | FOR_ALL_CODELETS(p) { |
||
288 | if (p->dir == dir && p->type == FFTW_TWIDDLE |
||
289 | && p->size == size) { |
||
290 | size = 0; |
||
291 | break; |
||
292 | } |
||
293 | } |
||
294 | } |
||
295 | |||
296 | /* |
||
297 | * only try a generic/rader codelet if there were no |
||
298 | * twiddle codelets for this factor |
||
299 | */ |
||
300 | if (!size) |
||
301 | continue; |
||
302 | |||
303 | r = planner(table, n / size, dir, flags, |
||
304 | in, istride, out, ostride); |
||
305 | |||
306 | /* Try Rader codelet: */ |
||
307 | node = fftw_make_node_rader(n, size, dir, r->root, flags); |
||
308 | newplan = fftw_make_plan(n, dir, node, flags, FFTW_RADER, 0); |
||
309 | fftw_use_plan(newplan); |
||
310 | compute_cost(newplan, in, istride, out, ostride); |
||
311 | run_plan_hooks(newplan); |
||
312 | best = fftw_pick_better(newplan, best); |
||
313 | |||
314 | if (size < 100) { /* |
||
315 | * only try generic for small |
||
316 | * sizes |
||
317 | */ |
||
318 | /* Try generic codelet: */ |
||
319 | node = fftw_make_node_generic(n, size, codelet, |
||
320 | r->root, flags); |
||
321 | newplan = fftw_make_plan(n, dir, node, flags, |
||
322 | FFTW_GENERIC, 0); |
||
323 | fftw_use_plan(newplan); |
||
324 | compute_cost(newplan, in, istride, out, ostride); |
||
325 | run_plan_hooks(newplan); |
||
326 | best = fftw_pick_better(newplan, best); |
||
327 | } |
||
328 | fftw_destroy_plan_internal(r); |
||
329 | } |
||
330 | } |
||
331 | |||
332 | if (!best) |
||
333 | fftw_die("bug in planner"); |
||
334 | |||
335 | return best; |
||
336 | } |
||
337 | |||
338 | static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, |
||
339 | int flags, |
||
340 | fftw_complex *in, int istride, |
||
341 | fftw_complex *out, int ostride) |
||
342 | { |
||
343 | fftw_plan best = (fftw_plan) 0; |
||
344 | |||
345 | /* see if plan has already been computed */ |
||
346 | best = fftw_lookup(table, n, flags); |
||
347 | if (best) { |
||
348 | fftw_use_plan(best); |
||
349 | return best; |
||
350 | } |
||
351 | /* try a wise plan */ |
||
352 | best = planner_wisdom(table, n, dir, flags, |
||
353 | in, istride, out, ostride); |
||
354 | |||
355 | if (!best) { |
||
356 | /* No wisdom. Plan normally. */ |
||
357 | best = planner_normal(table, n, dir, flags, |
||
358 | in, istride, out, ostride); |
||
359 | } |
||
360 | if (best) { |
||
361 | fftw_insert(table, best, n); |
||
362 | |||
363 | /* remember the wisdom */ |
||
364 | fftw_wisdom_add(n, flags, dir, FFTW_WISDOM, istride, ostride, |
||
365 | best->wisdom_type, |
||
366 | best->wisdom_signature); |
||
367 | } |
||
368 | return best; |
||
369 | } |
||
370 | |||
371 | fftw_plan fftw_create_plan_specific(int n, fftw_direction dir, int flags, |
||
372 | fftw_complex *in, int istride, |
||
373 | fftw_complex *out, int ostride) |
||
374 | { |
||
375 | fftw_plan table; |
||
376 | fftw_plan p1; |
||
377 | |||
378 | /* validate parameters */ |
||
379 | if (n <= 0) |
||
380 | return (fftw_plan) 0; |
||
381 | |||
382 | if ((dir != FFTW_FORWARD) && (dir != FFTW_BACKWARD)) |
||
383 | return (fftw_plan) 0; |
||
384 | |||
385 | fftw_make_empty_table(&table); |
||
386 | p1 = planner(&table, n, dir, flags, |
||
387 | in, istride, out, ostride); |
||
388 | fftw_destroy_table(&table); |
||
389 | |||
390 | fftw_complete_twiddle(p1->root, n); |
||
391 | return p1; |
||
392 | } |
||
393 | |||
394 | fftw_plan fftw_create_plan(int n, fftw_direction dir, int flags) |
||
395 | { |
||
396 | fftw_complex *tmp_in, *tmp_out; |
||
397 | fftw_plan p; |
||
398 | |||
399 | if (flags & FFTW_MEASURE) { |
||
400 | tmp_in = (fftw_complex *) fftw_malloc(2 * n * sizeof(fftw_complex)); |
||
401 | if (!tmp_in) |
||
402 | return 0; |
||
403 | tmp_out = tmp_in + n; |
||
404 | |||
405 | p = fftw_create_plan_specific(n, dir, flags, |
||
406 | tmp_in, 1, tmp_out, 1); |
||
407 | |||
408 | fftw_free(tmp_in); |
||
409 | } else |
||
410 | p = fftw_create_plan_specific(n, dir, flags, |
||
411 | (fftw_complex *) 0, 1, (fftw_complex *) 0, 1); |
||
412 | |||
413 | return p; |
||
414 | } |
||
415 | |||
416 | void fftw_destroy_plan(fftw_plan plan) |
||
417 | { |
||
418 | fftw_destroy_plan_internal(plan); |
||
419 | } |