Rev 3 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | pj | 1 | /* |
2 | * Copyright (c) 1997-1999 Massachusetts Institute of Technology |
||
3 | * |
||
4 | * This program is free software; you can redistribute it and/or modify |
||
5 | * it under the terms of the GNU General Public License as published by |
||
6 | * the Free Software Foundation; either version 2 of the License, or |
||
7 | * (at your option) any later version. |
||
8 | * |
||
9 | * This program is distributed in the hope that it will be useful, |
||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
12 | * GNU General Public License for more details. |
||
13 | * |
||
14 | * You should have received a copy of the GNU General Public License |
||
15 | * along with this program; if not, write to the Free Software |
||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||
17 | * |
||
18 | */ |
||
19 | |||
20 | /* |
||
21 | * fftw_test.c : test program for complex-complex transforms |
||
22 | */ |
||
23 | |||
24 | /* $Id: fftw_tes.c,v 1.1.1.1 2002-03-29 14:12:59 pj Exp $ */ |
||
25 | #include <fftw-int.h> |
||
26 | #include <stdlib.h> |
||
27 | #include <stdio.h> |
||
28 | #include <string.h> |
||
29 | #include <math.h> |
||
30 | #include <time.h> |
||
31 | |||
32 | #include "test_main.h" |
||
33 | |||
34 | char fftw_prefix[] = "fftw"; |
||
35 | |||
36 | void test_ergun(int n, fftw_direction dir, fftw_plan plan); |
||
37 | extern void (*fftw_plan_hook)(fftw_plan plan); |
||
38 | |||
39 | /************************************************* |
||
40 | * Speed tests |
||
41 | *************************************************/ |
||
42 | |||
43 | void zero_arr(int n, fftw_complex *a) |
||
44 | { |
||
45 | int i; |
||
46 | for (i = 0; i < n; ++i) |
||
47 | c_re(a[i]) = c_im(a[i]) = 0.0; |
||
48 | } |
||
49 | |||
50 | void test_speed_aux(int n, fftw_direction dir, int flags, int specific) |
||
51 | { |
||
52 | fftw_complex *in, *out; |
||
53 | fftw_plan plan; |
||
54 | double t; |
||
55 | fftw_time begin, end; |
||
56 | |||
57 | in = (fftw_complex *) fftw_malloc(n * howmany_fields |
||
58 | * sizeof(fftw_complex)); |
||
59 | out = (fftw_complex *) fftw_malloc(n * howmany_fields |
||
60 | * sizeof(fftw_complex)); |
||
61 | |||
62 | if (specific) { |
||
63 | begin = fftw_get_time(); |
||
64 | plan = fftw_create_plan_specific(n, dir, |
||
65 | speed_flag | flags | wisdom_flag, |
||
66 | in, howmany_fields, |
||
67 | out, howmany_fields); |
||
68 | end = fftw_get_time(); |
||
69 | } else { |
||
70 | begin = fftw_get_time(); |
||
71 | plan = fftw_create_plan(n, dir, speed_flag | flags | wisdom_flag); |
||
72 | end = fftw_get_time(); |
||
73 | } |
||
74 | CHECK(plan != NULL, "can't create plan"); |
||
75 | |||
76 | t = fftw_time_to_sec(fftw_time_diff(end, begin)); |
||
77 | WHEN_VERBOSE(2, printf("time for planner: %f s\n", t)); |
||
78 | |||
79 | WHEN_VERBOSE(2, fftw_print_plan(plan)); |
||
80 | |||
81 | if (paranoid && !(flags & FFTW_IN_PLACE)) { |
||
82 | begin = fftw_get_time(); |
||
83 | test_ergun(n, dir, plan); |
||
84 | end = fftw_get_time(); |
||
85 | t = fftw_time_to_sec(fftw_time_diff(end, begin)); |
||
86 | WHEN_VERBOSE(2, printf("time for validation: %f s\n", t)); |
||
87 | } |
||
88 | FFTW_TIME_FFT(fftw(plan, howmany_fields, |
||
89 | in, howmany_fields, 1, out, howmany_fields, 1), |
||
90 | in, n * howmany_fields, t); |
||
91 | |||
92 | fftw_destroy_plan(plan); |
||
93 | |||
94 | WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t))); |
||
95 | WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n))); |
||
96 | WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)" |
||
97 | " = %f\n", howmany_fields * mflops(t, n))); |
||
98 | |||
99 | fftw_free(in); |
||
100 | fftw_free(out); |
||
101 | |||
102 | WHEN_VERBOSE(1, printf("\n")); |
||
103 | } |
||
104 | |||
105 | void test_speed_nd_aux(struct size sz, |
||
106 | fftw_direction dir, int flags, int specific) |
||
107 | { |
||
108 | fftw_complex *in; |
||
109 | fftwnd_plan plan; |
||
110 | double t; |
||
111 | fftw_time begin, end; |
||
112 | int i, N; |
||
113 | |||
114 | /* only bench in-place multi-dim transforms */ |
||
115 | flags |= FFTW_IN_PLACE; |
||
116 | |||
117 | N = 1; |
||
118 | for (i = 0; i < sz.rank; ++i) |
||
119 | N *= (sz.narray[i]); |
||
120 | |||
121 | in = (fftw_complex *) fftw_malloc(N * howmany_fields * |
||
122 | sizeof(fftw_complex)); |
||
123 | |||
124 | if (specific) { |
||
125 | begin = fftw_get_time(); |
||
126 | plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir, |
||
127 | speed_flag | flags | wisdom_flag, |
||
128 | in, howmany_fields, 0, 1); |
||
129 | } else { |
||
130 | begin = fftw_get_time(); |
||
131 | plan = fftwnd_create_plan(sz.rank, sz.narray, |
||
132 | dir, speed_flag | flags | wisdom_flag); |
||
133 | } |
||
134 | end = fftw_get_time(); |
||
135 | CHECK(plan != NULL, "can't create plan"); |
||
136 | |||
137 | t = fftw_time_to_sec(fftw_time_diff(end, begin)); |
||
138 | WHEN_VERBOSE(2, printf("time for planner: %f s\n", t)); |
||
139 | |||
140 | WHEN_VERBOSE(2, printf("\n")); |
||
141 | WHEN_VERBOSE(2, (fftwnd_print_plan(plan))); |
||
142 | WHEN_VERBOSE(2, printf("\n")); |
||
143 | |||
144 | FFTW_TIME_FFT(fftwnd(plan, howmany_fields, |
||
145 | in, howmany_fields, 1, 0, 0, 0), |
||
146 | in, N * howmany_fields, t); |
||
147 | |||
148 | fftwnd_destroy_plan(plan); |
||
149 | |||
150 | WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t))); |
||
151 | WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N))); |
||
152 | WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)" |
||
153 | " = %f\n", howmany_fields * mflops(t, N))); |
||
154 | |||
155 | fftw_free(in); |
||
156 | |||
157 | WHEN_VERBOSE(1, printf("\n")); |
||
158 | } |
||
159 | |||
160 | /************************************************* |
||
161 | * correctness tests |
||
162 | *************************************************/ |
||
163 | void fill_random(fftw_complex *a, int n) |
||
164 | { |
||
165 | int i; |
||
166 | |||
167 | /* generate random inputs */ |
||
168 | for (i = 0; i < n; ++i) { |
||
169 | c_re(a[i]) = DRAND(); |
||
170 | c_im(a[i]) = DRAND(); |
||
171 | } |
||
172 | } |
||
173 | |||
174 | void array_copy(fftw_complex *out, fftw_complex *in, int n) |
||
175 | { |
||
176 | int i; |
||
177 | |||
178 | for (i = 0; i < n; ++i) |
||
179 | out[i] = in[i]; |
||
180 | } |
||
181 | |||
182 | /* C = A + B */ |
||
183 | void array_add(fftw_complex *C, fftw_complex *A, fftw_complex *B, int n) |
||
184 | { |
||
185 | int i; |
||
186 | |||
187 | for (i = 0; i < n; ++i) { |
||
188 | c_re(C[i]) = c_re(A[i]) + c_re(B[i]); |
||
189 | c_im(C[i]) = c_im(A[i]) + c_im(B[i]); |
||
190 | } |
||
191 | } |
||
192 | |||
193 | /* C = A - B */ |
||
194 | void array_sub(fftw_complex *C, fftw_complex *A, fftw_complex *B, int n) |
||
195 | { |
||
196 | int i; |
||
197 | |||
198 | for (i = 0; i < n; ++i) { |
||
199 | c_re(C[i]) = c_re(A[i]) - c_re(B[i]); |
||
200 | c_im(C[i]) = c_im(A[i]) - c_im(B[i]); |
||
201 | } |
||
202 | } |
||
203 | |||
204 | /* B = rotate left A */ |
||
205 | void array_rol(fftw_complex *B, fftw_complex *A, |
||
206 | int n, int n_before, int n_after) |
||
207 | { |
||
208 | int i, ib, ia; |
||
209 | |||
210 | for (ib = 0; ib < n_before; ++ib) { |
||
211 | for (i = 0; i < n - 1; ++i) |
||
212 | for (ia = 0; ia < n_after; ++ia) |
||
213 | B[(ib * n + i) * n_after + ia] = |
||
214 | A[(ib * n + i + 1) * n_after + ia]; |
||
215 | |||
216 | for (ia = 0; ia < n_after; ++ia) |
||
217 | B[(ib * n + n - 1) * n_after + ia] = A[ib * n * n_after + ia]; |
||
218 | } |
||
219 | } |
||
220 | |||
221 | /* A = alpha * A (in place) */ |
||
222 | void array_scale(fftw_complex *A, fftw_complex alpha, int n) |
||
223 | { |
||
224 | int i; |
||
225 | |||
226 | for (i = 0; i < n; ++i) { |
||
227 | fftw_complex a = A[i]; |
||
228 | c_re(A[i]) = c_re(a) * c_re(alpha) - c_im(a) * c_im(alpha); |
||
229 | c_im(A[i]) = c_re(a) * c_im(alpha) + c_im(a) * c_re(alpha); |
||
230 | } |
||
231 | } |
||
232 | |||
233 | void array_compare(fftw_complex *A, fftw_complex *B, int n) |
||
234 | { |
||
235 | double d = compute_error_complex(A, 1, B, 1, n); |
||
236 | if (d > TOLERANCE) { |
||
237 | fflush(stdout); |
||
238 | fprintf(stderr, "Found relative error %e\n", d); |
||
239 | fftw_die("failure in Ergun's verification procedure\n"); |
||
240 | } |
||
241 | } |
||
242 | |||
243 | /* |
||
244 | * guaranteed out-of-place transform. Does the necessary |
||
245 | * copying if the plan is in-place. |
||
246 | */ |
||
247 | static void fftw_out_of_place(fftw_plan plan, int n, |
||
248 | fftw_complex *in, fftw_complex *out) |
||
249 | { |
||
250 | if (plan->flags & FFTW_IN_PLACE) { |
||
251 | array_copy(out, in, n); |
||
252 | fftw(plan, 1, out, 1, n, (fftw_complex *)0, 1, n); |
||
253 | } else { |
||
254 | fftw(plan, 1, in, 1, n, out, 1, n); |
||
255 | } |
||
256 | } |
||
257 | |||
258 | /* |
||
259 | * Implementation of the FFT tester described in |
||
260 | * |
||
261 | * Funda Ergün. Testing multivariate linear functions: Overcoming the |
||
262 | * generator bottleneck. In Proceedings of the Twenty-Seventh Annual |
||
263 | * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas, |
||
264 | * Nevada, 29 May--1 June 1995. |
||
265 | */ |
||
266 | void test_ergun(int n, fftw_direction dir, fftw_plan plan) |
||
267 | { |
||
268 | fftw_complex *inA, *inB, *inC, *outA, *outB, *outC; |
||
269 | fftw_complex *tmp; |
||
270 | fftw_complex impulse; |
||
271 | int i; |
||
272 | int rounds = 20; |
||
273 | FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n; |
||
274 | |||
275 | inA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); |
||
276 | inB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); |
||
277 | inC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); |
||
278 | outA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); |
||
279 | outB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); |
||
280 | outC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); |
||
281 | tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); |
||
282 | |||
283 | WHEN_VERBOSE(2, |
||
284 | printf("Validating plan, n = %d, dir = %s\n", n, |
||
285 | dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD")); |
||
286 | |||
287 | /* test 1: check linearity */ |
||
288 | for (i = 0; i < rounds; ++i) { |
||
289 | fftw_complex alpha, beta; |
||
290 | c_re(alpha) = DRAND(); |
||
291 | c_im(alpha) = DRAND(); |
||
292 | c_re(beta) = DRAND(); |
||
293 | c_im(beta) = DRAND(); |
||
294 | fill_random(inA, n); |
||
295 | fill_random(inB, n); |
||
296 | fftw_out_of_place(plan, n, inA, outA); |
||
297 | fftw_out_of_place(plan, n, inB, outB); |
||
298 | array_scale(outA, alpha, n); |
||
299 | array_scale(outB, beta, n); |
||
300 | array_add(tmp, outA, outB, n); |
||
301 | array_scale(inA, alpha, n); |
||
302 | array_scale(inB, beta, n); |
||
303 | array_add(inC, inA, inB, n); |
||
304 | fftw_out_of_place(plan, n, inC, outC); |
||
305 | array_compare(outC, tmp, n); |
||
306 | } |
||
307 | |||
308 | /* test 2: check that the unit impulse is transformed properly */ |
||
309 | |||
310 | c_re(impulse) = 1.0; |
||
311 | c_im(impulse) = 0.0; |
||
312 | |||
313 | for (i = 0; i < n; ++i) { |
||
314 | /* impulse */ |
||
315 | c_re(inA[i]) = 0.0; |
||
316 | c_im(inA[i]) = 0.0; |
||
317 | |||
318 | /* transform of the impulse */ |
||
319 | outA[i] = impulse; |
||
320 | } |
||
321 | inA[0] = impulse; |
||
322 | |||
323 | for (i = 0; i < rounds; ++i) { |
||
324 | fill_random(inB, n); |
||
325 | array_sub(inC, inA, inB, n); |
||
326 | fftw_out_of_place(plan, n, inB, outB); |
||
327 | fftw_out_of_place(plan, n, inC, outC); |
||
328 | array_add(tmp, outB, outC, n); |
||
329 | array_compare(tmp, outA, n); |
||
330 | } |
||
331 | |||
332 | /* test 3: check the time-shift property */ |
||
333 | /* the paper performs more tests, but this code should be fine too */ |
||
334 | for (i = 0; i < rounds; ++i) { |
||
335 | int j; |
||
336 | |||
337 | fill_random(inA, n); |
||
338 | array_rol(inB, inA, n, 1, 1); |
||
339 | fftw_out_of_place(plan, n, inA, outA); |
||
340 | fftw_out_of_place(plan, n, inB, outB); |
||
341 | for (j = 0; j < n; ++j) { |
||
342 | FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin); |
||
343 | FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin); |
||
344 | c_re(tmp[j]) = c_re(outB[j]) * c - c_im(outB[j]) * s; |
||
345 | c_im(tmp[j]) = c_re(outB[j]) * s + c_im(outB[j]) * c; |
||
346 | } |
||
347 | |||
348 | array_compare(tmp, outA, n); |
||
349 | } |
||
350 | |||
351 | WHEN_VERBOSE(2, printf("Validation done\n")); |
||
352 | |||
353 | fftw_free(tmp); |
||
354 | fftw_free(outC); |
||
355 | fftw_free(outB); |
||
356 | fftw_free(outA); |
||
357 | fftw_free(inC); |
||
358 | fftw_free(inB); |
||
359 | fftw_free(inA); |
||
360 | } |
||
361 | |||
362 | static void fftw_plan_hook_function(fftw_plan p) |
||
363 | { |
||
364 | WHEN_VERBOSE(3, printf("Validating tentative plan\n")); |
||
365 | WHEN_VERBOSE(3, fftw_print_plan(p)); |
||
366 | test_ergun(p->n, p->dir, p); |
||
367 | } |
||
368 | |||
369 | /* Same as test_ergun, but for multi-dimensional transforms: */ |
||
370 | void testnd_ergun(int rank, int *n, fftw_direction dir, fftwnd_plan plan) |
||
371 | { |
||
372 | fftw_complex *inA, *inB, *inC, *outA, *outB, *outC; |
||
373 | fftw_complex *tmp; |
||
374 | fftw_complex impulse; |
||
375 | |||
376 | int N, n_before, n_after, dim; |
||
377 | int i, which_impulse; |
||
378 | int rounds = 20; |
||
379 | FFTW_TRIG_REAL twopin; |
||
380 | |||
381 | N = 1; |
||
382 | for (dim = 0; dim < rank; ++dim) |
||
383 | N *= n[dim]; |
||
384 | |||
385 | inA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
386 | inB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
387 | inC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
388 | outA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
389 | outB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
390 | outC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
391 | tmp = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
392 | |||
393 | WHEN_VERBOSE(2, |
||
394 | printf("Validating plan, N = %d, dir = %s\n", N, |
||
395 | dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD")); |
||
396 | |||
397 | /* test 1: check linearity */ |
||
398 | for (i = 0; i < rounds; ++i) { |
||
399 | fftw_complex alpha, beta; |
||
400 | c_re(alpha) = DRAND(); |
||
401 | c_im(alpha) = DRAND(); |
||
402 | c_re(beta) = DRAND(); |
||
403 | c_im(beta) = DRAND(); |
||
404 | fill_random(inA, N); |
||
405 | fill_random(inB, N); |
||
406 | fftwnd(plan, 1, inA, 1, N, outA, 1, N); |
||
407 | fftwnd(plan, 1, inB, 1, N, outB, 1, N); |
||
408 | array_scale(outA, alpha, N); |
||
409 | array_scale(outB, beta, N); |
||
410 | array_add(tmp, outA, outB, N); |
||
411 | array_scale(inA, alpha, N); |
||
412 | array_scale(inB, beta, N); |
||
413 | array_add(inC, inA, inB, N); |
||
414 | fftwnd(plan, 1, inC, 1, N, outC, 1, N); |
||
415 | array_compare(outC, tmp, N); |
||
416 | } |
||
417 | |||
418 | /* |
||
419 | * test 2: check that the unit impulse is transformed properly -- we |
||
420 | * need to test both the real and imaginary impulses |
||
421 | */ |
||
422 | |||
423 | for (which_impulse = 0; which_impulse < 2; ++which_impulse) { |
||
424 | if (which_impulse == 0) { /* real impulse */ |
||
425 | c_re(impulse) = 1.0; |
||
426 | c_im(impulse) = 0.0; |
||
427 | } else { /* imaginary impulse */ |
||
428 | c_re(impulse) = 0.0; |
||
429 | c_im(impulse) = 1.0; |
||
430 | } |
||
431 | |||
432 | for (i = 0; i < N; ++i) { |
||
433 | /* impulse */ |
||
434 | c_re(inA[i]) = 0.0; |
||
435 | c_im(inA[i]) = 0.0; |
||
436 | |||
437 | /* transform of the impulse */ |
||
438 | outA[i] = impulse; |
||
439 | } |
||
440 | inA[0] = impulse; |
||
441 | |||
442 | for (i = 0; i < rounds; ++i) { |
||
443 | fill_random(inB, N); |
||
444 | array_sub(inC, inA, inB, N); |
||
445 | fftwnd(plan, 1, inB, 1, N, outB, 1, N); |
||
446 | fftwnd(plan, 1, inC, 1, N, outC, 1, N); |
||
447 | array_add(tmp, outB, outC, N); |
||
448 | array_compare(tmp, outA, N); |
||
449 | } |
||
450 | } |
||
451 | |||
452 | /* test 3: check the time-shift property */ |
||
453 | /* the paper performs more tests, but this code should be fine too */ |
||
454 | /* -- we have to check shifts in each dimension */ |
||
455 | |||
456 | n_before = 1; |
||
457 | n_after = N; |
||
458 | for (dim = 0; dim < rank; ++dim) { |
||
459 | int n_cur = n[dim]; |
||
460 | |||
461 | n_after /= n_cur; |
||
462 | twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n_cur; |
||
463 | |||
464 | for (i = 0; i < rounds; ++i) { |
||
465 | int j, jb, ja; |
||
466 | |||
467 | fill_random(inA, N); |
||
468 | array_rol(inB, inA, n_cur, n_before, n_after); |
||
469 | fftwnd(plan, 1, inA, 1, N, outA, 1, N); |
||
470 | fftwnd(plan, 1, inB, 1, N, outB, 1, N); |
||
471 | |||
472 | for (jb = 0; jb < n_before; ++jb) |
||
473 | for (j = 0; j < n_cur; ++j) { |
||
474 | FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin); |
||
475 | FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin); |
||
476 | |||
477 | for (ja = 0; ja < n_after; ++ja) { |
||
478 | c_re(tmp[(jb * n_cur + j) * n_after + ja]) = |
||
479 | c_re(outB[(jb * n_cur + j) * n_after + ja]) * c |
||
480 | - c_im(outB[(jb * n_cur + j) * n_after + ja]) * s; |
||
481 | c_im(tmp[(jb * n_cur + j) * n_after + ja]) = |
||
482 | c_re(outB[(jb * n_cur + j) * n_after + ja]) * s |
||
483 | + c_im(outB[(jb * n_cur + j) * n_after + ja]) * c; |
||
484 | } |
||
485 | } |
||
486 | |||
487 | array_compare(tmp, outA, N); |
||
488 | } |
||
489 | |||
490 | n_before *= n_cur; |
||
491 | } |
||
492 | |||
493 | WHEN_VERBOSE(2, printf("Validation done\n")); |
||
494 | |||
495 | fftw_free(tmp); |
||
496 | fftw_free(outC); |
||
497 | fftw_free(outB); |
||
498 | fftw_free(outA); |
||
499 | fftw_free(inC); |
||
500 | fftw_free(inB); |
||
501 | fftw_free(inA); |
||
502 | } |
||
503 | |||
504 | void test_out_of_place(int n, int istride, int ostride, |
||
505 | int howmany, fftw_direction dir, |
||
506 | fftw_plan validated_plan, |
||
507 | int specific) |
||
508 | { |
||
509 | fftw_complex *in1, *in2, *out1, *out2; |
||
510 | fftw_plan plan; |
||
511 | int i, j; |
||
512 | int flags = measure_flag | wisdom_flag | FFTW_OUT_OF_PLACE; |
||
513 | |||
514 | if (coinflip()) |
||
515 | flags |= FFTW_THREADSAFE; |
||
516 | |||
517 | in1 = (fftw_complex *) |
||
518 | fftw_malloc(istride * n * sizeof(fftw_complex) * howmany); |
||
519 | in2 = (fftw_complex *) |
||
520 | fftw_malloc(n * sizeof(fftw_complex) * howmany); |
||
521 | out1 = (fftw_complex *) |
||
522 | fftw_malloc(ostride * n * sizeof(fftw_complex) * howmany); |
||
523 | out2 = (fftw_complex *) |
||
524 | fftw_malloc(n * sizeof(fftw_complex) * howmany); |
||
525 | |||
526 | if (!specific) |
||
527 | plan = fftw_create_plan(n, dir, flags); |
||
528 | else |
||
529 | plan = fftw_create_plan_specific(n, dir, |
||
530 | flags, |
||
531 | in1, istride, out1, ostride); |
||
532 | |||
533 | /* generate random inputs */ |
||
534 | for (i = 0; i < n * howmany; ++i) { |
||
535 | c_re(in1[i * istride]) = c_re(in2[i]) = DRAND(); |
||
536 | c_im(in1[i * istride]) = c_im(in2[i]) = DRAND(); |
||
537 | } |
||
538 | |||
539 | /* |
||
540 | * fill in other positions of the array, to make sure that |
||
541 | * fftw doesn't overwrite them |
||
542 | */ |
||
543 | for (j = 1; j < istride; ++j) |
||
544 | for (i = 0; i < n * howmany; ++i) { |
||
545 | c_re(in1[i * istride + j]) = i * istride + j; |
||
546 | c_im(in1[i * istride + j]) = i * istride - j; |
||
547 | } |
||
548 | |||
549 | for (j = 1; j < ostride; ++j) |
||
550 | for (i = 0; i < n * howmany; ++i) { |
||
551 | c_re(out1[i * ostride + j]) = -i * ostride + j; |
||
552 | c_im(out1[i * ostride + j]) = -i * ostride - j; |
||
553 | } |
||
554 | |||
555 | CHECK(plan != NULL, "can't create plan"); |
||
556 | WHEN_VERBOSE(2, fftw_print_plan(plan)); |
||
557 | |||
558 | /* fft-ize */ |
||
559 | if (howmany != 1 || istride != 1 || ostride != 1 || coinflip()) |
||
560 | fftw(plan, howmany, in1, istride, n * istride, out1, ostride, |
||
561 | n * ostride); |
||
562 | else |
||
563 | fftw_one(plan, in1, out1); |
||
564 | |||
565 | fftw_destroy_plan(plan); |
||
566 | |||
567 | /* check for overwriting */ |
||
568 | for (j = 1; j < istride; ++j) |
||
569 | for (i = 0; i < n * howmany; ++i) |
||
570 | CHECK(c_re(in1[i * istride + j]) == i * istride + j && |
||
571 | c_im(in1[i * istride + j]) == i * istride - j, |
||
572 | "input has been overwritten"); |
||
573 | for (j = 1; j < ostride; ++j) |
||
574 | for (i = 0; i < n * howmany; ++i) |
||
575 | CHECK(c_re(out1[i * ostride + j]) == -i * ostride + j && |
||
576 | c_im(out1[i * ostride + j]) == -i * ostride - j, |
||
577 | "output has been overwritten"); |
||
578 | |||
579 | for (i = 0; i < howmany; ++i) { |
||
580 | fftw(validated_plan, 1, in2 + n * i, 1, n, out2 + n * i, 1, n); |
||
581 | } |
||
582 | |||
583 | CHECK(compute_error_complex(out1, ostride, out2, 1, n * howmany) < TOLERANCE, |
||
584 | "test_out_of_place: wrong answer"); |
||
585 | WHEN_VERBOSE(2, printf("OK\n")); |
||
586 | |||
587 | fftw_free(in1); |
||
588 | fftw_free(in2); |
||
589 | fftw_free(out1); |
||
590 | fftw_free(out2); |
||
591 | } |
||
592 | |||
593 | void test_in_place(int n, int istride, int howmany, fftw_direction dir, |
||
594 | fftw_plan validated_plan, int specific) |
||
595 | { |
||
596 | fftw_complex *in1, *in2, *out2; |
||
597 | fftw_plan plan; |
||
598 | int i, j; |
||
599 | int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE; |
||
600 | |||
601 | if (coinflip()) |
||
602 | flags |= FFTW_THREADSAFE; |
||
603 | |||
604 | in1 = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany); |
||
605 | in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany); |
||
606 | out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany); |
||
607 | |||
608 | if (!specific) |
||
609 | plan = fftw_create_plan(n, dir, flags); |
||
610 | else |
||
611 | plan = fftw_create_plan_specific(n, dir, flags, |
||
612 | in1, istride, |
||
613 | (fftw_complex *) NULL, 0); |
||
614 | |||
615 | /* generate random inputs */ |
||
616 | for (i = 0; i < n * howmany; ++i) { |
||
617 | c_re(in1[i * istride]) = c_re(in2[i]) = DRAND(); |
||
618 | c_im(in1[i * istride]) = c_im(in2[i]) = DRAND(); |
||
619 | } |
||
620 | |||
621 | /* |
||
622 | * fill in other positions of the array, to make sure that |
||
623 | * fftw doesn't overwrite them |
||
624 | */ |
||
625 | for (j = 1; j < istride; ++j) |
||
626 | for (i = 0; i < n * howmany; ++i) { |
||
627 | c_re(in1[i * istride + j]) = i * istride + j; |
||
628 | c_im(in1[i * istride + j]) = i * istride - j; |
||
629 | } |
||
630 | CHECK(plan != NULL, "can't create plan"); |
||
631 | WHEN_VERBOSE(2, fftw_print_plan(plan)); |
||
632 | |||
633 | /* fft-ize */ |
||
634 | if (howmany != 1 || istride != 1 || coinflip()) |
||
635 | fftw(plan, howmany, in1, istride, n * istride, |
||
636 | (fftw_complex *) NULL, 0, 0); |
||
637 | else |
||
638 | fftw_one(plan, in1, NULL); |
||
639 | |||
640 | fftw_destroy_plan(plan); |
||
641 | |||
642 | /* check for overwriting */ |
||
643 | for (j = 1; j < istride; ++j) |
||
644 | for (i = 0; i < n * howmany; ++i) |
||
645 | CHECK(c_re(in1[i * istride + j]) == i * istride + j && |
||
646 | c_im(in1[i * istride + j]) == i * istride - j, |
||
647 | "input has been overwritten"); |
||
648 | |||
649 | for (i = 0; i < howmany; ++i) { |
||
650 | fftw(validated_plan, 1, in2 + n * i, 1, n, out2 + n * i, 1, n); |
||
651 | } |
||
652 | |||
653 | CHECK(compute_error_complex(in1, istride, out2, 1, n * howmany) < TOLERANCE, |
||
654 | "test_in_place: wrong answer"); |
||
655 | WHEN_VERBOSE(2, printf("OK\n")); |
||
656 | |||
657 | fftw_free(in1); |
||
658 | fftw_free(in2); |
||
659 | fftw_free(out2); |
||
660 | } |
||
661 | |||
662 | void test_out_of_place_both(int n, int istride, int ostride, |
||
663 | int howmany, |
||
664 | fftw_plan validated_plan_forward, |
||
665 | fftw_plan validated_plan_backward) |
||
666 | { |
||
667 | int specific; |
||
668 | |||
669 | for (specific = 0; specific <= 1; ++specific) { |
||
670 | WHEN_VERBOSE(2, |
||
671 | printf("TEST CORRECTNESS (out of place, FFTW_FORWARD, %s)" |
||
672 | " n = %d istride = %d ostride = %d howmany = %d\n", |
||
673 | SPECIFICP(specific), |
||
674 | n, istride, ostride, howmany)); |
||
675 | test_out_of_place(n, istride, ostride, howmany, FFTW_FORWARD, |
||
676 | validated_plan_forward, specific); |
||
677 | |||
678 | WHEN_VERBOSE(2, |
||
679 | printf("TEST CORRECTNESS (out of place, FFTW_BACKWARD, %s)" |
||
680 | " n = %d istride = %d ostride = %d howmany = %d\n", |
||
681 | SPECIFICP(specific), |
||
682 | n, istride, ostride, howmany)); |
||
683 | test_out_of_place(n, istride, ostride, howmany, FFTW_BACKWARD, |
||
684 | validated_plan_backward, specific); |
||
685 | } |
||
686 | } |
||
687 | |||
688 | void test_in_place_both(int n, int istride, int howmany, |
||
689 | fftw_plan validated_plan_forward, |
||
690 | fftw_plan validated_plan_backward) |
||
691 | { |
||
692 | int specific; |
||
693 | |||
694 | for (specific = 0; specific <= 1; ++specific) { |
||
695 | WHEN_VERBOSE(2, |
||
696 | printf("TEST CORRECTNESS (in place, FFTW_FORWARD, %s) " |
||
697 | "n = %d istride = %d howmany = %d\n", |
||
698 | SPECIFICP(specific), |
||
699 | n, istride, howmany)); |
||
700 | test_in_place(n, istride, howmany, FFTW_FORWARD, |
||
701 | validated_plan_forward, specific); |
||
702 | |||
703 | WHEN_VERBOSE(2, |
||
704 | printf("TEST CORRECTNESS (in place, FFTW_BACKWARD, %s) " |
||
705 | "n = %d istride = %d howmany = %d\n", |
||
706 | SPECIFICP(specific), |
||
707 | n, istride, howmany)); |
||
708 | test_in_place(n, istride, howmany, FFTW_BACKWARD, |
||
709 | validated_plan_backward, specific); |
||
710 | } |
||
711 | } |
||
712 | |||
713 | void test_correctness(int n) |
||
714 | { |
||
715 | int istride, ostride, howmany; |
||
716 | fftw_plan validated_plan_forward, validated_plan_backward; |
||
717 | |||
718 | WHEN_VERBOSE(1, |
||
719 | printf("Testing correctness for n = %d...", n); |
||
720 | fflush(stdout)); |
||
721 | |||
722 | /* produce a *good* plan (validated by Ergun's test procedure) */ |
||
723 | validated_plan_forward = |
||
724 | fftw_create_plan(n, FFTW_FORWARD, measure_flag | wisdom_flag); |
||
725 | test_ergun(n, FFTW_FORWARD, validated_plan_forward); |
||
726 | validated_plan_backward = |
||
727 | fftw_create_plan(n, FFTW_BACKWARD, measure_flag | wisdom_flag); |
||
728 | test_ergun(n, FFTW_BACKWARD, validated_plan_backward); |
||
729 | |||
730 | for (istride = 1; istride <= MAX_STRIDE; ++istride) |
||
731 | for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) |
||
732 | for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany) |
||
733 | test_out_of_place_both(n, istride, ostride, howmany, |
||
734 | validated_plan_forward, |
||
735 | validated_plan_backward); |
||
736 | |||
737 | for (istride = 1; istride <= MAX_STRIDE; ++istride) |
||
738 | for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany) |
||
739 | test_in_place_both(n, istride, howmany, |
||
740 | validated_plan_forward, |
||
741 | validated_plan_backward); |
||
742 | |||
743 | fftw_destroy_plan(validated_plan_forward); |
||
744 | fftw_destroy_plan(validated_plan_backward); |
||
745 | |||
746 | if (!(wisdom_flag & FFTW_USE_WISDOM) && chk_mem_leak) |
||
747 | fftw_check_memory_leaks(); |
||
748 | |||
749 | WHEN_VERBOSE(1, printf("OK\n")); |
||
750 | } |
||
751 | |||
752 | /************************************************* |
||
753 | * multi-dimensional correctness tests |
||
754 | *************************************************/ |
||
755 | |||
756 | void testnd_out_of_place(int rank, int *n, fftw_direction dir, |
||
757 | fftwnd_plan validated_plan) |
||
758 | { |
||
759 | int istride, ostride; |
||
760 | int N, dim, i; |
||
761 | fftw_complex *in1, *in2, *out1, *out2; |
||
762 | fftwnd_plan p; |
||
763 | int flags = measure_flag | wisdom_flag; |
||
764 | |||
765 | if (coinflip()) |
||
766 | flags |= FFTW_THREADSAFE; |
||
767 | |||
768 | N = 1; |
||
769 | for (dim = 0; dim < rank; ++dim) |
||
770 | N *= n[dim]; |
||
771 | |||
772 | in1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex)); |
||
773 | out1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex)); |
||
774 | in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
775 | out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
776 | |||
777 | p = fftwnd_create_plan(rank, n, dir, flags); |
||
778 | |||
779 | for (istride = 1; istride <= MAX_STRIDE; ++istride) { |
||
780 | /* generate random inputs */ |
||
781 | for (i = 0; i < N; ++i) { |
||
782 | int j; |
||
783 | c_re(in2[i]) = DRAND(); |
||
784 | c_im(in2[i]) = DRAND(); |
||
785 | for (j = 0; j < istride; ++j) { |
||
786 | c_re(in1[i * istride + j]) = c_re(in2[i]); |
||
787 | c_im(in1[i * istride + j]) = c_im(in2[i]); |
||
788 | } |
||
789 | } |
||
790 | |||
791 | for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) { |
||
792 | int howmany = (istride < ostride) ? istride : ostride; |
||
793 | |||
794 | if (howmany != 1 || istride != 1 || ostride != 1 || coinflip()) |
||
795 | fftwnd(p, howmany, in1, istride, 1, out1, ostride, 1); |
||
796 | else |
||
797 | fftwnd_one(p, in1, out1); |
||
798 | |||
799 | fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1); |
||
800 | |||
801 | for (i = 0; i < howmany; ++i) |
||
802 | CHECK(compute_error_complex(out1 + i, ostride, out2, 1, N) |
||
803 | < TOLERANCE, |
||
804 | "testnd_out_of_place: wrong answer"); |
||
805 | } |
||
806 | } |
||
807 | |||
808 | fftwnd_destroy_plan(p); |
||
809 | |||
810 | fftw_free(out2); |
||
811 | fftw_free(in2); |
||
812 | fftw_free(out1); |
||
813 | fftw_free(in1); |
||
814 | } |
||
815 | |||
816 | void testnd_in_place(int rank, int *n, fftw_direction dir, |
||
817 | fftwnd_plan validated_plan, |
||
818 | int alternate_api, int specific, int force_buffered) |
||
819 | { |
||
820 | int istride; |
||
821 | int N, dim, i; |
||
822 | fftw_complex *in1, *in2, *out2; |
||
823 | fftwnd_plan p; |
||
824 | int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE; |
||
825 | |||
826 | if (coinflip()) |
||
827 | flags |= FFTW_THREADSAFE; |
||
828 | |||
829 | if (force_buffered) |
||
830 | flags |= FFTWND_FORCE_BUFFERED; |
||
831 | |||
832 | N = 1; |
||
833 | for (dim = 0; dim < rank; ++dim) |
||
834 | N *= n[dim]; |
||
835 | |||
836 | in1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex)); |
||
837 | in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
838 | out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); |
||
839 | |||
840 | if (!specific) { |
||
841 | if (alternate_api && (rank == 2 || rank == 3)) { |
||
842 | if (rank == 2) |
||
843 | p = fftw2d_create_plan(n[0], n[1], dir, flags); |
||
844 | else |
||
845 | p = fftw3d_create_plan(n[0], n[1], n[2], dir, flags); |
||
846 | } else /* standard api */ |
||
847 | p = fftwnd_create_plan(rank, n, dir, flags); |
||
848 | } else { /* specific plan creation */ |
||
849 | if (alternate_api && (rank == 2 || rank == 3)) { |
||
850 | if (rank == 2) |
||
851 | p = fftw2d_create_plan_specific(n[0], n[1], dir, flags, |
||
852 | in1, 1, |
||
853 | (fftw_complex *) NULL, 1); |
||
854 | else |
||
855 | p = fftw3d_create_plan_specific(n[0], n[1], n[2], dir, flags, |
||
856 | in1, 1, |
||
857 | (fftw_complex *) NULL, 1); |
||
858 | } else /* standard api */ |
||
859 | p = fftwnd_create_plan_specific(rank, n, dir, flags, |
||
860 | in1, 1, |
||
861 | (fftw_complex *) NULL, 1); |
||
862 | |||
863 | } |
||
864 | |||
865 | for (istride = 1; istride <= MAX_STRIDE; ++istride) { |
||
866 | /* |
||
867 | * generate random inputs */ |
||
868 | for (i = 0; i < N; ++i) { |
||
869 | int j; |
||
870 | c_re(in2[i]) = DRAND(); |
||
871 | c_im(in2[i]) = DRAND(); |
||
872 | for (j = 0; j < istride; ++j) { |
||
873 | c_re(in1[i * istride + j]) = c_re(in2[i]); |
||
874 | c_im(in1[i * istride + j]) = c_im(in2[i]); |
||
875 | } |
||
876 | } |
||
877 | |||
878 | if (istride != 1 || istride != 1 || coinflip()) |
||
879 | fftwnd(p, istride, in1, istride, 1, (fftw_complex *) NULL, 1, 1); |
||
880 | else |
||
881 | fftwnd_one(p, in1, NULL); |
||
882 | |||
883 | fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1); |
||
884 | |||
885 | for (i = 0; i < istride; ++i) |
||
886 | CHECK(compute_error_complex(in1 + i, istride, out2, 1, N) < TOLERANCE, |
||
887 | "testnd_in_place: wrong answer"); |
||
888 | } |
||
889 | |||
890 | fftwnd_destroy_plan(p); |
||
891 | |||
892 | fftw_free(out2); |
||
893 | fftw_free(in2); |
||
894 | fftw_free(in1); |
||
895 | } |
||
896 | |||
897 | void testnd_correctness(struct size sz, fftw_direction dir, |
||
898 | int alt_api, int specific, int force_buf) |
||
899 | { |
||
900 | fftwnd_plan validated_plan; |
||
901 | |||
902 | validated_plan = fftwnd_create_plan(sz.rank, sz.narray, |
||
903 | dir, measure_flag | wisdom_flag); |
||
904 | testnd_ergun(sz.rank, sz.narray, dir, validated_plan); |
||
905 | |||
906 | testnd_out_of_place(sz.rank, sz.narray, dir, validated_plan); |
||
907 | testnd_in_place(sz.rank, sz.narray, dir, validated_plan, alt_api, specific, force_buf); |
||
908 | |||
909 | fftwnd_destroy_plan(validated_plan); |
||
910 | } |
||
911 | |||
912 | /************************************************* |
||
913 | * planner tests |
||
914 | *************************************************/ |
||
915 | |||
916 | void test_planner(int rank) |
||
917 | { |
||
918 | /* |
||
919 | * create and destroy many plans, at random. Check the |
||
920 | * garbage-collecting allocator of twiddle factors |
||
921 | */ |
||
922 | int i, dim; |
||
923 | int r, s; |
||
924 | fftw_plan p[PLANNER_TEST_SIZE]; |
||
925 | fftwnd_plan pnd[PLANNER_TEST_SIZE]; |
||
926 | int *narr, maxdim; |
||
927 | |||
928 | chk_mem_leak = 0; |
||
929 | verbose--; |
||
930 | |||
931 | please_wait(); |
||
932 | if (rank < 1) |
||
933 | rank = 1; |
||
934 | |||
935 | narr = (int *) fftw_malloc(rank * sizeof(int)); |
||
936 | |||
937 | maxdim = (int) pow(8192.0, 1.0/rank); |
||
938 | |||
939 | for (i = 0; i < PLANNER_TEST_SIZE; ++i) { |
||
940 | p[i] = (fftw_plan) 0; |
||
941 | pnd[i] = (fftwnd_plan) 0; |
||
942 | } |
||
943 | |||
944 | for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) { |
||
945 | r = rand(); |
||
946 | if (r < 0) |
||
947 | r = -r; |
||
948 | r = r % PLANNER_TEST_SIZE; |
||
949 | |||
950 | for (dim = 0; dim < rank; ++dim) { |
||
951 | do { |
||
952 | s = rand(); |
||
953 | if (s < 0) |
||
954 | s = -s; |
||
955 | s = s % maxdim + 1; |
||
956 | } while (s == 0); |
||
957 | narr[dim] = s; |
||
958 | } |
||
959 | |||
960 | if (rank == 1) { |
||
961 | if (p[r]) |
||
962 | fftw_destroy_plan(p[r]); |
||
963 | |||
964 | p[r] = fftw_create_plan(narr[0], random_dir(), measure_flag | |
||
965 | wisdom_flag); |
||
966 | if (paranoid && narr[0] < 200) |
||
967 | test_correctness(narr[0]); |
||
968 | } |
||
969 | |||
970 | if (pnd[r]) |
||
971 | fftwnd_destroy_plan(pnd[r]); |
||
972 | |||
973 | pnd[r] = fftwnd_create_plan(rank, narr, |
||
974 | random_dir(), measure_flag | |
||
975 | wisdom_flag); |
||
976 | |||
977 | if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) { |
||
978 | WHEN_VERBOSE(0, printf("test planner: so far so good\n")); |
||
979 | WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n", |
||
980 | i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE)); |
||
981 | } |
||
982 | } |
||
983 | |||
984 | for (i = 0; i < PLANNER_TEST_SIZE; ++i) { |
||
985 | if (p[i]) |
||
986 | fftw_destroy_plan(p[i]); |
||
987 | if (pnd[i]) |
||
988 | fftwnd_destroy_plan(pnd[i]); |
||
989 | } |
||
990 | |||
991 | fftw_free(narr); |
||
992 | verbose++; |
||
993 | chk_mem_leak = 1; |
||
994 | } |
||
995 | |||
996 | /************************************************* |
||
997 | * test initialization |
||
998 | *************************************************/ |
||
999 | |||
1000 | void test_init(int *argc, char **argv) |
||
1001 | { |
||
1002 | } |
||
1003 | |||
1004 | void test_finish(void) |
||
1005 | { |
||
1006 | } |
||
1007 | |||
1008 | void enter_paranoid_mode(void) |
||
1009 | { |
||
1010 | fftw_plan_hook = fftw_plan_hook_function; |
||
1011 | } |
||
1012 | |||
1013 | int get_option(int argc, char **argv, char *argval, int argval_maxlen) |
||
1014 | { |
||
1015 | return default_get_option(argc,argv,argval,argval_maxlen); |
||
1016 | } |