Subversion Repositories shark

Rev

Rev 3 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 pj 1
/*
2
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
3
 *
4
 * This program is free software; you can redistribute it and/or modify
5
 * it under the terms of the GNU General Public License as published by
6
 * the Free Software Foundation; either version 2 of the License, or
7
 * (at your option) any later version.
8
 *
9
 * This program is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
 * GNU General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 *
18
 */
19
 
20
/*
21
 * Compute transforms of prime sizes using Rader's trick: turn them
22
 * into convolutions of size n - 1, which you then perform via a pair
23
 * of FFTs.
24
 */
25
 
26
#include <stdlib.h>
27
#include <math.h>
28
 
107 pj 29
#include <fftw-int.h>
2 pj 30
 
31
#ifdef FFTW_USING_CILK
32
#include <ports/cilk.h>
33
#include <ports/cilk-compat.h>
34
#endif
35
 
36
#ifdef FFTW_DEBUG
37
#define WHEN_DEBUG(a) a
38
#else
39
#define WHEN_DEBUG(a)
40
#endif
41
 
42
/* compute n^m mod p, where m >= 0 and p > 0. */
43
static int power_mod(int n, int m, int p)
44
{
45
     if (m == 0)
46
          return 1;
47
     else if (m % 2 == 0) {
48
          int x = power_mod(n, m / 2, p);
49
          return ((x * x) % p);
50
     } else
51
          return ((n * power_mod(n, m - 1, p)) % p);
52
}
53
 
54
/*
55
 * Find the period of n in the multiplicative group mod p (p prime).
56
 * That is, return the smallest m such that n^m == 1 mod p.
57
 */
58
static int period(int n, int p)
59
{
60
     int prod = n, period = 1;
61
 
62
     while (prod != 1) {
63
          prod = (prod * n) % p;
64
          ++period;
65
          if (prod == 0)
66
               fftw_die("non-prime order in Rader\n");
67
     }
68
     return period;
69
}
70
 
71
/* find a generator for the multiplicative group mod p, where p is prime */
72
static int find_generator(int p)
73
{
74
     int g;
75
 
76
     for (g = 1; g < p; ++g)
77
          if (period(g, p) == p - 1)
78
               break;
79
     if (g == p)
80
          fftw_die("couldn't find generator for Rader\n");
81
     return g;
82
}
83
 
84
/***************************************************************************/
85
 
86
static fftw_rader_data *create_rader_aux(int p, int flags)
87
{
88
     fftw_complex *omega, *work;
89
     int g, ginv, gpower;
90
     int i;
91
     FFTW_TRIG_REAL twoPiOverN;
92
     fftw_real scale = 1.0 / (p - 1);   /* for convolution */
93
     fftw_plan plan;
94
     fftw_rader_data *d;
95
 
96
     if (p < 2)
97
          fftw_die("non-prime order in Rader\n");
98
 
99
     flags &= ~FFTW_IN_PLACE;
100
 
101
     d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data));
102
 
103
     g = find_generator(p);
104
     ginv = power_mod(g, p - 2, p);
105
 
106
     omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));
107
 
108
     plan = fftw_create_plan(p - 1, FFTW_FORWARD, flags);
109
 
110
     work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));
111
 
112
     twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p;
113
     gpower = 1;
114
     for (i = 0; i < p - 1; ++i) {
115
          c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower);
116
          c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN * gpower);
117
          gpower = (gpower * ginv) % p;
118
     }
119
 
120
     /* fft permuted roots of unity */
121
     fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1);
122
 
123
     fftw_free(work);
124
 
125
     d->plan = plan;
126
     d->omega = omega;
127
     d->g = g;
128
     d->ginv = ginv;
129
     d->p = p;
130
     d->flags = flags;
131
     d->refcount = 1;
132
     d->next = NULL;
133
 
134
     d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc));
135
     d->cdesc->name = NULL;
136
     d->cdesc->codelet = NULL;
137
     d->cdesc->size = p;
138
     d->cdesc->dir = FFTW_FORWARD;
139
     d->cdesc->type = FFTW_RADER;
140
     d->cdesc->signature = g;
141
     d->cdesc->ntwiddle = 0;
142
     d->cdesc->twiddle_order = NULL;
143
     return d;
144
}
145
 
146
/***************************************************************************/
147
 
148
static fftw_rader_data *fftw_create_rader(int p, int flags)
149
{
150
     fftw_rader_data *d = fftw_rader_top;
151
 
152
     flags &= ~FFTW_IN_PLACE;
153
     while (d && (d->p != p || d->flags != flags))
154
          d = d->next;
155
     if (d) {
156
          d->refcount++;
157
          return d;
158
     }
159
     d = create_rader_aux(p, flags);
160
     d->next = fftw_rader_top;
161
     fftw_rader_top = d;
162
     return d;
163
}
164
 
165
/***************************************************************************/
166
 
167
/* Compute the prime FFTs, premultiplied by twiddle factors.  Below, we
168
 * extensively use the identity that fft(x*)* = ifft(x) in order to
169
 * share data between forward and backward transforms and to obviate
170
 * the necessity of having separate forward and backward plans. */
171
 
172
void fftw_twiddle_rader(fftw_complex *A, const fftw_complex *W,
173
                        int m, int r, int stride,
174
                        fftw_rader_data * d)
175
{
176
     fftw_complex *tmp = (fftw_complex *)
177
     fftw_malloc((r - 1) * sizeof(fftw_complex));
178
     int i, k, gpower = 1, g = d->g, ginv = d->ginv;
179
     fftw_real a0r, a0i;
180
     fftw_complex *omega = d->omega;
181
 
182
     for (i = 0; i < m; ++i, A += stride, W += r - 1) {
183
          /*
184
           * Here, we fft W[k-1] * A[k*(m*stride)], using Rader.
185
           * (Actually, W is pre-permuted to match the permutation that we
186
           * will do on A.)
187
           */
188
 
189
          /* First, permute the input and multiply by W, storing in tmp: */
190
          /* gpower == g^k mod r in the following loop */
191
          for (k = 0; k < r - 1; ++k, gpower = (gpower * g) % r) {
192
               fftw_real rA, iA, rW, iW;
193
               rW = c_re(W[k]);
194
               iW = c_im(W[k]);
195
               rA = c_re(A[gpower * (m * stride)]);
196
               iA = c_im(A[gpower * (m * stride)]);
197
               c_re(tmp[k]) = rW * rA - iW * iA;
198
               c_im(tmp[k]) = rW * iA + iW * rA;
199
          }
200
 
201
          WHEN_DEBUG( {
202
                     if (gpower != 1)
203
                     fftw_die("incorrect generator in Rader\n");
204
                     }
205
          );
206
 
207
          /* FFT tmp to A: */
208
          fftw_executor_simple(r - 1, tmp, A + (m * stride),
209
                               d->plan->root, 1, m * stride);
210
 
211
          /* set output DC component: */
212
          a0r = c_re(A[0]);
213
          a0i = c_im(A[0]);
214
          c_re(A[0]) += c_re(A[(m * stride)]);
215
          c_im(A[0]) += c_im(A[(m * stride)]);
216
 
217
          /* now, multiply by omega: */
218
          for (k = 0; k < r - 1; ++k) {
219
               fftw_real rA, iA, rW, iW;
220
               rW = c_re(omega[k]);
221
               iW = c_im(omega[k]);
222
               rA = c_re(A[(k + 1) * (m * stride)]);
223
               iA = c_im(A[(k + 1) * (m * stride)]);
224
               c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
225
               c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);
226
          }
227
 
228
          /* this will add A[0] to all of the outputs after the ifft */
229
          c_re(A[(m * stride)]) += a0r;
230
          c_im(A[(m * stride)]) -= a0i;
231
 
232
          /* inverse FFT: */
233
          fftw_executor_simple(r - 1, A + (m * stride), tmp,
234
                               d->plan->root, m * stride, 1);
235
 
236
          /* finally, do inverse permutation to unshuffle the output: */
237
          for (k = 0; k < r - 1; ++k, gpower = (gpower * ginv) % r) {
238
               c_re(A[gpower * (m * stride)]) = c_re(tmp[k]);
239
               c_im(A[gpower * (m * stride)]) = -c_im(tmp[k]);
240
          }
241
 
242
          WHEN_DEBUG( {
243
                     if (gpower != 1)
244
                     fftw_die("incorrect generator in Rader\n");
245
                     }
246
          );
247
 
248
     }
249
 
250
     fftw_free(tmp);
251
}
252
 
253
void fftwi_twiddle_rader(fftw_complex *A, const fftw_complex *W,
254
                         int m, int r, int stride,
255
                         fftw_rader_data * d)
256
{
257
     fftw_complex *tmp = (fftw_complex *)
258
     fftw_malloc((r - 1) * sizeof(fftw_complex));
259
     int i, k, gpower = 1, g = d->g, ginv = d->ginv;
260
     fftw_real a0r, a0i;
261
     fftw_complex *omega = d->omega;
262
 
263
     for (i = 0; i < m; ++i, A += stride, W += r - 1) {
264
          /*
265
           * Here, we fft W[k-1]* * A[k*(m*stride)], using Rader.
266
           * (Actually, W is pre-permuted to match the permutation that
267
           * we will do on A.)
268
           */
269
 
270
          /* First, permute the input and multiply by W*, storing in tmp: */
271
          /* gpower == g^k mod r in the following loop */
272
          for (k = 0; k < r - 1; ++k, gpower = (gpower * g) % r) {
273
               fftw_real rA, iA, rW, iW;
274
               rW = c_re(W[k]);
275
               iW = c_im(W[k]);
276
               rA = c_re(A[gpower * (m * stride)]);
277
               iA = c_im(A[gpower * (m * stride)]);
278
               c_re(tmp[k]) = rW * rA + iW * iA;
279
               c_im(tmp[k]) = iW * rA - rW * iA;
280
          }
281
 
282
          WHEN_DEBUG( {
283
                     if (gpower != 1)
284
                     fftw_die("incorrect generator in Rader\n");
285
                     }
286
          );
287
 
288
          /* FFT tmp to A: */
289
          fftw_executor_simple(r - 1, tmp, A + (m * stride),
290
                               d->plan->root, 1, m * stride);
291
 
292
          /* set output DC component: */
293
          a0r = c_re(A[0]);
294
          a0i = c_im(A[0]);
295
          c_re(A[0]) += c_re(A[(m * stride)]);
296
          c_im(A[0]) -= c_im(A[(m * stride)]);
297
 
298
          /* now, multiply by omega: */
299
          for (k = 0; k < r - 1; ++k) {
300
               fftw_real rA, iA, rW, iW;
301
               rW = c_re(omega[k]);
302
               iW = c_im(omega[k]);
303
               rA = c_re(A[(k + 1) * (m * stride)]);
304
               iA = c_im(A[(k + 1) * (m * stride)]);
305
               c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
306
               c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);
307
          }
308
 
309
          /* this will add A[0] to all of the outputs after the ifft */
310
          c_re(A[(m * stride)]) += a0r;
311
          c_im(A[(m * stride)]) += a0i;
312
 
313
          /* inverse FFT: */
314
          fftw_executor_simple(r - 1, A + (m * stride), tmp,
315
                               d->plan->root, m * stride, 1);
316
 
317
          /* finally, do inverse permutation to unshuffle the output: */
318
          for (k = 0; k < r - 1; ++k, gpower = (gpower * ginv) % r) {
319
               A[gpower * (m * stride)] = tmp[k];
320
          }
321
 
322
          WHEN_DEBUG( {
323
                     if (gpower != 1)
324
                     fftw_die("incorrect generator in Rader\n");
325
                     }
326
          );
327
     }
328
 
329
     fftw_free(tmp);
330
}
331
 
332
/***************************************************************************/
333
 
334
/*
335
 * Make an FFTW_RADER plan node.  Note that this function must go
336
 * here, rather than in putils.c, because it indirectly calls the
337
 * fftw_planner.  If we included it in putils.c, which is also used
338
 * by rfftw, then any program using rfftw would be linked with all
339
 * of the FFTW codelets, even if they were not needed.   I wish that the
340
 * darn linkers operated on a function rather than a file granularity.
341
 */
342
fftw_plan_node *fftw_make_node_rader(int n, int size, fftw_direction dir,
343
                                     fftw_plan_node *recurse,
344
                                     int flags)
345
{
346
     fftw_plan_node *p = fftw_make_node();
347
 
348
     p->type = FFTW_RADER;
349
     p->nodeu.rader.size = size;
350
     p->nodeu.rader.codelet = dir == FFTW_FORWARD ?
351
         fftw_twiddle_rader : fftwi_twiddle_rader;
352
     p->nodeu.rader.rader_data = fftw_create_rader(size, flags);
353
     p->nodeu.rader.recurse = recurse;
354
     fftw_use_node(recurse);
355
 
356
     if (flags & FFTW_MEASURE)
357
          p->nodeu.rader.tw =
358
              fftw_create_twiddle(n, p->nodeu.rader.rader_data->cdesc);
359
     else
360
          p->nodeu.rader.tw = 0;
361
     return p;
362
}