Rev 3 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | pj | 1 | /* |
2 | * Copyright (c) 1997-1999 Massachusetts Institute of Technology |
||
3 | * |
||
4 | * This program is free software; you can redistribute it and/or modify |
||
5 | * it under the terms of the GNU General Public License as published by |
||
6 | * the Free Software Foundation; either version 2 of the License, or |
||
7 | * (at your option) any later version. |
||
8 | * |
||
9 | * This program is distributed in the hope that it will be useful, |
||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
12 | * GNU General Public License for more details. |
||
13 | * |
||
14 | * You should have received a copy of the GNU General Public License |
||
15 | * along with this program; if not, write to the Free Software |
||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||
17 | * |
||
18 | */ |
||
19 | |||
20 | /* |
||
21 | * rexec.c -- execute the fft |
||
22 | */ |
||
23 | |||
107 | pj | 24 | /* $Id: rexec.c,v 1.2 2003-03-24 11:14:59 pj Exp $ */ |
2 | pj | 25 | //#include <ports/stdio.h> |
26 | #include <stdlib.h> |
||
27 | |||
107 | pj | 28 | #include <fftw-int.h> |
29 | #include <rfftw.h> |
||
2 | pj | 30 | |
31 | void rfftw_strided_copy(int n, fftw_real *in, int ostride, |
||
32 | fftw_real *out) |
||
33 | { |
||
34 | int i; |
||
35 | fftw_real r0, r1, r2, r3; |
||
36 | |||
37 | i = 0; |
||
38 | for (; i < (n & 3); ++i) { |
||
39 | out[i * ostride] = in[i]; |
||
40 | } |
||
41 | for (; i < n; i += 4) { |
||
42 | r0 = in[i]; |
||
43 | r1 = in[i + 1]; |
||
44 | r2 = in[i + 2]; |
||
45 | r3 = in[i + 3]; |
||
46 | out[i * ostride] = r0; |
||
47 | out[(i + 1) * ostride] = r1; |
||
48 | out[(i + 2) * ostride] = r2; |
||
49 | out[(i + 3) * ostride] = r3; |
||
50 | } |
||
51 | } |
||
52 | |||
53 | void rfftw_executor_simple(int n, fftw_real *in, |
||
54 | fftw_real *out, |
||
55 | fftw_plan_node *p, |
||
56 | int istride, |
||
57 | int ostride) |
||
58 | { |
||
59 | switch (p->type) { |
||
60 | case FFTW_REAL2HC: |
||
61 | HACK_ALIGN_STACK_ODD(); |
||
62 | (p->nodeu.real2hc.codelet) (in, out, out + n * ostride, |
||
63 | istride, ostride, -ostride); |
||
64 | break; |
||
65 | |||
66 | case FFTW_HC2REAL: |
||
67 | HACK_ALIGN_STACK_ODD(); |
||
68 | (p->nodeu.hc2real.codelet) (in, in + n * istride, out, |
||
69 | istride, -istride, ostride); |
||
70 | break; |
||
71 | |||
72 | case FFTW_HC2HC: |
||
73 | { |
||
74 | int r = p->nodeu.hc2hc.size; |
||
75 | int m = n / r; |
||
76 | int i; |
||
77 | /* |
||
78 | * please do resist the temptation of initializing |
||
79 | * these variables here. Doing so forces the |
||
80 | * compiler to keep a live variable across the |
||
81 | * recursive call. |
||
82 | */ |
||
83 | fftw_hc2hc_codelet *codelet; |
||
84 | fftw_complex *W; |
||
85 | |||
86 | switch (p->nodeu.hc2hc.dir) { |
||
87 | case FFTW_REAL_TO_COMPLEX: |
||
88 | for (i = 0; i < r; ++i) |
||
89 | rfftw_executor_simple(m, in + i * istride, |
||
90 | out + i * (m*ostride), |
||
91 | p->nodeu.hc2hc.recurse, |
||
92 | istride * r, ostride); |
||
93 | |||
94 | W = p->nodeu.hc2hc.tw->twarray; |
||
95 | codelet = p->nodeu.hc2hc.codelet; |
||
96 | HACK_ALIGN_STACK_EVEN(); |
||
97 | codelet(out, W, m * ostride, m, ostride); |
||
98 | break; |
||
99 | case FFTW_COMPLEX_TO_REAL: |
||
100 | W = p->nodeu.hc2hc.tw->twarray; |
||
101 | codelet = p->nodeu.hc2hc.codelet; |
||
102 | HACK_ALIGN_STACK_EVEN(); |
||
103 | codelet(in, W, m * istride, m, istride); |
||
104 | |||
105 | for (i = 0; i < r; ++i) |
||
106 | rfftw_executor_simple(m, in + i * (m*istride), |
||
107 | out + i * ostride, |
||
108 | p->nodeu.hc2hc.recurse, |
||
109 | istride, ostride * r); |
||
110 | break; |
||
111 | default: |
||
112 | goto bug; |
||
113 | } |
||
114 | |||
115 | break; |
||
116 | } |
||
117 | |||
118 | case FFTW_RGENERIC: |
||
119 | { |
||
120 | int r = p->nodeu.rgeneric.size; |
||
121 | int m = n / r; |
||
122 | int i; |
||
123 | fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet; |
||
124 | fftw_complex *W = p->nodeu.rgeneric.tw->twarray; |
||
125 | |||
126 | switch (p->nodeu.rgeneric.dir) { |
||
127 | case FFTW_REAL_TO_COMPLEX: |
||
128 | for (i = 0; i < r; ++i) |
||
129 | rfftw_executor_simple(m, in + i * istride, |
||
130 | out + i * (m * ostride), |
||
131 | p->nodeu.rgeneric.recurse, |
||
132 | istride * r, ostride); |
||
133 | |||
134 | codelet(out, W, m, r, n, ostride); |
||
135 | break; |
||
136 | case FFTW_COMPLEX_TO_REAL: |
||
137 | codelet(in, W, m, r, n, istride); |
||
138 | |||
139 | for (i = 0; i < r; ++i) |
||
140 | rfftw_executor_simple(m, in + i * m * istride, |
||
141 | out + i * ostride, |
||
142 | p->nodeu.rgeneric.recurse, |
||
143 | istride, ostride * r); |
||
144 | break; |
||
145 | default: |
||
146 | goto bug; |
||
147 | } |
||
148 | |||
149 | break; |
||
150 | } |
||
151 | |||
152 | default: |
||
153 | bug: |
||
154 | fftw_die("BUG in rexecutor: invalid plan\n"); |
||
155 | break; |
||
156 | } |
||
157 | } |
||
158 | |||
159 | static void rexecutor_simple_inplace(int n, fftw_real *in, |
||
160 | fftw_real *out, |
||
161 | fftw_plan_node *p, |
||
162 | int istride) |
||
163 | { |
||
164 | switch (p->type) { |
||
165 | case FFTW_REAL2HC: |
||
166 | HACK_ALIGN_STACK_ODD(); |
||
167 | (p->nodeu.real2hc.codelet) (in, in, in + n * istride, |
||
168 | istride, istride, -istride); |
||
169 | break; |
||
170 | |||
171 | case FFTW_HC2REAL: |
||
172 | HACK_ALIGN_STACK_ODD(); |
||
173 | (p->nodeu.hc2real.codelet) (in, in + n * istride, in, |
||
174 | istride, -istride, istride); |
||
175 | break; |
||
176 | |||
177 | default: |
||
178 | { |
||
179 | fftw_real *tmp; |
||
180 | |||
181 | if (out) |
||
182 | tmp = out; |
||
183 | else |
||
184 | tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); |
||
185 | |||
186 | rfftw_executor_simple(n, in, tmp, p, istride, 1); |
||
187 | rfftw_strided_copy(n, tmp, istride, in); |
||
188 | |||
189 | if (!out) |
||
190 | fftw_free(tmp); |
||
191 | } |
||
192 | } |
||
193 | } |
||
194 | |||
195 | static void rexecutor_many(int n, fftw_real *in, |
||
196 | fftw_real *out, |
||
197 | fftw_plan_node *p, |
||
198 | int istride, |
||
199 | int ostride, |
||
200 | int howmany, int idist, int odist) |
||
201 | { |
||
202 | switch (p->type) { |
||
203 | case FFTW_REAL2HC: |
||
204 | { |
||
205 | fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; |
||
206 | int s; |
||
207 | |||
208 | HACK_ALIGN_STACK_ODD(); |
||
209 | for (s = 0; s < howmany; ++s) |
||
210 | codelet(in + s * idist, out + s * odist, |
||
211 | out + n * ostride + s * odist, |
||
212 | istride, ostride, -ostride); |
||
213 | break; |
||
214 | } |
||
215 | |||
216 | case FFTW_HC2REAL: |
||
217 | { |
||
218 | fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; |
||
219 | int s; |
||
220 | |||
221 | HACK_ALIGN_STACK_ODD(); |
||
222 | for (s = 0; s < howmany; ++s) |
||
223 | codelet(in + s * idist, in + n * istride + s * idist, |
||
224 | out + s * odist, |
||
225 | istride, -istride, ostride); |
||
226 | break; |
||
227 | } |
||
228 | |||
229 | default: |
||
230 | { |
||
231 | int s; |
||
232 | for (s = 0; s < howmany; ++s) { |
||
233 | rfftw_executor_simple(n, in + s * idist, |
||
234 | out + s * odist, |
||
235 | p, istride, ostride); |
||
236 | } |
||
237 | } |
||
238 | } |
||
239 | } |
||
240 | |||
241 | static void rexecutor_many_inplace(int n, fftw_real *in, |
||
242 | fftw_real *out, |
||
243 | fftw_plan_node *p, |
||
244 | int istride, |
||
245 | int howmany, int idist) |
||
246 | { |
||
247 | switch (p->type) { |
||
248 | case FFTW_REAL2HC: |
||
249 | { |
||
250 | fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; |
||
251 | int s; |
||
252 | |||
253 | HACK_ALIGN_STACK_ODD(); |
||
254 | for (s = 0; s < howmany; ++s) |
||
255 | codelet(in + s * idist, in + s * idist, |
||
256 | in + n * istride + s * idist, |
||
257 | istride, istride, -istride); |
||
258 | |||
259 | break; |
||
260 | } |
||
261 | |||
262 | case FFTW_HC2REAL: |
||
263 | { |
||
264 | fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; |
||
265 | int s; |
||
266 | |||
267 | HACK_ALIGN_STACK_ODD(); |
||
268 | for (s = 0; s < howmany; ++s) |
||
269 | codelet(in + s * idist, in + n * istride + s * idist, |
||
270 | in + s * idist, |
||
271 | istride, -istride, istride); |
||
272 | |||
273 | break; |
||
274 | } |
||
275 | |||
276 | default: |
||
277 | { |
||
278 | int s; |
||
279 | fftw_real *tmp; |
||
280 | if (out) |
||
281 | tmp = out; |
||
282 | else |
||
283 | tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); |
||
284 | |||
285 | for (s = 0; s < howmany; ++s) { |
||
286 | rfftw_executor_simple(n, |
||
287 | in + s * idist, |
||
288 | tmp, |
||
289 | p, istride, 1); |
||
290 | rfftw_strided_copy(n, tmp, istride, in + s * idist); |
||
291 | } |
||
292 | |||
293 | if (!out) |
||
294 | fftw_free(tmp); |
||
295 | } |
||
296 | } |
||
297 | } |
||
298 | |||
299 | /* user interface */ |
||
300 | void rfftw(fftw_plan plan, int howmany, fftw_real *in, int istride, |
||
301 | int idist, fftw_real *out, int ostride, int odist) |
||
302 | { |
||
303 | int n = plan->n; |
||
304 | |||
305 | if (plan->flags & FFTW_IN_PLACE) { |
||
306 | if (howmany == 1) { |
||
307 | rexecutor_simple_inplace(n, in, out, plan->root, istride); |
||
308 | } else { |
||
309 | rexecutor_many_inplace(n, in, out, plan->root, istride, howmany, |
||
310 | idist); |
||
311 | } |
||
312 | } else { |
||
313 | if (howmany == 1) { |
||
314 | rfftw_executor_simple(n, in, out, plan->root, istride, ostride); |
||
315 | } else { |
||
316 | rexecutor_many(n, in, out, plan->root, istride, ostride, |
||
317 | howmany, idist, odist); |
||
318 | } |
||
319 | } |
||
320 | } |
||
321 | |||
322 | void rfftw_one(fftw_plan plan, fftw_real *in, fftw_real *out) |
||
323 | { |
||
324 | int n = plan->n; |
||
325 | |||
326 | if (plan->flags & FFTW_IN_PLACE) |
||
327 | rexecutor_simple_inplace(n, in, out, plan->root, 1); |
||
328 | else |
||
329 | rfftw_executor_simple(n, in, out, plan->root, 1, 1); |
||
330 | } |