Rev 3 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | pj | 1 | /* |
2 | * Copyright (c) 1997-1999 Massachusetts Institute of Technology |
||
3 | * |
||
4 | * This program is free software; you can redistribute it and/or modify |
||
5 | * it under the terms of the GNU General Public License as published by |
||
6 | * the Free Software Foundation; either version 2 of the License, or |
||
7 | * (at your option) any later version. |
||
8 | * |
||
9 | * This program is distributed in the hope that it will be useful, |
||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
12 | * GNU General Public License for more details. |
||
13 | * |
||
14 | * You should have received a copy of the GNU General Public License |
||
15 | * along with this program; if not, write to the Free Software |
||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||
17 | * |
||
18 | */ |
||
19 | |||
20 | /* |
||
21 | * executor.c -- execute the fft |
||
22 | */ |
||
23 | |||
107 | pj | 24 | /* $Id: executor.c,v 1.2 2003-03-24 11:14:50 pj Exp $ */ |
25 | #include <fftw-int.h> |
||
2 | pj | 26 | //#include <ports/stdio.h> |
27 | #include <stdlib.h> |
||
28 | |||
107 | pj | 29 | const char *fftw_version = "FFTW V" FFTW_VERSION " ($Id: executor.c,v 1.2 2003-03-24 11:14:50 pj Exp $)"; |
2 | pj | 30 | |
31 | /* |
||
32 | * This function is called in other files, so we cannot declare |
||
33 | * it static. |
||
34 | */ |
||
35 | void fftw_strided_copy(int n, fftw_complex *in, int ostride, |
||
36 | fftw_complex *out) |
||
37 | { |
||
38 | int i; |
||
39 | fftw_real r0, r1, i0, i1; |
||
40 | fftw_real r2, r3, i2, i3; |
||
41 | |||
42 | i = 0; |
||
43 | |||
44 | for (; i < (n & 3); ++i) { |
||
45 | out[i * ostride] = in[i]; |
||
46 | } |
||
47 | |||
48 | for (; i < n; i += 4) { |
||
49 | r0 = c_re(in[i]); |
||
50 | i0 = c_im(in[i]); |
||
51 | r1 = c_re(in[i + 1]); |
||
52 | i1 = c_im(in[i + 1]); |
||
53 | r2 = c_re(in[i + 2]); |
||
54 | i2 = c_im(in[i + 2]); |
||
55 | r3 = c_re(in[i + 3]); |
||
56 | i3 = c_im(in[i + 3]); |
||
57 | c_re(out[i * ostride]) = r0; |
||
58 | c_im(out[i * ostride]) = i0; |
||
59 | c_re(out[(i + 1) * ostride]) = r1; |
||
60 | c_im(out[(i + 1) * ostride]) = i1; |
||
61 | c_re(out[(i + 2) * ostride]) = r2; |
||
62 | c_im(out[(i + 2) * ostride]) = i2; |
||
63 | c_re(out[(i + 3) * ostride]) = r3; |
||
64 | c_im(out[(i + 3) * ostride]) = i3; |
||
65 | } |
||
66 | } |
||
67 | |||
68 | |||
69 | /* |
||
70 | * Do *not* declare simple executor static--we need to call it |
||
71 | * from executor_cilk.cilk...also, preface its name with "fftw_" |
||
72 | * to avoid any possible name collisions. |
||
73 | */ |
||
74 | void fftw_executor_simple(int n, const fftw_complex *in, |
||
75 | fftw_complex *out, |
||
76 | fftw_plan_node *p, |
||
77 | int istride, |
||
78 | int ostride) |
||
79 | { |
||
80 | switch (p->type) { |
||
81 | case FFTW_NOTW: |
||
82 | HACK_ALIGN_STACK_ODD(); |
||
83 | (p->nodeu.notw.codelet)(in, out, istride, ostride); |
||
84 | break; |
||
85 | |||
86 | case FFTW_TWIDDLE: |
||
87 | { |
||
88 | int r = p->nodeu.twiddle.size; |
||
89 | int m = n / r; |
||
90 | int i; |
||
91 | fftw_twiddle_codelet *codelet; |
||
92 | fftw_complex *W; |
||
93 | |||
94 | for (i = 0; i < r; ++i) { |
||
95 | fftw_executor_simple(m, in + i * istride, |
||
96 | out + i * (m * ostride), |
||
97 | p->nodeu.twiddle.recurse, |
||
98 | istride * r, ostride); |
||
99 | } |
||
100 | |||
101 | codelet = p->nodeu.twiddle.codelet; |
||
102 | W = p->nodeu.twiddle.tw->twarray; |
||
103 | |||
104 | HACK_ALIGN_STACK_EVEN(); |
||
105 | codelet(out, W, m * ostride, m, ostride); |
||
106 | |||
107 | break; |
||
108 | } |
||
109 | |||
110 | case FFTW_GENERIC: |
||
111 | { |
||
112 | int r = p->nodeu.generic.size; |
||
113 | int m = n / r; |
||
114 | int i; |
||
115 | fftw_generic_codelet *codelet; |
||
116 | fftw_complex *W; |
||
117 | |||
118 | for (i = 0; i < r; ++i) { |
||
119 | fftw_executor_simple(m, in + i * istride, |
||
120 | out + i * (m * ostride), |
||
121 | p->nodeu.generic.recurse, |
||
122 | istride * r, ostride); |
||
123 | } |
||
124 | |||
125 | codelet = p->nodeu.generic.codelet; |
||
126 | W = p->nodeu.generic.tw->twarray; |
||
127 | codelet(out, W, m, r, n, ostride); |
||
128 | |||
129 | break; |
||
130 | } |
||
131 | |||
132 | case FFTW_RADER: |
||
133 | { |
||
134 | int r = p->nodeu.rader.size; |
||
135 | int m = n / r; |
||
136 | int i; |
||
137 | fftw_rader_codelet *codelet; |
||
138 | fftw_complex *W; |
||
139 | |||
140 | for (i = 0; i < r; ++i) { |
||
141 | fftw_executor_simple(m, in + i * istride, |
||
142 | out + i * (m * ostride), |
||
143 | p->nodeu.rader.recurse, |
||
144 | istride * r, ostride); |
||
145 | } |
||
146 | |||
147 | codelet = p->nodeu.rader.codelet; |
||
148 | W = p->nodeu.rader.tw->twarray; |
||
149 | codelet(out, W, m, r, ostride, |
||
150 | p->nodeu.rader.rader_data); |
||
151 | |||
152 | break; |
||
153 | } |
||
154 | |||
155 | default: |
||
156 | fftw_die("BUG in executor: invalid plan\n"); |
||
157 | break; |
||
158 | } |
||
159 | } |
||
160 | |||
161 | static void executor_simple_inplace(int n, fftw_complex *in, |
||
162 | fftw_complex *out, |
||
163 | fftw_plan_node *p, |
||
164 | int istride) |
||
165 | { |
||
166 | switch (p->type) { |
||
167 | case FFTW_NOTW: |
||
168 | HACK_ALIGN_STACK_ODD(); |
||
169 | (p->nodeu.notw.codelet)(in, in, istride, istride); |
||
170 | break; |
||
171 | |||
172 | default: |
||
173 | { |
||
174 | fftw_complex *tmp; |
||
175 | |||
176 | if (out) |
||
177 | tmp = out; |
||
178 | else |
||
179 | tmp = (fftw_complex *) |
||
180 | fftw_malloc(n * sizeof(fftw_complex)); |
||
181 | |||
182 | fftw_executor_simple(n, in, tmp, p, istride, 1); |
||
183 | fftw_strided_copy(n, tmp, istride, in); |
||
184 | |||
185 | if (!out) |
||
186 | fftw_free(tmp); |
||
187 | } |
||
188 | } |
||
189 | } |
||
190 | |||
191 | static void executor_many(int n, const fftw_complex *in, |
||
192 | fftw_complex *out, |
||
193 | fftw_plan_node *p, |
||
194 | int istride, |
||
195 | int ostride, |
||
196 | int howmany, int idist, int odist) |
||
197 | { |
||
198 | switch (p->type) { |
||
199 | case FFTW_NOTW: |
||
200 | { |
||
201 | fftw_notw_codelet *codelet = p->nodeu.notw.codelet; |
||
202 | int s; |
||
203 | |||
204 | HACK_ALIGN_STACK_ODD(); |
||
205 | for (s = 0; s < howmany; ++s) |
||
206 | codelet(in + s * idist, |
||
207 | out + s * odist, |
||
208 | istride, ostride); |
||
209 | break; |
||
210 | } |
||
211 | |||
212 | default: |
||
213 | { |
||
214 | int s; |
||
215 | for (s = 0; s < howmany; ++s) { |
||
216 | fftw_executor_simple(n, in + s * idist, |
||
217 | out + s * odist, |
||
218 | p, istride, ostride); |
||
219 | } |
||
220 | } |
||
221 | } |
||
222 | } |
||
223 | |||
224 | static void executor_many_inplace(int n, fftw_complex *in, |
||
225 | fftw_complex *out, |
||
226 | fftw_plan_node *p, |
||
227 | int istride, |
||
228 | int howmany, int idist) |
||
229 | { |
||
230 | switch (p->type) { |
||
231 | case FFTW_NOTW: |
||
232 | { |
||
233 | fftw_notw_codelet *codelet = p->nodeu.notw.codelet; |
||
234 | int s; |
||
235 | |||
236 | HACK_ALIGN_STACK_ODD(); |
||
237 | for (s = 0; s < howmany; ++s) |
||
238 | codelet(in + s * idist, |
||
239 | in + s * idist, |
||
240 | istride, istride); |
||
241 | break; |
||
242 | } |
||
243 | |||
244 | default: |
||
245 | { |
||
246 | int s; |
||
247 | fftw_complex *tmp; |
||
248 | if (out) |
||
249 | tmp = out; |
||
250 | else |
||
251 | tmp = (fftw_complex *) |
||
252 | fftw_malloc(n * sizeof(fftw_complex)); |
||
253 | |||
254 | for (s = 0; s < howmany; ++s) { |
||
255 | fftw_executor_simple(n, |
||
256 | in + s * idist, |
||
257 | tmp, |
||
258 | p, istride, 1); |
||
259 | fftw_strided_copy(n, tmp, istride, in + s * idist); |
||
260 | } |
||
261 | |||
262 | if (!out) |
||
263 | fftw_free(tmp); |
||
264 | } |
||
265 | } |
||
266 | } |
||
267 | |||
268 | /* user interface */ |
||
269 | void fftw(fftw_plan plan, int howmany, fftw_complex *in, int istride, |
||
270 | int idist, fftw_complex *out, int ostride, int odist) |
||
271 | { |
||
272 | int n = plan->n; |
||
273 | |||
274 | if (plan->flags & FFTW_IN_PLACE) { |
||
275 | if (howmany == 1) { |
||
276 | executor_simple_inplace(n, in, out, plan->root, istride); |
||
277 | } else { |
||
278 | executor_many_inplace(n, in, out, plan->root, istride, howmany, |
||
279 | idist); |
||
280 | } |
||
281 | } else { |
||
282 | if (howmany == 1) { |
||
283 | fftw_executor_simple(n, in, out, plan->root, istride, ostride); |
||
284 | } else { |
||
285 | executor_many(n, in, out, plan->root, istride, ostride, |
||
286 | howmany, idist, odist); |
||
287 | } |
||
288 | } |
||
289 | } |
||
290 | |||
291 | void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out) |
||
292 | { |
||
293 | int n = plan->n; |
||
294 | |||
295 | if (plan->flags & FFTW_IN_PLACE) |
||
296 | executor_simple_inplace(n, in, out, plan->root, 1); |
||
297 | else |
||
298 | fftw_executor_simple(n, in, out, plan->root, 1, 1); |
||
299 | } |