Rev 3 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | pj | 1 | /* |
2 | * Copyright (c) 1997-1999 Massachusetts Institute of Technology |
||
3 | * |
||
4 | * This program is free software; you can redistribute it and/or modify |
||
5 | * it under the terms of the GNU General Public License as published by |
||
6 | * the Free Software Foundation; either version 2 of the License, or |
||
7 | * (at your option) any later version. |
||
8 | * |
||
9 | * This program is distributed in the hope that it will be useful, |
||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
12 | * GNU General Public License for more details. |
||
13 | * |
||
14 | * You should have received a copy of the GNU General Public License |
||
15 | * along with this program; if not, write to the Free Software |
||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||
17 | * |
||
18 | */ |
||
19 | |||
107 | pj | 20 | /* $Id: rexec2.c,v 1.2 2003-03-24 11:14:59 pj Exp $ */ |
2 | pj | 21 | /* |
22 | * rexec2.c -- alternate rfftw executor, specifically designed for the |
||
23 | * multidimensional transforms. Given an extra work array, |
||
24 | * expects complex data in FFTW_COMPLEX format, and does |
||
25 | * not destroy the input in hc2real transforms. |
||
26 | */ |
||
27 | |||
107 | pj | 28 | #include <fftw-int.h> |
29 | #include <rfftw.h> |
||
2 | pj | 30 | |
31 | /* copies halfcomplex array in (contiguous) to fftw_complex array out. */ |
||
32 | void rfftw_hc2c(int n, fftw_real *in, fftw_complex *out, int ostride) |
||
33 | { |
||
34 | int n2 = (n + 1) / 2; |
||
35 | int i = 1; |
||
36 | |||
37 | c_re(out[0]) = in[0]; |
||
38 | c_im(out[0]) = 0.0; |
||
39 | for (; i < ((n2 - 1) & 3) + 1; ++i) { |
||
40 | c_re(out[i * ostride]) = in[i]; |
||
41 | c_im(out[i * ostride]) = in[n - i]; |
||
42 | } |
||
43 | for (; i < n2; i += 4) { |
||
44 | fftw_real r0, r1, r2, r3; |
||
45 | fftw_real i0, i1, i2, i3; |
||
46 | r0 = in[i]; |
||
47 | r1 = in[i + 1]; |
||
48 | r2 = in[i + 2]; |
||
49 | r3 = in[i + 3]; |
||
50 | i3 = in[n - (i + 3)]; |
||
51 | i2 = in[n - (i + 2)]; |
||
52 | i1 = in[n - (i + 1)]; |
||
53 | i0 = in[n - i]; |
||
54 | c_re(out[i * ostride]) = r0; |
||
55 | c_im(out[i * ostride]) = i0; |
||
56 | c_re(out[(i + 1) * ostride]) = r1; |
||
57 | c_im(out[(i + 1) * ostride]) = i1; |
||
58 | c_re(out[(i + 2) * ostride]) = r2; |
||
59 | c_im(out[(i + 2) * ostride]) = i2; |
||
60 | c_re(out[(i + 3) * ostride]) = r3; |
||
61 | c_im(out[(i + 3) * ostride]) = i3; |
||
62 | } |
||
63 | if ((n & 1) == 0) { /* store the Nyquist frequency */ |
||
64 | c_re(out[n2 * ostride]) = in[n2]; |
||
65 | c_im(out[n2 * ostride]) = 0.0; |
||
66 | } |
||
67 | } |
||
68 | |||
69 | /* reverse of rfftw_hc2c */ |
||
70 | void rfftw_c2hc(int n, fftw_complex *in, int istride, fftw_real *out) |
||
71 | { |
||
72 | int n2 = (n + 1) / 2; |
||
73 | int i = 1; |
||
74 | |||
75 | out[0] = c_re(in[0]); |
||
76 | for (; i < ((n2 - 1) & 3) + 1; ++i) { |
||
77 | out[i] = c_re(in[i * istride]); |
||
78 | out[n - i] = c_im(in[i * istride]); |
||
79 | } |
||
80 | for (; i < n2; i += 4) { |
||
81 | fftw_real r0, r1, r2, r3; |
||
82 | fftw_real i0, i1, i2, i3; |
||
83 | r0 = c_re(in[i * istride]); |
||
84 | i0 = c_im(in[i * istride]); |
||
85 | r1 = c_re(in[(i + 1) * istride]); |
||
86 | i1 = c_im(in[(i + 1) * istride]); |
||
87 | r2 = c_re(in[(i + 2) * istride]); |
||
88 | i2 = c_im(in[(i + 2) * istride]); |
||
89 | r3 = c_re(in[(i + 3) * istride]); |
||
90 | i3 = c_im(in[(i + 3) * istride]); |
||
91 | out[i] = r0; |
||
92 | out[i + 1] = r1; |
||
93 | out[i + 2] = r2; |
||
94 | out[i + 3] = r3; |
||
95 | out[n - (i + 3)] = i3; |
||
96 | out[n - (i + 2)] = i2; |
||
97 | out[n - (i + 1)] = i1; |
||
98 | out[n - i] = i0; |
||
99 | } |
||
100 | if ((n & 1) == 0) /* store the Nyquist frequency */ |
||
101 | out[n2] = c_re(in[n2 * istride]); |
||
102 | } |
||
103 | |||
104 | /* |
||
105 | * in: array of n real numbers (* howmany). |
||
106 | * out: array of n/2 + 1 complex numbers (* howmany). |
||
107 | * work: array of n real numbers (stride 1) |
||
108 | * |
||
109 | * We must have out != in if dist < stride. |
||
110 | */ |
||
111 | void rfftw_real2c_aux(fftw_plan plan, int howmany, |
||
112 | fftw_real *in, int istride, int idist, |
||
113 | fftw_complex *out, int ostride, int odist, |
||
114 | fftw_real *work) |
||
115 | { |
||
116 | fftw_plan_node *p = plan->root; |
||
117 | int j; |
||
118 | |||
119 | switch (p->type) { |
||
120 | case FFTW_REAL2HC: |
||
121 | { |
||
122 | fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; |
||
123 | int n = plan->n; |
||
124 | int n2 = (n & 1) ? 0 : (n + 1) / 2; |
||
125 | |||
126 | HACK_ALIGN_STACK_ODD(); |
||
127 | for (j = 0; j < howmany; ++j, out += odist) { |
||
128 | codelet(in + j * idist, |
||
129 | &c_re(*out), |
||
130 | &c_im(*out), |
||
131 | istride, ostride * 2, ostride * 2); |
||
132 | c_im(out[0]) = 0.0; |
||
133 | c_im(out[n2 * ostride]) = 0.0; |
||
134 | } |
||
135 | break; |
||
136 | } |
||
137 | |||
138 | default: |
||
139 | { |
||
140 | int n = plan->n; |
||
141 | |||
142 | for (j = 0; j < howmany; ++j, in += idist, out += odist) { |
||
143 | rfftw_executor_simple(n, in, work, p, istride, 1); |
||
144 | rfftw_hc2c(n, work, out, ostride); |
||
145 | } |
||
146 | break; |
||
147 | } |
||
148 | } |
||
149 | } |
||
150 | |||
151 | /* |
||
152 | * in: array of n/2 + 1 complex numbers (* howmany). |
||
153 | * out: array of n real numbers (* howmany). |
||
154 | * work: array of n real numbers (stride 1) |
||
155 | * |
||
156 | * We must have out != in if dist < stride. |
||
157 | */ |
||
158 | void rfftw_c2real_aux(fftw_plan plan, int howmany, |
||
159 | fftw_complex *in, int istride, int idist, |
||
160 | fftw_real *out, int ostride, int odist, |
||
161 | fftw_real *work) |
||
162 | { |
||
163 | fftw_plan_node *p = plan->root; |
||
164 | |||
165 | switch (p->type) { |
||
166 | case FFTW_HC2REAL: |
||
167 | { |
||
168 | fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; |
||
169 | int j; |
||
170 | |||
171 | HACK_ALIGN_STACK_ODD(); |
||
172 | for (j = 0; j < howmany; ++j) |
||
173 | codelet(&c_re(*(in + j * idist)), |
||
174 | &c_im(*(in + j * idist)), |
||
175 | out + j * odist, |
||
176 | istride * 2, istride * 2, ostride); |
||
177 | break; |
||
178 | } |
||
179 | |||
180 | default: |
||
181 | { |
||
182 | int j, n = plan->n; |
||
183 | |||
184 | for (j = 0; j < howmany; ++j, in += idist, out += odist) { |
||
185 | rfftw_c2hc(n, in, istride, work); |
||
186 | rfftw_executor_simple(n, work, out, p, 1, ostride); |
||
187 | } |
||
188 | break; |
||
189 | } |
||
190 | } |
||
191 | } |
||
192 | |||
193 | /* |
||
194 | * The following two functions are similar to the ones above, BUT: |
||
195 | * |
||
196 | * work must contain n * howmany elements (stride 1) |
||
197 | * |
||
198 | * Can handle out == in for any stride/dist. |
||
199 | */ |
||
200 | void rfftw_real2c_overlap_aux(fftw_plan plan, int howmany, |
||
201 | fftw_real *in, int istride, int idist, |
||
202 | fftw_complex *out, int ostride, int odist, |
||
203 | fftw_real *work) |
||
204 | { |
||
205 | int n = plan->n; |
||
206 | int j; |
||
207 | |||
208 | rfftw(plan, howmany, in, istride, idist, work, 1, n); |
||
209 | |||
210 | /* copy from work to out: */ |
||
211 | for (j = 0; j < howmany; ++j, work += n, out += odist) |
||
212 | rfftw_hc2c(n, work, out, ostride); |
||
213 | } |
||
214 | |||
215 | void rfftw_c2real_overlap_aux(fftw_plan plan, int howmany, |
||
216 | fftw_complex *in, int istride, int idist, |
||
217 | fftw_real *out, int ostride, int odist, |
||
218 | fftw_real *work) |
||
219 | { |
||
220 | int n = plan->n; |
||
221 | int j; |
||
222 | |||
223 | /* copy from in to work: */ |
||
224 | for (j = 0; j < howmany; ++j, in += idist) |
||
225 | rfftw_c2hc(n, in, istride, work + j * n); |
||
226 | |||
227 | rfftw(plan, howmany, work, 1, n, out, ostride, odist); |
||
228 | } |