1 /*
2 ** FFT and FHT routines
3 ** Copyright 1988, 1993; Ron Mayer
4 **
5 ** fht(fz,n);
6 ** Does a hartley transform of "n" points in the array "fz".
7 **
8 ** NOTE: This routine uses at least 2 patented algorithms, and may be
9 ** under the restrictions of a bunch of different organizations.
10 ** Although I wrote it completely myself; it is kind of a derivative
11 ** of a routine I once authored and released under the GPL, so it
12 ** may fall under the free software foundation's restrictions;
13 ** it was worked on as a Stanford Univ project, so they claim
14 ** some rights to it; it was further optimized at work here, so
15 ** I think this company claims parts of it. The patents are
16 ** held by R. Bracewell (the FHT algorithm) and O. Buneman (the
17 ** trig generator), both at Stanford Univ.
18 ** If it were up to me, I'd say go do whatever you want with it;
19 ** but it would be polite to give credit to the following people
20 ** if you use this anywhere:
21 ** Euler - probable inventor of the fourier transform.
22 ** Gauss - probable inventor of the FFT.
23 ** Hartley - probable inventor of the hartley transform.
24 ** Buneman - for a really cool trig generator
25 ** Mayer(me) - for authoring this particular version and
26 ** including all the optimizations in one package.
27 ** Thanks,
28 ** Ron Mayer; mayer@acuson.com
29 ** and added some optimization by
30 ** Mather - idea of using lookup table
31 ** Takehiro - some dirty hack for speed up
32 */
33
34 /* $Id: fft.c,v 1.17 2001/01/13 12:54:41 takehiro Exp $ */
35
36 #ifdef HAVE_CONFIG_H
37 # include <config.h>
38 #endif
39
40 #include <math.h>
41 #include "util.h"
42 #include "fft.h"
43
44 #ifdef WITH_DMALLOC
45 #include <dmalloc.h>
46 #endif
47
48 #ifndef USE_FFT3DN
49 #define TRI_SIZE (5-1) /* 1024 = 4**5 */
50
51 static const FLOAT costab[TRI_SIZE*2] = {
52 9.238795325112867e-01, 3.826834323650898e-01,
53 9.951847266721969e-01, 9.801714032956060e-02,
54 9.996988186962042e-01, 2.454122852291229e-02,
55 9.999811752826011e-01, 6.135884649154475e-03
56 };
57
fht(FLOAT * fz,int n)58 inline static void fht(FLOAT *fz, int n)
59 {
60 const FLOAT *tri = costab;
61 int k4;
62 FLOAT *fi, *fn, *gi;
63
64 fn = fz + n;
65 k4 = 4;
66 do {
67 FLOAT s1, c1;
68 int i, k1, k2, k3, kx;
69 kx = k4 >> 1;
70 k1 = k4;
71 k2 = k4 << 1;
72 k3 = k2 + k1;
73 k4 = k2 << 1;
74 fi = fz;
75 gi = fi + kx;
76 do {
77 FLOAT f0,f1,f2,f3;
78 f1 = fi[0] - fi[k1];
79 f0 = fi[0] + fi[k1];
80 f3 = fi[k2] - fi[k3];
81 f2 = fi[k2] + fi[k3];
82 fi[k2] = f0 - f2;
83 fi[0 ] = f0 + f2;
84 fi[k3] = f1 - f3;
85 fi[k1] = f1 + f3;
86 f1 = gi[0] - gi[k1];
87 f0 = gi[0] + gi[k1];
88 f3 = SQRT2 * gi[k3];
89 f2 = SQRT2 * gi[k2];
90 gi[k2] = f0 - f2;
91 gi[0 ] = f0 + f2;
92 gi[k3] = f1 - f3;
93 gi[k1] = f1 + f3;
94 gi += k4;
95 fi += k4;
96 } while (fi<fn);
97 c1 = tri[0];
98 s1 = tri[1];
99 for (i = 1; i < kx; i++) {
100 FLOAT c2,s2;
101 c2 = 1 - (2*s1)*s1;
102 s2 = (2*s1)*c1;
103 fi = fz + i;
104 gi = fz + k1 - i;
105 do {
106 FLOAT a,b,g0,f0,f1,g1,f2,g2,f3,g3;
107 b = s2*fi[k1] - c2*gi[k1];
108 a = c2*fi[k1] + s2*gi[k1];
109 f1 = fi[0 ] - a;
110 f0 = fi[0 ] + a;
111 g1 = gi[0 ] - b;
112 g0 = gi[0 ] + b;
113 b = s2*fi[k3] - c2*gi[k3];
114 a = c2*fi[k3] + s2*gi[k3];
115 f3 = fi[k2] - a;
116 f2 = fi[k2] + a;
117 g3 = gi[k2] - b;
118 g2 = gi[k2] + b;
119 b = s1*f2 - c1*g3;
120 a = c1*f2 + s1*g3;
121 fi[k2] = f0 - a;
122 fi[0 ] = f0 + a;
123 gi[k3] = g1 - b;
124 gi[k1] = g1 + b;
125 b = c1*g2 - s1*f3;
126 a = s1*g2 + c1*f3;
127 gi[k2] = g0 - a;
128 gi[0 ] = g0 + a;
129 fi[k3] = f1 - b;
130 fi[k1] = f1 + b;
131 gi += k4;
132 fi += k4;
133 } while (fi<fn);
134 c2 = c1;
135 c1 = c2 * tri[0] - s1 * tri[1];
136 s1 = c2 * tri[1] + s1 * tri[0];
137 }
138 tri += 2;
139 } while (k4<n);
140 }
141 #else
142 #define fht(a,b) fht_3DN(a,b/2)
143 #endif /* USE_FFT3DN */
144
145 static const unsigned char rv_tbl[] = {
146 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
147 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
148 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
149 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
150 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
151 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
152 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
153 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
154 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
155 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
156 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
157 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
158 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
159 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
160 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
161 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe
162 };
163
164 #define ch01(index) (buffer[chn][index])
165
166 #define ml00(f) (window[i ] * f(i))
167 #define ml10(f) (window[i + 0x200] * f(i + 0x200))
168 #define ml20(f) (window[i + 0x100] * f(i + 0x100))
169 #define ml30(f) (window[i + 0x300] * f(i + 0x300))
170
171 #define ml01(f) (window[i + 0x001] * f(i + 0x001))
172 #define ml11(f) (window[i + 0x201] * f(i + 0x201))
173 #define ml21(f) (window[i + 0x101] * f(i + 0x101))
174 #define ml31(f) (window[i + 0x301] * f(i + 0x301))
175
176 #define ms00(f) (window_s[i ] * f(i + k))
177 #define ms10(f) (window_s[0x7f - i] * f(i + k + 0x80))
178 #define ms20(f) (window_s[i + 0x40] * f(i + k + 0x40))
179 #define ms30(f) (window_s[0x3f - i] * f(i + k + 0xc0))
180
181 #define ms01(f) (window_s[i + 0x01] * f(i + k + 0x01))
182 #define ms11(f) (window_s[0x7e - i] * f(i + k + 0x81))
183 #define ms21(f) (window_s[i + 0x41] * f(i + k + 0x41))
184 #define ms31(f) (window_s[0x3e - i] * f(i + k + 0xc1))
185
186
fft_short(lame_internal_flags * gfc,FLOAT x_real[3][BLKSIZE_s],int chn,const sample_t * buffer[2])187 void fft_short(lame_internal_flags *gfc,
188 FLOAT x_real[3][BLKSIZE_s], int chn, const sample_t *buffer[2])
189 {
190 const FLOAT* window_s = (const FLOAT *)&gfc->window_s[0];
191 int i;
192 int j;
193 int b;
194
195 for (b = 0; b < 3; b++) {
196 FLOAT *x = &x_real[b][BLKSIZE_s / 2];
197 short k = (576 / 3) * (b + 1);
198 j = BLKSIZE_s / 8 - 1;
199 do {
200 FLOAT f0,f1,f2,f3, w;
201
202 i = rv_tbl[j << 2];
203
204 f0 = ms00(ch01); w = ms10(ch01); f1 = f0 - w; f0 = f0 + w;
205 f2 = ms20(ch01); w = ms30(ch01); f3 = f2 - w; f2 = f2 + w;
206
207 x -= 4;
208 x[0] = f0 + f2;
209 x[2] = f0 - f2;
210 x[1] = f1 + f3;
211 x[3] = f1 - f3;
212
213 f0 = ms01(ch01); w = ms11(ch01); f1 = f0 - w; f0 = f0 + w;
214 f2 = ms21(ch01); w = ms31(ch01); f3 = f2 - w; f2 = f2 + w;
215
216 x[BLKSIZE_s / 2 + 0] = f0 + f2;
217 x[BLKSIZE_s / 2 + 2] = f0 - f2;
218 x[BLKSIZE_s / 2 + 1] = f1 + f3;
219 x[BLKSIZE_s / 2 + 3] = f1 - f3;
220 } while (--j >= 0);
221
222 fht(x, BLKSIZE_s);
223 }
224 }
225
fft_long(lame_internal_flags * const gfc,FLOAT x[BLKSIZE],int chn,const sample_t * buffer[2])226 void fft_long(lame_internal_flags * const gfc,
227 FLOAT x[BLKSIZE], int chn, const sample_t *buffer[2] )
228 {
229 const FLOAT* window = (const FLOAT *)&gfc->window[0];
230 int i;
231 int jj = BLKSIZE / 8 - 1;
232 x += BLKSIZE / 2;
233
234 do {
235 FLOAT f0,f1,f2,f3, w;
236
237 i = rv_tbl[jj];
238 f0 = ml00(ch01); w = ml10(ch01); f1 = f0 - w; f0 = f0 + w;
239 f2 = ml20(ch01); w = ml30(ch01); f3 = f2 - w; f2 = f2 + w;
240
241 x -= 4;
242 x[0] = f0 + f2;
243 x[2] = f0 - f2;
244 x[1] = f1 + f3;
245 x[3] = f1 - f3;
246
247 f0 = ml01(ch01); w = ml11(ch01); f1 = f0 - w; f0 = f0 + w;
248 f2 = ml21(ch01); w = ml31(ch01); f3 = f2 - w; f2 = f2 + w;
249
250 x[BLKSIZE / 2 + 0] = f0 + f2;
251 x[BLKSIZE / 2 + 2] = f0 - f2;
252 x[BLKSIZE / 2 + 1] = f1 + f3;
253 x[BLKSIZE / 2 + 3] = f1 - f3;
254 } while (--jj >= 0);
255
256 fht(x, BLKSIZE);
257 }
258
259
init_fft(lame_internal_flags * const gfc)260 void init_fft(lame_internal_flags * const gfc)
261 {
262 FLOAT *window = &gfc->window[0];
263 FLOAT *window_s = &gfc->window_s[0];
264 int i;
265
266 #if 0
267 if (gfc->nsPsy.use) {
268 for (i = 0; i < BLKSIZE ; i++)
269 /* blackman window */
270 window[i] = 0.42-0.5*cos(2*PI*i/(BLKSIZE-1))+0.08*cos(4*PI*i/(BLKSIZE-1));
271 } else {
272 /*
273 * calculate HANN window coefficients
274 */
275 for (i = 0; i < BLKSIZE ; i++)
276 window[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE));
277 }
278 #endif
279
280 // The type of window used here will make no real difference, but
281 // in the interest of merging nspsytune stuff - switch to blackman window
282 for (i = 0; i < BLKSIZE ; i++)
283 /* blackman window */
284 window[i] = 0.42-0.5*cos(2*PI*(i+.5)/BLKSIZE)+
285 0.08*cos(4*PI*(i+.5)/BLKSIZE);
286
287 for (i = 0; i < BLKSIZE_s/2 ; i++)
288 window_s[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE_s));
289 }
290