xref: /plan9/sys/src/games/mp3enc/fft.c (revision 8f5875f3e9b20916b4c52ad4336922bc8653eb7b)
1 /*
2 ** FFT and FHT routines
3 **  Copyright 1988, 1993; Ron Mayer
4 **
5 **  fht(fz,n);
6 **      Does a hartley transform of "n" points in the array "fz".
7 **
8 ** NOTE: This routine uses at least 2 patented algorithms, and may be
9 **       under the restrictions of a bunch of different organizations.
10 **       Although I wrote it completely myself; it is kind of a derivative
11 **       of a routine I once authored and released under the GPL, so it
12 **       may fall under the free software foundation's restrictions;
13 **       it was worked on as a Stanford Univ project, so they claim
14 **       some rights to it; it was further optimized at work here, so
15 **       I think this company claims parts of it.  The patents are
16 **       held by R. Bracewell (the FHT algorithm) and O. Buneman (the
17 **       trig generator), both at Stanford Univ.
18 **       If it were up to me, I'd say go do whatever you want with it;
19 **       but it would be polite to give credit to the following people
20 **       if you use this anywhere:
21 **           Euler     - probable inventor of the fourier transform.
22 **           Gauss     - probable inventor of the FFT.
23 **           Hartley   - probable inventor of the hartley transform.
24 **           Buneman   - for a really cool trig generator
25 **           Mayer(me) - for authoring this particular version and
26 **                       including all the optimizations in one package.
27 **       Thanks,
28 **       Ron Mayer; mayer@acuson.com
29 ** and added some optimization by
30 **           Mather    - idea of using lookup table
31 **           Takehiro  - some dirty hack for speed up
32 */
33 
34 /* $Id: fft.c,v 1.17 2001/01/13 12:54:41 takehiro Exp $ */
35 
36 #ifdef HAVE_CONFIG_H
37 # include <config.h>
38 #endif
39 
40 #include <math.h>
41 #include "util.h"
42 #include "fft.h"
43 
44 #ifdef WITH_DMALLOC
45 #include <dmalloc.h>
46 #endif
47 
48 #ifndef USE_FFT3DN
49 #define TRI_SIZE (5-1) /* 1024 =  4**5 */
50 
51 static const FLOAT costab[TRI_SIZE*2] = {
52   9.238795325112867e-01, 3.826834323650898e-01,
53   9.951847266721969e-01, 9.801714032956060e-02,
54   9.996988186962042e-01, 2.454122852291229e-02,
55   9.999811752826011e-01, 6.135884649154475e-03
56 };
57 
fht(FLOAT * fz,int n)58 inline static void fht(FLOAT *fz, int n)
59 {
60     const FLOAT *tri = costab;
61     int           k4;
62     FLOAT *fi, *fn, *gi;
63 
64     fn = fz + n;
65     k4 = 4;
66     do {
67 	FLOAT s1, c1;
68 	int   i, k1, k2, k3, kx;
69 	kx  = k4 >> 1;
70 	k1  = k4;
71 	k2  = k4 << 1;
72 	k3  = k2 + k1;
73 	k4  = k2 << 1;
74 	fi  = fz;
75 	gi  = fi + kx;
76 	do {
77 	    FLOAT f0,f1,f2,f3;
78 	    f1      = fi[0]  - fi[k1];
79 	    f0      = fi[0]  + fi[k1];
80 	    f3      = fi[k2] - fi[k3];
81 	    f2      = fi[k2] + fi[k3];
82 	    fi[k2]  = f0     - f2;
83 	    fi[0 ]  = f0     + f2;
84 	    fi[k3]  = f1     - f3;
85 	    fi[k1]  = f1     + f3;
86 	    f1      = gi[0]  - gi[k1];
87 	    f0      = gi[0]  + gi[k1];
88 	    f3      = SQRT2  * gi[k3];
89 	    f2      = SQRT2  * gi[k2];
90 	    gi[k2]  = f0     - f2;
91 	    gi[0 ]  = f0     + f2;
92 	    gi[k3]  = f1     - f3;
93 	    gi[k1]  = f1     + f3;
94 	    gi     += k4;
95 	    fi     += k4;
96 	} while (fi<fn);
97 	c1 = tri[0];
98 	s1 = tri[1];
99 	for (i = 1; i < kx; i++) {
100 	    FLOAT c2,s2;
101 	    c2 = 1 - (2*s1)*s1;
102 	    s2 = (2*s1)*c1;
103 	    fi = fz + i;
104 	    gi = fz + k1 - i;
105 	    do {
106 		FLOAT a,b,g0,f0,f1,g1,f2,g2,f3,g3;
107 		b       = s2*fi[k1] - c2*gi[k1];
108 		a       = c2*fi[k1] + s2*gi[k1];
109 		f1      = fi[0 ]    - a;
110 		f0      = fi[0 ]    + a;
111 		g1      = gi[0 ]    - b;
112 		g0      = gi[0 ]    + b;
113 		b       = s2*fi[k3] - c2*gi[k3];
114 		a       = c2*fi[k3] + s2*gi[k3];
115 		f3      = fi[k2]    - a;
116 		f2      = fi[k2]    + a;
117 		g3      = gi[k2]    - b;
118 		g2      = gi[k2]    + b;
119 		b       = s1*f2     - c1*g3;
120 		a       = c1*f2     + s1*g3;
121 		fi[k2]  = f0        - a;
122 		fi[0 ]  = f0        + a;
123 		gi[k3]  = g1        - b;
124 		gi[k1]  = g1        + b;
125 		b       = c1*g2     - s1*f3;
126 		a       = s1*g2     + c1*f3;
127 		gi[k2]  = g0        - a;
128 		gi[0 ]  = g0        + a;
129 		fi[k3]  = f1        - b;
130 		fi[k1]  = f1        + b;
131 		gi     += k4;
132 		fi     += k4;
133 	    } while (fi<fn);
134 	    c2 = c1;
135 	    c1 = c2 * tri[0] - s1 * tri[1];
136 	    s1 = c2 * tri[1] + s1 * tri[0];
137         }
138 	tri += 2;
139     } while (k4<n);
140 }
141 #else
142 #define fht(a,b) fht_3DN(a,b/2)
143 #endif /* USE_FFT3DN */
144 
145 static const unsigned char rv_tbl[] = {
146     0x00,    0x80,    0x40,    0xc0,    0x20,    0xa0,    0x60,    0xe0,
147     0x10,    0x90,    0x50,    0xd0,    0x30,    0xb0,    0x70,    0xf0,
148     0x08,    0x88,    0x48,    0xc8,    0x28,    0xa8,    0x68,    0xe8,
149     0x18,    0x98,    0x58,    0xd8,    0x38,    0xb8,    0x78,    0xf8,
150     0x04,    0x84,    0x44,    0xc4,    0x24,    0xa4,    0x64,    0xe4,
151     0x14,    0x94,    0x54,    0xd4,    0x34,    0xb4,    0x74,    0xf4,
152     0x0c,    0x8c,    0x4c,    0xcc,    0x2c,    0xac,    0x6c,    0xec,
153     0x1c,    0x9c,    0x5c,    0xdc,    0x3c,    0xbc,    0x7c,    0xfc,
154     0x02,    0x82,    0x42,    0xc2,    0x22,    0xa2,    0x62,    0xe2,
155     0x12,    0x92,    0x52,    0xd2,    0x32,    0xb2,    0x72,    0xf2,
156     0x0a,    0x8a,    0x4a,    0xca,    0x2a,    0xaa,    0x6a,    0xea,
157     0x1a,    0x9a,    0x5a,    0xda,    0x3a,    0xba,    0x7a,    0xfa,
158     0x06,    0x86,    0x46,    0xc6,    0x26,    0xa6,    0x66,    0xe6,
159     0x16,    0x96,    0x56,    0xd6,    0x36,    0xb6,    0x76,    0xf6,
160     0x0e,    0x8e,    0x4e,    0xce,    0x2e,    0xae,    0x6e,    0xee,
161     0x1e,    0x9e,    0x5e,    0xde,    0x3e,    0xbe,    0x7e,    0xfe
162 };
163 
164 #define ch01(index)  (buffer[chn][index])
165 
166 #define ml00(f)	(window[i        ] * f(i))
167 #define ml10(f)	(window[i + 0x200] * f(i + 0x200))
168 #define ml20(f)	(window[i + 0x100] * f(i + 0x100))
169 #define ml30(f)	(window[i + 0x300] * f(i + 0x300))
170 
171 #define ml01(f)	(window[i + 0x001] * f(i + 0x001))
172 #define ml11(f)	(window[i + 0x201] * f(i + 0x201))
173 #define ml21(f)	(window[i + 0x101] * f(i + 0x101))
174 #define ml31(f)	(window[i + 0x301] * f(i + 0x301))
175 
176 #define ms00(f)	(window_s[i       ] * f(i + k))
177 #define ms10(f)	(window_s[0x7f - i] * f(i + k + 0x80))
178 #define ms20(f)	(window_s[i + 0x40] * f(i + k + 0x40))
179 #define ms30(f)	(window_s[0x3f - i] * f(i + k + 0xc0))
180 
181 #define ms01(f)	(window_s[i + 0x01] * f(i + k + 0x01))
182 #define ms11(f)	(window_s[0x7e - i] * f(i + k + 0x81))
183 #define ms21(f)	(window_s[i + 0x41] * f(i + k + 0x41))
184 #define ms31(f)	(window_s[0x3e - i] * f(i + k + 0xc1))
185 
186 
fft_short(lame_internal_flags * gfc,FLOAT x_real[3][BLKSIZE_s],int chn,const sample_t * buffer[2])187 void fft_short(lame_internal_flags *gfc,
188                 FLOAT x_real[3][BLKSIZE_s], int chn, const sample_t *buffer[2])
189 {
190     const FLOAT*  window_s = (const FLOAT *)&gfc->window_s[0];
191     int           i;
192     int           j;
193     int           b;
194 
195     for (b = 0; b < 3; b++) {
196 	FLOAT *x = &x_real[b][BLKSIZE_s / 2];
197 	short k = (576 / 3) * (b + 1);
198 	j = BLKSIZE_s / 8 - 1;
199 	do {
200 	  FLOAT f0,f1,f2,f3, w;
201 
202 	  i = rv_tbl[j << 2];
203 
204 	  f0 = ms00(ch01); w = ms10(ch01); f1 = f0 - w; f0 = f0 + w;
205 	  f2 = ms20(ch01); w = ms30(ch01); f3 = f2 - w; f2 = f2 + w;
206 
207 	  x -= 4;
208 	  x[0] = f0 + f2;
209 	  x[2] = f0 - f2;
210 	  x[1] = f1 + f3;
211 	  x[3] = f1 - f3;
212 
213 	  f0 = ms01(ch01); w = ms11(ch01); f1 = f0 - w; f0 = f0 + w;
214 	  f2 = ms21(ch01); w = ms31(ch01); f3 = f2 - w; f2 = f2 + w;
215 
216 	  x[BLKSIZE_s / 2 + 0] = f0 + f2;
217 	  x[BLKSIZE_s / 2 + 2] = f0 - f2;
218 	  x[BLKSIZE_s / 2 + 1] = f1 + f3;
219 	  x[BLKSIZE_s / 2 + 3] = f1 - f3;
220 	} while (--j >= 0);
221 
222 	fht(x, BLKSIZE_s);
223     }
224 }
225 
fft_long(lame_internal_flags * const gfc,FLOAT x[BLKSIZE],int chn,const sample_t * buffer[2])226 void fft_long(lame_internal_flags * const gfc,
227                FLOAT x[BLKSIZE], int chn, const sample_t *buffer[2] )
228 {
229     const FLOAT*  window = (const FLOAT *)&gfc->window[0];
230     int           i;
231     int           jj = BLKSIZE / 8 - 1;
232     x += BLKSIZE / 2;
233 
234     do {
235       FLOAT f0,f1,f2,f3, w;
236 
237       i = rv_tbl[jj];
238       f0 = ml00(ch01); w = ml10(ch01); f1 = f0 - w; f0 = f0 + w;
239       f2 = ml20(ch01); w = ml30(ch01); f3 = f2 - w; f2 = f2 + w;
240 
241       x -= 4;
242       x[0] = f0 + f2;
243       x[2] = f0 - f2;
244       x[1] = f1 + f3;
245       x[3] = f1 - f3;
246 
247       f0 = ml01(ch01); w = ml11(ch01); f1 = f0 - w; f0 = f0 + w;
248       f2 = ml21(ch01); w = ml31(ch01); f3 = f2 - w; f2 = f2 + w;
249 
250       x[BLKSIZE / 2 + 0] = f0 + f2;
251       x[BLKSIZE / 2 + 2] = f0 - f2;
252       x[BLKSIZE / 2 + 1] = f1 + f3;
253       x[BLKSIZE / 2 + 3] = f1 - f3;
254     } while (--jj >= 0);
255 
256     fht(x, BLKSIZE);
257 }
258 
259 
init_fft(lame_internal_flags * const gfc)260 void init_fft(lame_internal_flags * const gfc)
261 {
262     FLOAT *window   = &gfc->window[0];
263     FLOAT *window_s = &gfc->window_s[0];
264     int i;
265 
266 #if 0
267     if (gfc->nsPsy.use) {
268       for (i = 0; i < BLKSIZE ; i++)
269 	/* blackman window */
270 	window[i] = 0.42-0.5*cos(2*PI*i/(BLKSIZE-1))+0.08*cos(4*PI*i/(BLKSIZE-1));
271     } else {
272       /*
273        * calculate HANN window coefficients
274        */
275       for (i = 0; i < BLKSIZE ; i++)
276 	window[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE));
277     }
278 #endif
279 
280     // The type of window used here will make no real difference, but
281     // in the interest of merging nspsytune stuff - switch to blackman window
282     for (i = 0; i < BLKSIZE ; i++)
283       /* blackman window */
284       window[i] = 0.42-0.5*cos(2*PI*(i+.5)/BLKSIZE)+
285 	0.08*cos(4*PI*(i+.5)/BLKSIZE);
286 
287     for (i = 0; i < BLKSIZE_s/2 ; i++)
288 	window_s[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE_s));
289 }
290