xref: /plan9/sys/src/games/mp3dec/fixed.h (revision f30ccc91ab9e7f92bd5dd82b1eebdeb503fd3465)
1 /*
2  * libmad - MPEG audio decoder library
3  * Copyright (C) 2000-2004 Underbit Technologies, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * $Id: fixed.h,v 1.38 2004/02/17 02:02:03 rob Exp $
20  */
21 
22 # ifndef LIBMAD_FIXED_H
23 # define LIBMAD_FIXED_H
24 
25 typedef int mad_fixed_t;	// must be 32 bits
26 typedef int mad_fixed64hi_t;	// must be 32 bits
27 typedef u32int mad_fixed64lo_t;	// must be 32 bits
28 typedef vlong mad_fixed64_t;
29 
30 # if defined(FPM_FLOAT)
31 typedef double mad_sample_t;
32 # else
33 typedef mad_fixed_t mad_sample_t;
34 # endif
35 
36 /*
37  * Fixed-point format: 0xABBBBBBB
38  * A == whole part      (sign + 3 bits)
39  * B == fractional part (28 bits)
40  *
41  * Values are signed two's complement, so the effective range is:
42  * 0x80000000 to 0x7fffffff
43  *       -8.0 to +7.9999999962747097015380859375
44  *
45  * The smallest representable value is:
46  * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9)
47  *
48  * 28 bits of fractional accuracy represent about
49  * 8.6 digits of decimal accuracy.
50  *
51  * Fixed-point numbers can be added or subtracted as normal
52  * integers, but multiplication requires shifting the 64-bit result
53  * from 56 fractional bits back to 28 (and rounding.)
54  *
55  * Changing the definition of MAD_F_FRACBITS is only partially
56  * supported, and must be done with care.
57  */
58 
59 # define MAD_F_FRACBITS		28
60 
61 #  define MAD_F(x)		((mad_fixed_t) (x))
62 
63 # define MAD_F_MIN		((mad_fixed_t) -0x80000000L)
64 # define MAD_F_MAX		((mad_fixed_t) +0x7fffffffL)
65 
66 # define MAD_F_ONE		MAD_F(0x10000000)
67 
68 # define mad_f_tofixed(x)	((mad_fixed_t)  \
69 				 ((x) * (double) (1L << MAD_F_FRACBITS) + 0.5))
70 # define mad_f_todouble(x)	((double)  \
71 				 ((x) / (double) (1L << MAD_F_FRACBITS)))
72 
73 # define mad_f_intpart(x)	((x) >> MAD_F_FRACBITS)
74 # define mad_f_fracpart(x)	((x) & ((1L << MAD_F_FRACBITS) - 1))
75 				/* (x should be positive) */
76 
77 # define mad_f_fromint(x)	((x) << MAD_F_FRACBITS)
78 
79 # define mad_f_add(x, y)	((x) + (y))
80 # define mad_f_sub(x, y)	((x) - (y))
81 
82 # if defined(FPM_FLOAT)
83 #  error "FPM_FLOAT not yet supported"
84 
85 #  undef MAD_F
86 #  define MAD_F(x)		mad_f_todouble(x)
87 
88 #  define mad_f_mul(x, y)	((x) * (y))
89 #  define mad_f_scale64
90 
91 #  undef ASO_ZEROCHECK
92 
93 # elif defined(FPM_64BIT)
94 
95 /*
96  * This version should be the most accurate if 64-bit types are supported by
97  * the compiler, although it may not be the most efficient.
98  */
99 #  if defined(OPT_ACCURACY)
100 #   define mad_f_mul(x, y)  \
101     ((mad_fixed_t)  \
102      ((((mad_fixed64_t) (x) * (y)) +  \
103        (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS))
104 #  else
105 #   define mad_f_mul(x, y)  \
106     ((mad_fixed_t) (((mad_fixed64_t) (x) * (y)) >> MAD_F_SCALEBITS))
107 #  endif
108 
109 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
110 
111 /* --- Intel --------------------------------------------------------------- */
112 
113 # elif defined(FPM_INTEL)
114 
115 #  if defined(_MSC_VER)
116 #   pragma warning(push)
117 #   pragma warning(disable: 4035)  /* no return value */
118 static __forceinline
mad_f_mul_inline(mad_fixed_t x,mad_fixed_t y)119 mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
120 {
121   enum {
122     fracbits = MAD_F_FRACBITS
123   };
124 
125   __asm {
126     mov eax, x
127     imul y
128     shrd eax, edx, fracbits
129   }
130 
131   /* implicit return of eax */
132 }
133 #   pragma warning(pop)
134 
135 #   define mad_f_mul		mad_f_mul_inline
136 #   define mad_f_scale64
137 #  else
138 /*
139  * This Intel version is fast and accurate; the disposition of the least
140  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
141  */
142 #   define MAD_F_MLX(hi, lo, x, y)  \
143     asm ("imull %3"  \
144 	 : "=a" (lo), "=d" (hi)  \
145 	 : "%a" (x), "rm" (y)  \
146 	 : "cc")
147 
148 #   if defined(OPT_ACCURACY)
149 /*
150  * This gives best accuracy but is not very fast.
151  */
152 #    define MAD_F_MLA(hi, lo, x, y)  \
153     ({ mad_fixed64hi_t __hi;  \
154        mad_fixed64lo_t __lo;  \
155        MAD_F_MLX(__hi, __lo, (x), (y));  \
156        asm ("addl %2,%0\n\t"  \
157 	    "adcl %3,%1"  \
158 	    : "=rm" (lo), "=rm" (hi)  \
159 	    : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi)  \
160 	    : "cc");  \
161     })
162 #   endif  /* OPT_ACCURACY */
163 
164 #   if defined(OPT_ACCURACY)
165 /*
166  * Surprisingly, this is faster than SHRD followed by ADC.
167  */
168 #    define mad_f_scale64(hi, lo)  \
169     ({ mad_fixed64hi_t __hi_;  \
170        mad_fixed64lo_t __lo_;  \
171        mad_fixed_t __result;  \
172        asm ("addl %4,%2\n\t"  \
173 	    "adcl %5,%3"  \
174 	    : "=rm" (__lo_), "=rm" (__hi_)  \
175 	    : "0" (lo), "1" (hi),  \
176 	      "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0)  \
177 	    : "cc");  \
178        asm ("shrdl %3,%2,%1"  \
179 	    : "=rm" (__result)  \
180 	    : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)  \
181 	    : "cc");  \
182        __result;  \
183     })
184 #   elif defined(OPT_INTEL)
185 /*
186  * Alternate Intel scaling that may or may not perform better.
187  */
188 #    define mad_f_scale64(hi, lo)  \
189     ({ mad_fixed_t __result;  \
190        asm ("shrl %3,%1\n\t"  \
191 	    "shll %4,%2\n\t"  \
192 	    "orl %2,%1"  \
193 	    : "=rm" (__result)  \
194 	    : "0" (lo), "r" (hi),  \
195 	      "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS)  \
196 	    : "cc");  \
197        __result;  \
198     })
199 #   else
200 #    define mad_f_scale64(hi, lo)  \
201     ({ mad_fixed_t __result;  \
202        asm ("shrdl %3,%2,%1"  \
203 	    : "=rm" (__result)  \
204 	    : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)  \
205 	    : "cc");  \
206        __result;  \
207     })
208 #   endif  /* OPT_ACCURACY */
209 
210 #   define MAD_F_SCALEBITS  MAD_F_FRACBITS
211 #  endif
212 
213 /* --- ARM ----------------------------------------------------------------- */
214 
215 # elif defined(FPM_ARM)
216 
217 /*
218  * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
219  * least significant bit is properly rounded at no CPU cycle cost!
220  */
221 # if 1
222 /*
223  * This is faster than the default implementation via MAD_F_MLX() and
224  * mad_f_scale64().
225  */
226 #  define mad_f_mul(x, y)  \
227     ({ mad_fixed64hi_t __hi;  \
228        mad_fixed64lo_t __lo;  \
229        mad_fixed_t __result;  \
230        asm ("smull	%0, %1, %3, %4\n\t"  \
231 	    "movs	%0, %0, lsr %5\n\t"  \
232 	    "adc	%2, %0, %1, lsl %6"  \
233 	    : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
234 	    : "%r" (x), "r" (y),  \
235 	      "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
236 	    : "cc");  \
237        __result;  \
238     })
239 # endif
240 
241 #  define MAD_F_MLX(hi, lo, x, y)  \
242     asm ("smull	%0, %1, %2, %3"  \
243 	 : "=&r" (lo), "=&r" (hi)  \
244 	 : "%r" (x), "r" (y))
245 
246 #  define MAD_F_MLA(hi, lo, x, y)  \
247     asm ("smlal	%0, %1, %2, %3"  \
248 	 : "+r" (lo), "+r" (hi)  \
249 	 : "%r" (x), "r" (y))
250 
251 #  define MAD_F_MLN(hi, lo)  \
252     asm ("rsbs	%0, %2, #0\n\t"  \
253 	 "rsc	%1, %3, #0"  \
254 	 : "=r" (lo), "=r" (hi)  \
255 	 : "0" (lo), "1" (hi)  \
256 	 : "cc")
257 
258 #  define mad_f_scale64(hi, lo)  \
259     ({ mad_fixed_t __result;  \
260        asm ("movs	%0, %1, lsr %3\n\t"  \
261 	    "adc	%0, %0, %2, lsl %4"  \
262 	    : "=&r" (__result)  \
263 	    : "r" (lo), "r" (hi),  \
264 	      "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
265 	    : "cc");  \
266        __result;  \
267     })
268 
269 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
270 
271 /* --- MIPS ---------------------------------------------------------------- */
272 
273 # elif defined(FPM_MIPS)
274 
275 /*
276  * This MIPS version is fast and accurate; the disposition of the least
277  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
278  */
279 #  define MAD_F_MLX(hi, lo, x, y)  \
280     asm ("mult	%2,%3"  \
281 	 : "=l" (lo), "=h" (hi)  \
282 	 : "%r" (x), "r" (y))
283 
284 # if defined(HAVE_MADD_ASM)
285 #  define MAD_F_MLA(hi, lo, x, y)  \
286     asm ("madd	%2,%3"  \
287 	 : "+l" (lo), "+h" (hi)  \
288 	 : "%r" (x), "r" (y))
289 # elif defined(HAVE_MADD16_ASM)
290 /*
291  * This loses significant accuracy due to the 16-bit integer limit in the
292  * multiply/accumulate instruction.
293  */
294 #  define MAD_F_ML0(hi, lo, x, y)  \
295     asm ("mult	%2,%3"  \
296 	 : "=l" (lo), "=h" (hi)  \
297 	 : "%r" ((x) >> 12), "r" ((y) >> 16))
298 #  define MAD_F_MLA(hi, lo, x, y)  \
299     asm ("madd16	%2,%3"  \
300 	 : "+l" (lo), "+h" (hi)  \
301 	 : "%r" ((x) >> 12), "r" ((y) >> 16))
302 #  define MAD_F_MLZ(hi, lo)  ((mad_fixed_t) (lo))
303 # endif
304 
305 # if defined(OPT_SPEED)
306 #  define mad_f_scale64(hi, lo)  \
307     ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
308 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
309 # endif
310 
311 /* --- SPARC --------------------------------------------------------------- */
312 
313 # elif defined(FPM_SPARC)
314 
315 /*
316  * This SPARC V8 version is fast and accurate; the disposition of the least
317  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
318  */
319 #  define MAD_F_MLX(hi, lo, x, y)  \
320     asm ("smul %2, %3, %0\n\t"  \
321 	 "rd %%y, %1"  \
322 	 : "=r" (lo), "=r" (hi)  \
323 	 : "%r" (x), "rI" (y))
324 
325 /* --- PowerPC ------------------------------------------------------------- */
326 
327 # elif defined(FPM_PPC)
328 
329 /*
330  * This PowerPC version is fast and accurate; the disposition of the least
331  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
332  */
333 #  define MAD_F_MLX(hi, lo, x, y)  \
334     do {  \
335       asm ("mullw %0,%1,%2"  \
336 	   : "=r" (lo)  \
337 	   : "%r" (x), "r" (y));  \
338       asm ("mulhw %0,%1,%2"  \
339 	   : "=r" (hi)  \
340 	   : "%r" (x), "r" (y));  \
341     }  \
342     while (0)
343 
344 #  if defined(OPT_ACCURACY)
345 /*
346  * This gives best accuracy but is not very fast.
347  */
348 #   define MAD_F_MLA(hi, lo, x, y)  \
349     ({ mad_fixed64hi_t __hi;  \
350        mad_fixed64lo_t __lo;  \
351        MAD_F_MLX(__hi, __lo, (x), (y));  \
352        asm ("addc %0,%2,%3\n\t"  \
353 	    "adde %1,%4,%5"  \
354 	    : "=r" (lo), "=r" (hi)  \
355 	    : "%r" (lo), "r" (__lo),  \
356 	      "%r" (hi), "r" (__hi)  \
357 	    : "xer");  \
358     })
359 #  endif
360 
361 #  if defined(OPT_ACCURACY)
362 /*
363  * This is slower than the truncating version below it.
364  */
365 #   define mad_f_scale64(hi, lo)  \
366     ({ mad_fixed_t __result, __round;  \
367        asm ("rotrwi %0,%1,%2"  \
368 	    : "=r" (__result)  \
369 	    : "r" (lo), "i" (MAD_F_SCALEBITS));  \
370        asm ("extrwi %0,%1,1,0"  \
371 	    : "=r" (__round)  \
372 	    : "r" (__result));  \
373        asm ("insrwi %0,%1,%2,0"  \
374 	    : "+r" (__result)  \
375 	    : "r" (hi), "i" (MAD_F_SCALEBITS));  \
376        asm ("add %0,%1,%2"  \
377 	    : "=r" (__result)  \
378 	    : "%r" (__result), "r" (__round));  \
379        __result;  \
380     })
381 #  else
382 #   define mad_f_scale64(hi, lo)  \
383     ({ mad_fixed_t __result;  \
384        asm ("rotrwi %0,%1,%2"  \
385 	    : "=r" (__result)  \
386 	    : "r" (lo), "i" (MAD_F_SCALEBITS));  \
387        asm ("insrwi %0,%1,%2,0"  \
388 	    : "+r" (__result)  \
389 	    : "r" (hi), "i" (MAD_F_SCALEBITS));  \
390        __result;  \
391     })
392 #  endif
393 
394 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
395 
396 /* --- Default ------------------------------------------------------------- */
397 
398 # elif defined(FPM_DEFAULT)
399 
400 /*
401  * This version is the most portable but it loses significant accuracy.
402  * Furthermore, accuracy is biased against the second argument, so care
403  * should be taken when ordering operands.
404  *
405  * The scale factors are constant as this is not used with SSO.
406  *
407  * Pre-rounding is required to stay within the limits of compliance.
408  */
409 #  if defined(OPT_SPEED)
410 #   define mad_f_mul(x, y)	(((x) >> 12) * ((y) >> 16))
411 #  else
412 #   define mad_f_mul(x, y)	((((x) + (1L << 11)) >> 12) *  \
413 				 (((y) + (1L << 15)) >> 16))
414 #  endif
415 
416 /* ------------------------------------------------------------------------- */
417 
418 # else
419 #  error "no FPM selected"
420 # endif
421 
422 /* default implementations */
423 
424 # if !defined(mad_f_mul)
425 #  define mad_f_mul(x, y)  \
426     ({ register mad_fixed64hi_t __hi;  \
427        register mad_fixed64lo_t __lo;  \
428        MAD_F_MLX(__hi, __lo, (x), (y));  \
429        mad_f_scale64(__hi, __lo);  \
430     })
431 # endif
432 
433 # if !defined(MAD_F_MLA)
434 #  define MAD_F_ML0(hi, lo, x, y)	((lo)  = mad_f_mul((x), (y)))
435 #  define MAD_F_MLA(hi, lo, x, y)	((lo) += mad_f_mul((x), (y)))
436 #  define MAD_F_MLN(hi, lo)		((lo)  = -(lo))
437 #  define MAD_F_MLZ(hi, lo)		((void) (hi), (mad_fixed_t) (lo))
438 # endif
439 
440 # if !defined(MAD_F_ML0)
441 #  define MAD_F_ML0(hi, lo, x, y)	MAD_F_MLX((hi), (lo), (x), (y))
442 # endif
443 
444 # if !defined(MAD_F_MLN)
445 #  define MAD_F_MLN(hi, lo)		((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi))
446 # endif
447 
448 # if !defined(MAD_F_MLZ)
449 #  define MAD_F_MLZ(hi, lo)		mad_f_scale64((hi), (lo))
450 # endif
451 
452 # if !defined(mad_f_scale64)
453 #  if defined(OPT_ACCURACY)
454 #   define mad_f_scale64(hi, lo)  \
455     ((((mad_fixed_t)  \
456        (((hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
457 	((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
458 #  else
459 #   define mad_f_scale64(hi, lo)  \
460     ((mad_fixed_t)  \
461      (((hi) << (32 - MAD_F_SCALEBITS)) |  \
462       ((lo) >> MAD_F_SCALEBITS)))
463 #  endif
464 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
465 # endif
466 
467 /* C routines */
468 
469 mad_fixed_t mad_f_abs(mad_fixed_t);
470 mad_fixed_t mad_f_div(mad_fixed_t, mad_fixed_t);
471 
472 # endif
473