17dd7cddfSDavid du Colombier /* 27dd7cddfSDavid du Colombier * jidctflt.c 37dd7cddfSDavid du Colombier * 4*593dc095SDavid du Colombier * Copyright (C) 1994-1998, Thomas G. Lane. 57dd7cddfSDavid du Colombier * This file is part of the Independent JPEG Group's software. 67dd7cddfSDavid du Colombier * For conditions of distribution and use, see the accompanying README file. 77dd7cddfSDavid du Colombier * 87dd7cddfSDavid du Colombier * This file contains a floating-point implementation of the 97dd7cddfSDavid du Colombier * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine 107dd7cddfSDavid du Colombier * must also perform dequantization of the input coefficients. 117dd7cddfSDavid du Colombier * 127dd7cddfSDavid du Colombier * This implementation should be more accurate than either of the integer 137dd7cddfSDavid du Colombier * IDCT implementations. However, it may not give the same results on all 147dd7cddfSDavid du Colombier * machines because of differences in roundoff behavior. Speed will depend 157dd7cddfSDavid du Colombier * on the hardware's floating point capacity. 167dd7cddfSDavid du Colombier * 177dd7cddfSDavid du Colombier * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT 187dd7cddfSDavid du Colombier * on each row (or vice versa, but it's more convenient to emit a row at 197dd7cddfSDavid du Colombier * a time). Direct algorithms are also available, but they are much more 207dd7cddfSDavid du Colombier * complex and seem not to be any faster when reduced to code. 217dd7cddfSDavid du Colombier * 227dd7cddfSDavid du Colombier * This implementation is based on Arai, Agui, and Nakajima's algorithm for 237dd7cddfSDavid du Colombier * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in 247dd7cddfSDavid du Colombier * Japanese, but the algorithm is described in the Pennebaker & Mitchell 257dd7cddfSDavid du Colombier * JPEG textbook (see REFERENCES section in file README). The following code 267dd7cddfSDavid du Colombier * is based directly on figure 4-8 in P&M. 277dd7cddfSDavid du Colombier * While an 8-point DCT cannot be done in less than 11 multiplies, it is 287dd7cddfSDavid du Colombier * possible to arrange the computation so that many of the multiplies are 297dd7cddfSDavid du Colombier * simple scalings of the final outputs. These multiplies can then be 307dd7cddfSDavid du Colombier * folded into the multiplications or divisions by the JPEG quantization 317dd7cddfSDavid du Colombier * table entries. The AA&N method leaves only 5 multiplies and 29 adds 327dd7cddfSDavid du Colombier * to be done in the DCT itself. 337dd7cddfSDavid du Colombier * The primary disadvantage of this method is that with a fixed-point 347dd7cddfSDavid du Colombier * implementation, accuracy is lost due to imprecise representation of the 357dd7cddfSDavid du Colombier * scaled quantization values. However, that problem does not arise if 367dd7cddfSDavid du Colombier * we use floating point arithmetic. 377dd7cddfSDavid du Colombier */ 387dd7cddfSDavid du Colombier 397dd7cddfSDavid du Colombier #define JPEG_INTERNALS 407dd7cddfSDavid du Colombier #include "jinclude.h" 417dd7cddfSDavid du Colombier #include "jpeglib.h" 427dd7cddfSDavid du Colombier #include "jdct.h" /* Private declarations for DCT subsystem */ 437dd7cddfSDavid du Colombier 447dd7cddfSDavid du Colombier #ifdef DCT_FLOAT_SUPPORTED 457dd7cddfSDavid du Colombier 467dd7cddfSDavid du Colombier 477dd7cddfSDavid du Colombier /* 487dd7cddfSDavid du Colombier * This module is specialized to the case DCTSIZE = 8. 497dd7cddfSDavid du Colombier */ 507dd7cddfSDavid du Colombier 517dd7cddfSDavid du Colombier #if DCTSIZE != 8 527dd7cddfSDavid du Colombier Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 537dd7cddfSDavid du Colombier #endif 547dd7cddfSDavid du Colombier 557dd7cddfSDavid du Colombier 567dd7cddfSDavid du Colombier /* Dequantize a coefficient by multiplying it by the multiplier-table 577dd7cddfSDavid du Colombier * entry; produce a float result. 587dd7cddfSDavid du Colombier */ 597dd7cddfSDavid du Colombier 607dd7cddfSDavid du Colombier #define DEQUANTIZE(coef,quantval) (((FAST_FLOAT) (coef)) * (quantval)) 617dd7cddfSDavid du Colombier 627dd7cddfSDavid du Colombier 637dd7cddfSDavid du Colombier /* 647dd7cddfSDavid du Colombier * Perform dequantization and inverse DCT on one block of coefficients. 657dd7cddfSDavid du Colombier */ 667dd7cddfSDavid du Colombier 677dd7cddfSDavid du Colombier GLOBAL(void) 687dd7cddfSDavid du Colombier jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 697dd7cddfSDavid du Colombier JCOEFPTR coef_block, 707dd7cddfSDavid du Colombier JSAMPARRAY output_buf, JDIMENSION output_col) 717dd7cddfSDavid du Colombier { 727dd7cddfSDavid du Colombier FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 737dd7cddfSDavid du Colombier FAST_FLOAT tmp10, tmp11, tmp12, tmp13; 747dd7cddfSDavid du Colombier FAST_FLOAT z5, z10, z11, z12, z13; 757dd7cddfSDavid du Colombier JCOEFPTR inptr; 767dd7cddfSDavid du Colombier FLOAT_MULT_TYPE * quantptr; 777dd7cddfSDavid du Colombier FAST_FLOAT * wsptr; 787dd7cddfSDavid du Colombier JSAMPROW outptr; 797dd7cddfSDavid du Colombier JSAMPLE *range_limit = IDCT_range_limit(cinfo); 807dd7cddfSDavid du Colombier int ctr; 817dd7cddfSDavid du Colombier FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */ 827dd7cddfSDavid du Colombier SHIFT_TEMPS 837dd7cddfSDavid du Colombier 847dd7cddfSDavid du Colombier /* Pass 1: process columns from input, store into work array. */ 857dd7cddfSDavid du Colombier 867dd7cddfSDavid du Colombier inptr = coef_block; 877dd7cddfSDavid du Colombier quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table; 887dd7cddfSDavid du Colombier wsptr = workspace; 897dd7cddfSDavid du Colombier for (ctr = DCTSIZE; ctr > 0; ctr--) { 907dd7cddfSDavid du Colombier /* Due to quantization, we will usually find that many of the input 917dd7cddfSDavid du Colombier * coefficients are zero, especially the AC terms. We can exploit this 927dd7cddfSDavid du Colombier * by short-circuiting the IDCT calculation for any column in which all 937dd7cddfSDavid du Colombier * the AC terms are zero. In that case each output is equal to the 947dd7cddfSDavid du Colombier * DC coefficient (with scale factor as needed). 957dd7cddfSDavid du Colombier * With typical images and quantization tables, half or more of the 967dd7cddfSDavid du Colombier * column DCT calculations can be simplified this way. 977dd7cddfSDavid du Colombier */ 987dd7cddfSDavid du Colombier 99*593dc095SDavid du Colombier if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 100*593dc095SDavid du Colombier inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 101*593dc095SDavid du Colombier inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 102*593dc095SDavid du Colombier inptr[DCTSIZE*7] == 0) { 1037dd7cddfSDavid du Colombier /* AC terms all zero */ 1047dd7cddfSDavid du Colombier FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1057dd7cddfSDavid du Colombier 1067dd7cddfSDavid du Colombier wsptr[DCTSIZE*0] = dcval; 1077dd7cddfSDavid du Colombier wsptr[DCTSIZE*1] = dcval; 1087dd7cddfSDavid du Colombier wsptr[DCTSIZE*2] = dcval; 1097dd7cddfSDavid du Colombier wsptr[DCTSIZE*3] = dcval; 1107dd7cddfSDavid du Colombier wsptr[DCTSIZE*4] = dcval; 1117dd7cddfSDavid du Colombier wsptr[DCTSIZE*5] = dcval; 1127dd7cddfSDavid du Colombier wsptr[DCTSIZE*6] = dcval; 1137dd7cddfSDavid du Colombier wsptr[DCTSIZE*7] = dcval; 1147dd7cddfSDavid du Colombier 1157dd7cddfSDavid du Colombier inptr++; /* advance pointers to next column */ 1167dd7cddfSDavid du Colombier quantptr++; 1177dd7cddfSDavid du Colombier wsptr++; 1187dd7cddfSDavid du Colombier continue; 1197dd7cddfSDavid du Colombier } 1207dd7cddfSDavid du Colombier 1217dd7cddfSDavid du Colombier /* Even part */ 1227dd7cddfSDavid du Colombier 1237dd7cddfSDavid du Colombier tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1247dd7cddfSDavid du Colombier tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1257dd7cddfSDavid du Colombier tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1267dd7cddfSDavid du Colombier tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1277dd7cddfSDavid du Colombier 1287dd7cddfSDavid du Colombier tmp10 = tmp0 + tmp2; /* phase 3 */ 1297dd7cddfSDavid du Colombier tmp11 = tmp0 - tmp2; 1307dd7cddfSDavid du Colombier 1317dd7cddfSDavid du Colombier tmp13 = tmp1 + tmp3; /* phases 5-3 */ 1327dd7cddfSDavid du Colombier tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */ 1337dd7cddfSDavid du Colombier 1347dd7cddfSDavid du Colombier tmp0 = tmp10 + tmp13; /* phase 2 */ 1357dd7cddfSDavid du Colombier tmp3 = tmp10 - tmp13; 1367dd7cddfSDavid du Colombier tmp1 = tmp11 + tmp12; 1377dd7cddfSDavid du Colombier tmp2 = tmp11 - tmp12; 1387dd7cddfSDavid du Colombier 1397dd7cddfSDavid du Colombier /* Odd part */ 1407dd7cddfSDavid du Colombier 1417dd7cddfSDavid du Colombier tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1427dd7cddfSDavid du Colombier tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1437dd7cddfSDavid du Colombier tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1447dd7cddfSDavid du Colombier tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1457dd7cddfSDavid du Colombier 1467dd7cddfSDavid du Colombier z13 = tmp6 + tmp5; /* phase 6 */ 1477dd7cddfSDavid du Colombier z10 = tmp6 - tmp5; 1487dd7cddfSDavid du Colombier z11 = tmp4 + tmp7; 1497dd7cddfSDavid du Colombier z12 = tmp4 - tmp7; 1507dd7cddfSDavid du Colombier 1517dd7cddfSDavid du Colombier tmp7 = z11 + z13; /* phase 5 */ 1527dd7cddfSDavid du Colombier tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */ 1537dd7cddfSDavid du Colombier 1547dd7cddfSDavid du Colombier z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ 1557dd7cddfSDavid du Colombier tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ 1567dd7cddfSDavid du Colombier tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ 1577dd7cddfSDavid du Colombier 1587dd7cddfSDavid du Colombier tmp6 = tmp12 - tmp7; /* phase 2 */ 1597dd7cddfSDavid du Colombier tmp5 = tmp11 - tmp6; 1607dd7cddfSDavid du Colombier tmp4 = tmp10 + tmp5; 1617dd7cddfSDavid du Colombier 1627dd7cddfSDavid du Colombier wsptr[DCTSIZE*0] = tmp0 + tmp7; 1637dd7cddfSDavid du Colombier wsptr[DCTSIZE*7] = tmp0 - tmp7; 1647dd7cddfSDavid du Colombier wsptr[DCTSIZE*1] = tmp1 + tmp6; 1657dd7cddfSDavid du Colombier wsptr[DCTSIZE*6] = tmp1 - tmp6; 1667dd7cddfSDavid du Colombier wsptr[DCTSIZE*2] = tmp2 + tmp5; 1677dd7cddfSDavid du Colombier wsptr[DCTSIZE*5] = tmp2 - tmp5; 1687dd7cddfSDavid du Colombier wsptr[DCTSIZE*4] = tmp3 + tmp4; 1697dd7cddfSDavid du Colombier wsptr[DCTSIZE*3] = tmp3 - tmp4; 1707dd7cddfSDavid du Colombier 1717dd7cddfSDavid du Colombier inptr++; /* advance pointers to next column */ 1727dd7cddfSDavid du Colombier quantptr++; 1737dd7cddfSDavid du Colombier wsptr++; 1747dd7cddfSDavid du Colombier } 1757dd7cddfSDavid du Colombier 1767dd7cddfSDavid du Colombier /* Pass 2: process rows from work array, store into output array. */ 1777dd7cddfSDavid du Colombier /* Note that we must descale the results by a factor of 8 == 2**3. */ 1787dd7cddfSDavid du Colombier 1797dd7cddfSDavid du Colombier wsptr = workspace; 1807dd7cddfSDavid du Colombier for (ctr = 0; ctr < DCTSIZE; ctr++) { 1817dd7cddfSDavid du Colombier outptr = output_buf[ctr] + output_col; 1827dd7cddfSDavid du Colombier /* Rows of zeroes can be exploited in the same way as we did with columns. 1837dd7cddfSDavid du Colombier * However, the column calculation has created many nonzero AC terms, so 1847dd7cddfSDavid du Colombier * the simplification applies less often (typically 5% to 10% of the time). 1857dd7cddfSDavid du Colombier * And testing floats for zero is relatively expensive, so we don't bother. 1867dd7cddfSDavid du Colombier */ 1877dd7cddfSDavid du Colombier 1887dd7cddfSDavid du Colombier /* Even part */ 1897dd7cddfSDavid du Colombier 1907dd7cddfSDavid du Colombier tmp10 = wsptr[0] + wsptr[4]; 1917dd7cddfSDavid du Colombier tmp11 = wsptr[0] - wsptr[4]; 1927dd7cddfSDavid du Colombier 1937dd7cddfSDavid du Colombier tmp13 = wsptr[2] + wsptr[6]; 1947dd7cddfSDavid du Colombier tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; 1957dd7cddfSDavid du Colombier 1967dd7cddfSDavid du Colombier tmp0 = tmp10 + tmp13; 1977dd7cddfSDavid du Colombier tmp3 = tmp10 - tmp13; 1987dd7cddfSDavid du Colombier tmp1 = tmp11 + tmp12; 1997dd7cddfSDavid du Colombier tmp2 = tmp11 - tmp12; 2007dd7cddfSDavid du Colombier 2017dd7cddfSDavid du Colombier /* Odd part */ 2027dd7cddfSDavid du Colombier 2037dd7cddfSDavid du Colombier z13 = wsptr[5] + wsptr[3]; 2047dd7cddfSDavid du Colombier z10 = wsptr[5] - wsptr[3]; 2057dd7cddfSDavid du Colombier z11 = wsptr[1] + wsptr[7]; 2067dd7cddfSDavid du Colombier z12 = wsptr[1] - wsptr[7]; 2077dd7cddfSDavid du Colombier 2087dd7cddfSDavid du Colombier tmp7 = z11 + z13; 2097dd7cddfSDavid du Colombier tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); 2107dd7cddfSDavid du Colombier 2117dd7cddfSDavid du Colombier z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ 2127dd7cddfSDavid du Colombier tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ 2137dd7cddfSDavid du Colombier tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ 2147dd7cddfSDavid du Colombier 2157dd7cddfSDavid du Colombier tmp6 = tmp12 - tmp7; 2167dd7cddfSDavid du Colombier tmp5 = tmp11 - tmp6; 2177dd7cddfSDavid du Colombier tmp4 = tmp10 + tmp5; 2187dd7cddfSDavid du Colombier 2197dd7cddfSDavid du Colombier /* Final output stage: scale down by a factor of 8 and range-limit */ 2207dd7cddfSDavid du Colombier 2217dd7cddfSDavid du Colombier outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3) 2227dd7cddfSDavid du Colombier & RANGE_MASK]; 2237dd7cddfSDavid du Colombier outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3) 2247dd7cddfSDavid du Colombier & RANGE_MASK]; 2257dd7cddfSDavid du Colombier outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3) 2267dd7cddfSDavid du Colombier & RANGE_MASK]; 2277dd7cddfSDavid du Colombier outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3) 2287dd7cddfSDavid du Colombier & RANGE_MASK]; 2297dd7cddfSDavid du Colombier outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3) 2307dd7cddfSDavid du Colombier & RANGE_MASK]; 2317dd7cddfSDavid du Colombier outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3) 2327dd7cddfSDavid du Colombier & RANGE_MASK]; 2337dd7cddfSDavid du Colombier outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3) 2347dd7cddfSDavid du Colombier & RANGE_MASK]; 2357dd7cddfSDavid du Colombier outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3) 2367dd7cddfSDavid du Colombier & RANGE_MASK]; 2377dd7cddfSDavid du Colombier 2387dd7cddfSDavid du Colombier wsptr += DCTSIZE; /* advance pointer to next row */ 2397dd7cddfSDavid du Colombier } 2407dd7cddfSDavid du Colombier } 2417dd7cddfSDavid du Colombier 2427dd7cddfSDavid du Colombier #endif /* DCT_FLOAT_SUPPORTED */ 243