1*7dd7cddfSDavid du Colombier /* 2*7dd7cddfSDavid du Colombier * jfdctflt.c 3*7dd7cddfSDavid du Colombier * 4*7dd7cddfSDavid du Colombier * Copyright (C) 1994-1996, Thomas G. Lane. 5*7dd7cddfSDavid du Colombier * This file is part of the Independent JPEG Group's software. 6*7dd7cddfSDavid du Colombier * For conditions of distribution and use, see the accompanying README file. 7*7dd7cddfSDavid du Colombier * 8*7dd7cddfSDavid du Colombier * This file contains a floating-point implementation of the 9*7dd7cddfSDavid du Colombier * forward DCT (Discrete Cosine Transform). 10*7dd7cddfSDavid du Colombier * 11*7dd7cddfSDavid du Colombier * This implementation should be more accurate than either of the integer 12*7dd7cddfSDavid du Colombier * DCT implementations. However, it may not give the same results on all 13*7dd7cddfSDavid du Colombier * machines because of differences in roundoff behavior. Speed will depend 14*7dd7cddfSDavid du Colombier * on the hardware's floating point capacity. 15*7dd7cddfSDavid du Colombier * 16*7dd7cddfSDavid du Colombier * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT 17*7dd7cddfSDavid du Colombier * on each column. Direct algorithms are also available, but they are 18*7dd7cddfSDavid du Colombier * much more complex and seem not to be any faster when reduced to code. 19*7dd7cddfSDavid du Colombier * 20*7dd7cddfSDavid du Colombier * This implementation is based on Arai, Agui, and Nakajima's algorithm for 21*7dd7cddfSDavid du Colombier * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in 22*7dd7cddfSDavid du Colombier * Japanese, but the algorithm is described in the Pennebaker & Mitchell 23*7dd7cddfSDavid du Colombier * JPEG textbook (see REFERENCES section in file README). The following code 24*7dd7cddfSDavid du Colombier * is based directly on figure 4-8 in P&M. 25*7dd7cddfSDavid du Colombier * While an 8-point DCT cannot be done in less than 11 multiplies, it is 26*7dd7cddfSDavid du Colombier * possible to arrange the computation so that many of the multiplies are 27*7dd7cddfSDavid du Colombier * simple scalings of the final outputs. These multiplies can then be 28*7dd7cddfSDavid du Colombier * folded into the multiplications or divisions by the JPEG quantization 29*7dd7cddfSDavid du Colombier * table entries. The AA&N method leaves only 5 multiplies and 29 adds 30*7dd7cddfSDavid du Colombier * to be done in the DCT itself. 31*7dd7cddfSDavid du Colombier * The primary disadvantage of this method is that with a fixed-point 32*7dd7cddfSDavid du Colombier * implementation, accuracy is lost due to imprecise representation of the 33*7dd7cddfSDavid du Colombier * scaled quantization values. However, that problem does not arise if 34*7dd7cddfSDavid du Colombier * we use floating point arithmetic. 35*7dd7cddfSDavid du Colombier */ 36*7dd7cddfSDavid du Colombier 37*7dd7cddfSDavid du Colombier #define JPEG_INTERNALS 38*7dd7cddfSDavid du Colombier #include "jinclude.h" 39*7dd7cddfSDavid du Colombier #include "jpeglib.h" 40*7dd7cddfSDavid du Colombier #include "jdct.h" /* Private declarations for DCT subsystem */ 41*7dd7cddfSDavid du Colombier 42*7dd7cddfSDavid du Colombier #ifdef DCT_FLOAT_SUPPORTED 43*7dd7cddfSDavid du Colombier 44*7dd7cddfSDavid du Colombier 45*7dd7cddfSDavid du Colombier /* 46*7dd7cddfSDavid du Colombier * This module is specialized to the case DCTSIZE = 8. 47*7dd7cddfSDavid du Colombier */ 48*7dd7cddfSDavid du Colombier 49*7dd7cddfSDavid du Colombier #if DCTSIZE != 8 50*7dd7cddfSDavid du Colombier Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 51*7dd7cddfSDavid du Colombier #endif 52*7dd7cddfSDavid du Colombier 53*7dd7cddfSDavid du Colombier 54*7dd7cddfSDavid du Colombier /* 55*7dd7cddfSDavid du Colombier * Perform the forward DCT on one block of samples. 56*7dd7cddfSDavid du Colombier */ 57*7dd7cddfSDavid du Colombier 58*7dd7cddfSDavid du Colombier GLOBAL(void) 59*7dd7cddfSDavid du Colombier jpeg_fdct_float (FAST_FLOAT * data) 60*7dd7cddfSDavid du Colombier { 61*7dd7cddfSDavid du Colombier FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 62*7dd7cddfSDavid du Colombier FAST_FLOAT tmp10, tmp11, tmp12, tmp13; 63*7dd7cddfSDavid du Colombier FAST_FLOAT z1, z2, z3, z4, z5, z11, z13; 64*7dd7cddfSDavid du Colombier FAST_FLOAT *dataptr; 65*7dd7cddfSDavid du Colombier int ctr; 66*7dd7cddfSDavid du Colombier 67*7dd7cddfSDavid du Colombier /* Pass 1: process rows. */ 68*7dd7cddfSDavid du Colombier 69*7dd7cddfSDavid du Colombier dataptr = data; 70*7dd7cddfSDavid du Colombier for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 71*7dd7cddfSDavid du Colombier tmp0 = dataptr[0] + dataptr[7]; 72*7dd7cddfSDavid du Colombier tmp7 = dataptr[0] - dataptr[7]; 73*7dd7cddfSDavid du Colombier tmp1 = dataptr[1] + dataptr[6]; 74*7dd7cddfSDavid du Colombier tmp6 = dataptr[1] - dataptr[6]; 75*7dd7cddfSDavid du Colombier tmp2 = dataptr[2] + dataptr[5]; 76*7dd7cddfSDavid du Colombier tmp5 = dataptr[2] - dataptr[5]; 77*7dd7cddfSDavid du Colombier tmp3 = dataptr[3] + dataptr[4]; 78*7dd7cddfSDavid du Colombier tmp4 = dataptr[3] - dataptr[4]; 79*7dd7cddfSDavid du Colombier 80*7dd7cddfSDavid du Colombier /* Even part */ 81*7dd7cddfSDavid du Colombier 82*7dd7cddfSDavid du Colombier tmp10 = tmp0 + tmp3; /* phase 2 */ 83*7dd7cddfSDavid du Colombier tmp13 = tmp0 - tmp3; 84*7dd7cddfSDavid du Colombier tmp11 = tmp1 + tmp2; 85*7dd7cddfSDavid du Colombier tmp12 = tmp1 - tmp2; 86*7dd7cddfSDavid du Colombier 87*7dd7cddfSDavid du Colombier dataptr[0] = tmp10 + tmp11; /* phase 3 */ 88*7dd7cddfSDavid du Colombier dataptr[4] = tmp10 - tmp11; 89*7dd7cddfSDavid du Colombier 90*7dd7cddfSDavid du Colombier z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ 91*7dd7cddfSDavid du Colombier dataptr[2] = tmp13 + z1; /* phase 5 */ 92*7dd7cddfSDavid du Colombier dataptr[6] = tmp13 - z1; 93*7dd7cddfSDavid du Colombier 94*7dd7cddfSDavid du Colombier /* Odd part */ 95*7dd7cddfSDavid du Colombier 96*7dd7cddfSDavid du Colombier tmp10 = tmp4 + tmp5; /* phase 2 */ 97*7dd7cddfSDavid du Colombier tmp11 = tmp5 + tmp6; 98*7dd7cddfSDavid du Colombier tmp12 = tmp6 + tmp7; 99*7dd7cddfSDavid du Colombier 100*7dd7cddfSDavid du Colombier /* The rotator is modified from fig 4-8 to avoid extra negations. */ 101*7dd7cddfSDavid du Colombier z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */ 102*7dd7cddfSDavid du Colombier z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */ 103*7dd7cddfSDavid du Colombier z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ 104*7dd7cddfSDavid du Colombier z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ 105*7dd7cddfSDavid du Colombier 106*7dd7cddfSDavid du Colombier z11 = tmp7 + z3; /* phase 5 */ 107*7dd7cddfSDavid du Colombier z13 = tmp7 - z3; 108*7dd7cddfSDavid du Colombier 109*7dd7cddfSDavid du Colombier dataptr[5] = z13 + z2; /* phase 6 */ 110*7dd7cddfSDavid du Colombier dataptr[3] = z13 - z2; 111*7dd7cddfSDavid du Colombier dataptr[1] = z11 + z4; 112*7dd7cddfSDavid du Colombier dataptr[7] = z11 - z4; 113*7dd7cddfSDavid du Colombier 114*7dd7cddfSDavid du Colombier dataptr += DCTSIZE; /* advance pointer to next row */ 115*7dd7cddfSDavid du Colombier } 116*7dd7cddfSDavid du Colombier 117*7dd7cddfSDavid du Colombier /* Pass 2: process columns. */ 118*7dd7cddfSDavid du Colombier 119*7dd7cddfSDavid du Colombier dataptr = data; 120*7dd7cddfSDavid du Colombier for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 121*7dd7cddfSDavid du Colombier tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; 122*7dd7cddfSDavid du Colombier tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; 123*7dd7cddfSDavid du Colombier tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; 124*7dd7cddfSDavid du Colombier tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; 125*7dd7cddfSDavid du Colombier tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; 126*7dd7cddfSDavid du Colombier tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; 127*7dd7cddfSDavid du Colombier tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; 128*7dd7cddfSDavid du Colombier tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; 129*7dd7cddfSDavid du Colombier 130*7dd7cddfSDavid du Colombier /* Even part */ 131*7dd7cddfSDavid du Colombier 132*7dd7cddfSDavid du Colombier tmp10 = tmp0 + tmp3; /* phase 2 */ 133*7dd7cddfSDavid du Colombier tmp13 = tmp0 - tmp3; 134*7dd7cddfSDavid du Colombier tmp11 = tmp1 + tmp2; 135*7dd7cddfSDavid du Colombier tmp12 = tmp1 - tmp2; 136*7dd7cddfSDavid du Colombier 137*7dd7cddfSDavid du Colombier dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ 138*7dd7cddfSDavid du Colombier dataptr[DCTSIZE*4] = tmp10 - tmp11; 139*7dd7cddfSDavid du Colombier 140*7dd7cddfSDavid du Colombier z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ 141*7dd7cddfSDavid du Colombier dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ 142*7dd7cddfSDavid du Colombier dataptr[DCTSIZE*6] = tmp13 - z1; 143*7dd7cddfSDavid du Colombier 144*7dd7cddfSDavid du Colombier /* Odd part */ 145*7dd7cddfSDavid du Colombier 146*7dd7cddfSDavid du Colombier tmp10 = tmp4 + tmp5; /* phase 2 */ 147*7dd7cddfSDavid du Colombier tmp11 = tmp5 + tmp6; 148*7dd7cddfSDavid du Colombier tmp12 = tmp6 + tmp7; 149*7dd7cddfSDavid du Colombier 150*7dd7cddfSDavid du Colombier /* The rotator is modified from fig 4-8 to avoid extra negations. */ 151*7dd7cddfSDavid du Colombier z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */ 152*7dd7cddfSDavid du Colombier z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */ 153*7dd7cddfSDavid du Colombier z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ 154*7dd7cddfSDavid du Colombier z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ 155*7dd7cddfSDavid du Colombier 156*7dd7cddfSDavid du Colombier z11 = tmp7 + z3; /* phase 5 */ 157*7dd7cddfSDavid du Colombier z13 = tmp7 - z3; 158*7dd7cddfSDavid du Colombier 159*7dd7cddfSDavid du Colombier dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ 160*7dd7cddfSDavid du Colombier dataptr[DCTSIZE*3] = z13 - z2; 161*7dd7cddfSDavid du Colombier dataptr[DCTSIZE*1] = z11 + z4; 162*7dd7cddfSDavid du Colombier dataptr[DCTSIZE*7] = z11 - z4; 163*7dd7cddfSDavid du Colombier 164*7dd7cddfSDavid du Colombier dataptr++; /* advance pointer to next column */ 165*7dd7cddfSDavid du Colombier } 166*7dd7cddfSDavid du Colombier } 167*7dd7cddfSDavid du Colombier 168*7dd7cddfSDavid du Colombier #endif /* DCT_FLOAT_SUPPORTED */ 169