xref: /plan9/sys/src/cmd/bzip2/lib/huffman.c (revision 59cc4ca53493a3c6d2349fe2b7f7c40f7dce7294)
1*59cc4ca5SDavid du Colombier 
2*59cc4ca5SDavid du Colombier /*-------------------------------------------------------------*/
3*59cc4ca5SDavid du Colombier /*--- Huffman coding low-level stuff                        ---*/
4*59cc4ca5SDavid du Colombier /*---                                             huffman.c ---*/
5*59cc4ca5SDavid du Colombier /*-------------------------------------------------------------*/
6*59cc4ca5SDavid du Colombier 
7*59cc4ca5SDavid du Colombier /*--
8*59cc4ca5SDavid du Colombier   This file is a part of bzip2 and/or libbzip2, a program and
9*59cc4ca5SDavid du Colombier   library for lossless, block-sorting data compression.
10*59cc4ca5SDavid du Colombier 
11*59cc4ca5SDavid du Colombier   Copyright (C) 1996-2000 Julian R Seward.  All rights reserved.
12*59cc4ca5SDavid du Colombier 
13*59cc4ca5SDavid du Colombier   Redistribution and use in source and binary forms, with or without
14*59cc4ca5SDavid du Colombier   modification, are permitted provided that the following conditions
15*59cc4ca5SDavid du Colombier   are met:
16*59cc4ca5SDavid du Colombier 
17*59cc4ca5SDavid du Colombier   1. Redistributions of source code must retain the above copyright
18*59cc4ca5SDavid du Colombier      notice, this list of conditions and the following disclaimer.
19*59cc4ca5SDavid du Colombier 
20*59cc4ca5SDavid du Colombier   2. The origin of this software must not be misrepresented; you must
21*59cc4ca5SDavid du Colombier      not claim that you wrote the original software.  If you use this
22*59cc4ca5SDavid du Colombier      software in a product, an acknowledgment in the product
23*59cc4ca5SDavid du Colombier      documentation would be appreciated but is not required.
24*59cc4ca5SDavid du Colombier 
25*59cc4ca5SDavid du Colombier   3. Altered source versions must be plainly marked as such, and must
26*59cc4ca5SDavid du Colombier      not be misrepresented as being the original software.
27*59cc4ca5SDavid du Colombier 
28*59cc4ca5SDavid du Colombier   4. The name of the author may not be used to endorse or promote
29*59cc4ca5SDavid du Colombier      products derived from this software without specific prior written
30*59cc4ca5SDavid du Colombier      permission.
31*59cc4ca5SDavid du Colombier 
32*59cc4ca5SDavid du Colombier   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33*59cc4ca5SDavid du Colombier   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34*59cc4ca5SDavid du Colombier   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35*59cc4ca5SDavid du Colombier   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36*59cc4ca5SDavid du Colombier   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37*59cc4ca5SDavid du Colombier   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38*59cc4ca5SDavid du Colombier   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39*59cc4ca5SDavid du Colombier   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40*59cc4ca5SDavid du Colombier   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41*59cc4ca5SDavid du Colombier   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42*59cc4ca5SDavid du Colombier   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43*59cc4ca5SDavid du Colombier 
44*59cc4ca5SDavid du Colombier   Julian Seward, Cambridge, UK.
45*59cc4ca5SDavid du Colombier   jseward@acm.org
46*59cc4ca5SDavid du Colombier   bzip2/libbzip2 version 1.0 of 21 March 2000
47*59cc4ca5SDavid du Colombier 
48*59cc4ca5SDavid du Colombier   This program is based on (at least) the work of:
49*59cc4ca5SDavid du Colombier      Mike Burrows
50*59cc4ca5SDavid du Colombier      David Wheeler
51*59cc4ca5SDavid du Colombier      Peter Fenwick
52*59cc4ca5SDavid du Colombier      Alistair Moffat
53*59cc4ca5SDavid du Colombier      Radford Neal
54*59cc4ca5SDavid du Colombier      Ian H. Witten
55*59cc4ca5SDavid du Colombier      Robert Sedgewick
56*59cc4ca5SDavid du Colombier      Jon L. Bentley
57*59cc4ca5SDavid du Colombier 
58*59cc4ca5SDavid du Colombier   For more information on these sources, see the manual.
59*59cc4ca5SDavid du Colombier --*/
60*59cc4ca5SDavid du Colombier 
61*59cc4ca5SDavid du Colombier 
62*59cc4ca5SDavid du Colombier #include "os.h"
63*59cc4ca5SDavid du Colombier #include "bzlib.h"
64*59cc4ca5SDavid du Colombier #include "bzlib_private.h"
65*59cc4ca5SDavid du Colombier 
66*59cc4ca5SDavid du Colombier /*---------------------------------------------------*/
67*59cc4ca5SDavid du Colombier #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
68*59cc4ca5SDavid du Colombier #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
69*59cc4ca5SDavid du Colombier #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
70*59cc4ca5SDavid du Colombier 
71*59cc4ca5SDavid du Colombier #define ADDWEIGHTS(zw1,zw2)                           \
72*59cc4ca5SDavid du Colombier    (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
73*59cc4ca5SDavid du Colombier    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
74*59cc4ca5SDavid du Colombier 
75*59cc4ca5SDavid du Colombier #define UPHEAP(z)                                     \
76*59cc4ca5SDavid du Colombier {                                                     \
77*59cc4ca5SDavid du Colombier    Int32 zz, tmp;                                     \
78*59cc4ca5SDavid du Colombier    zz = z; tmp = heap[zz];                            \
79*59cc4ca5SDavid du Colombier    while (weight[tmp] < weight[heap[zz >> 1]]) {      \
80*59cc4ca5SDavid du Colombier       heap[zz] = heap[zz >> 1];                       \
81*59cc4ca5SDavid du Colombier       zz >>= 1;                                       \
82*59cc4ca5SDavid du Colombier    }                                                  \
83*59cc4ca5SDavid du Colombier    heap[zz] = tmp;                                    \
84*59cc4ca5SDavid du Colombier }
85*59cc4ca5SDavid du Colombier 
86*59cc4ca5SDavid du Colombier #define DOWNHEAP(z)                                   \
87*59cc4ca5SDavid du Colombier {                                                     \
88*59cc4ca5SDavid du Colombier    Int32 zz, yy, tmp;                                 \
89*59cc4ca5SDavid du Colombier    zz = z; tmp = heap[zz];                            \
90*59cc4ca5SDavid du Colombier    while (True) {                                     \
91*59cc4ca5SDavid du Colombier       yy = zz << 1;                                   \
92*59cc4ca5SDavid du Colombier       if (yy > nHeap) break;                          \
93*59cc4ca5SDavid du Colombier       if (yy < nHeap &&                               \
94*59cc4ca5SDavid du Colombier           weight[heap[yy+1]] < weight[heap[yy]])      \
95*59cc4ca5SDavid du Colombier          yy++;                                        \
96*59cc4ca5SDavid du Colombier       if (weight[tmp] < weight[heap[yy]]) break;      \
97*59cc4ca5SDavid du Colombier       heap[zz] = heap[yy];                            \
98*59cc4ca5SDavid du Colombier       zz = yy;                                        \
99*59cc4ca5SDavid du Colombier    }                                                  \
100*59cc4ca5SDavid du Colombier    heap[zz] = tmp;                                    \
101*59cc4ca5SDavid du Colombier }
102*59cc4ca5SDavid du Colombier 
103*59cc4ca5SDavid du Colombier 
104*59cc4ca5SDavid du Colombier /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)105*59cc4ca5SDavid du Colombier void BZ2_hbMakeCodeLengths ( UChar *len,
106*59cc4ca5SDavid du Colombier                              Int32 *freq,
107*59cc4ca5SDavid du Colombier                              Int32 alphaSize,
108*59cc4ca5SDavid du Colombier                              Int32 maxLen )
109*59cc4ca5SDavid du Colombier {
110*59cc4ca5SDavid du Colombier    /*--
111*59cc4ca5SDavid du Colombier       Nodes and heap entries run from 1.  Entry 0
112*59cc4ca5SDavid du Colombier       for both the heap and nodes is a sentinel.
113*59cc4ca5SDavid du Colombier    --*/
114*59cc4ca5SDavid du Colombier    Int32 nNodes, nHeap, n1, n2, i, j, k;
115*59cc4ca5SDavid du Colombier    Bool  tooLong;
116*59cc4ca5SDavid du Colombier 
117*59cc4ca5SDavid du Colombier    Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
118*59cc4ca5SDavid du Colombier    Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
119*59cc4ca5SDavid du Colombier    Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
120*59cc4ca5SDavid du Colombier 
121*59cc4ca5SDavid du Colombier    for (i = 0; i < alphaSize; i++)
122*59cc4ca5SDavid du Colombier       weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
123*59cc4ca5SDavid du Colombier 
124*59cc4ca5SDavid du Colombier    while (True) {
125*59cc4ca5SDavid du Colombier 
126*59cc4ca5SDavid du Colombier       nNodes = alphaSize;
127*59cc4ca5SDavid du Colombier       nHeap = 0;
128*59cc4ca5SDavid du Colombier 
129*59cc4ca5SDavid du Colombier       heap[0] = 0;
130*59cc4ca5SDavid du Colombier       weight[0] = 0;
131*59cc4ca5SDavid du Colombier       parent[0] = -2;
132*59cc4ca5SDavid du Colombier 
133*59cc4ca5SDavid du Colombier       for (i = 1; i <= alphaSize; i++) {
134*59cc4ca5SDavid du Colombier          parent[i] = -1;
135*59cc4ca5SDavid du Colombier          nHeap++;
136*59cc4ca5SDavid du Colombier          heap[nHeap] = i;
137*59cc4ca5SDavid du Colombier          UPHEAP(nHeap);
138*59cc4ca5SDavid du Colombier       }
139*59cc4ca5SDavid du Colombier 
140*59cc4ca5SDavid du Colombier       AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
141*59cc4ca5SDavid du Colombier 
142*59cc4ca5SDavid du Colombier       while (nHeap > 1) {
143*59cc4ca5SDavid du Colombier          n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
144*59cc4ca5SDavid du Colombier          n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
145*59cc4ca5SDavid du Colombier          nNodes++;
146*59cc4ca5SDavid du Colombier          parent[n1] = parent[n2] = nNodes;
147*59cc4ca5SDavid du Colombier          weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
148*59cc4ca5SDavid du Colombier          parent[nNodes] = -1;
149*59cc4ca5SDavid du Colombier          nHeap++;
150*59cc4ca5SDavid du Colombier          heap[nHeap] = nNodes;
151*59cc4ca5SDavid du Colombier          UPHEAP(nHeap);
152*59cc4ca5SDavid du Colombier       }
153*59cc4ca5SDavid du Colombier 
154*59cc4ca5SDavid du Colombier       AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
155*59cc4ca5SDavid du Colombier 
156*59cc4ca5SDavid du Colombier       tooLong = False;
157*59cc4ca5SDavid du Colombier       for (i = 1; i <= alphaSize; i++) {
158*59cc4ca5SDavid du Colombier          j = 0;
159*59cc4ca5SDavid du Colombier          k = i;
160*59cc4ca5SDavid du Colombier          while (parent[k] >= 0) { k = parent[k]; j++; }
161*59cc4ca5SDavid du Colombier          len[i-1] = j;
162*59cc4ca5SDavid du Colombier          if (j > maxLen) tooLong = True;
163*59cc4ca5SDavid du Colombier       }
164*59cc4ca5SDavid du Colombier 
165*59cc4ca5SDavid du Colombier       if (! tooLong) break;
166*59cc4ca5SDavid du Colombier 
167*59cc4ca5SDavid du Colombier       for (i = 1; i < alphaSize; i++) {
168*59cc4ca5SDavid du Colombier          j = weight[i] >> 8;
169*59cc4ca5SDavid du Colombier          j = 1 + (j / 2);
170*59cc4ca5SDavid du Colombier          weight[i] = j << 8;
171*59cc4ca5SDavid du Colombier       }
172*59cc4ca5SDavid du Colombier    }
173*59cc4ca5SDavid du Colombier }
174*59cc4ca5SDavid du Colombier 
175*59cc4ca5SDavid du Colombier 
176*59cc4ca5SDavid du Colombier /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)177*59cc4ca5SDavid du Colombier void BZ2_hbAssignCodes ( Int32 *code,
178*59cc4ca5SDavid du Colombier                          UChar *length,
179*59cc4ca5SDavid du Colombier                          Int32 minLen,
180*59cc4ca5SDavid du Colombier                          Int32 maxLen,
181*59cc4ca5SDavid du Colombier                          Int32 alphaSize )
182*59cc4ca5SDavid du Colombier {
183*59cc4ca5SDavid du Colombier    Int32 n, vec, i;
184*59cc4ca5SDavid du Colombier 
185*59cc4ca5SDavid du Colombier    vec = 0;
186*59cc4ca5SDavid du Colombier    for (n = minLen; n <= maxLen; n++) {
187*59cc4ca5SDavid du Colombier       for (i = 0; i < alphaSize; i++)
188*59cc4ca5SDavid du Colombier          if (length[i] == n) { code[i] = vec; vec++; };
189*59cc4ca5SDavid du Colombier       vec <<= 1;
190*59cc4ca5SDavid du Colombier    }
191*59cc4ca5SDavid du Colombier }
192*59cc4ca5SDavid du Colombier 
193*59cc4ca5SDavid du Colombier 
194*59cc4ca5SDavid du Colombier /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)195*59cc4ca5SDavid du Colombier void BZ2_hbCreateDecodeTables ( Int32 *limit,
196*59cc4ca5SDavid du Colombier                                 Int32 *base,
197*59cc4ca5SDavid du Colombier                                 Int32 *perm,
198*59cc4ca5SDavid du Colombier                                 UChar *length,
199*59cc4ca5SDavid du Colombier                                 Int32 minLen,
200*59cc4ca5SDavid du Colombier                                 Int32 maxLen,
201*59cc4ca5SDavid du Colombier                                 Int32 alphaSize )
202*59cc4ca5SDavid du Colombier {
203*59cc4ca5SDavid du Colombier    Int32 pp, i, j, vec;
204*59cc4ca5SDavid du Colombier 
205*59cc4ca5SDavid du Colombier    pp = 0;
206*59cc4ca5SDavid du Colombier    for (i = minLen; i <= maxLen; i++)
207*59cc4ca5SDavid du Colombier       for (j = 0; j < alphaSize; j++)
208*59cc4ca5SDavid du Colombier          if (length[j] == i) { perm[pp] = j; pp++; };
209*59cc4ca5SDavid du Colombier 
210*59cc4ca5SDavid du Colombier    for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
211*59cc4ca5SDavid du Colombier    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
212*59cc4ca5SDavid du Colombier 
213*59cc4ca5SDavid du Colombier    for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
214*59cc4ca5SDavid du Colombier 
215*59cc4ca5SDavid du Colombier    for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
216*59cc4ca5SDavid du Colombier    vec = 0;
217*59cc4ca5SDavid du Colombier 
218*59cc4ca5SDavid du Colombier    for (i = minLen; i <= maxLen; i++) {
219*59cc4ca5SDavid du Colombier       vec += (base[i+1] - base[i]);
220*59cc4ca5SDavid du Colombier       limit[i] = vec-1;
221*59cc4ca5SDavid du Colombier       vec <<= 1;
222*59cc4ca5SDavid du Colombier    }
223*59cc4ca5SDavid du Colombier    for (i = minLen + 1; i <= maxLen; i++)
224*59cc4ca5SDavid du Colombier       base[i] = ((limit[i-1] + 1) << 1) - base[i];
225*59cc4ca5SDavid du Colombier }
226*59cc4ca5SDavid du Colombier 
227*59cc4ca5SDavid du Colombier 
228*59cc4ca5SDavid du Colombier /*-------------------------------------------------------------*/
229*59cc4ca5SDavid du Colombier /*--- end                                         huffman.c ---*/
230*59cc4ca5SDavid du Colombier /*-------------------------------------------------------------*/
231