xref: /freebsd-src/contrib/bzip2/huffman.c (revision 51f61fc0c7ece7a30c737341e65455841bc3f04e)
1df9de0ebSDavid E. O'Brien 
2df9de0ebSDavid E. O'Brien /*-------------------------------------------------------------*/
3df9de0ebSDavid E. O'Brien /*--- Huffman coding low-level stuff                        ---*/
4df9de0ebSDavid E. O'Brien /*---                                             huffman.c ---*/
5df9de0ebSDavid E. O'Brien /*-------------------------------------------------------------*/
6df9de0ebSDavid E. O'Brien 
71b79bae0SXin LI /* ------------------------------------------------------------------
81b79bae0SXin LI    This file is part of bzip2/libbzip2, a program and library for
91b79bae0SXin LI    lossless, block-sorting data compression.
10df9de0ebSDavid E. O'Brien 
11*51f61fc0SXin LI    bzip2/libbzip2 version 1.0.8 of 13 July 2019
12*51f61fc0SXin LI    Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13df9de0ebSDavid E. O'Brien 
141b79bae0SXin LI    Please read the WARNING, DISCLAIMER and PATENTS sections in the
151b79bae0SXin LI    README file.
16df9de0ebSDavid E. O'Brien 
171b79bae0SXin LI    This program is released under the terms of the license contained
181b79bae0SXin LI    in the file LICENSE.
191b79bae0SXin LI    ------------------------------------------------------------------ */
20df9de0ebSDavid E. O'Brien 
21df9de0ebSDavid E. O'Brien 
22df9de0ebSDavid E. O'Brien #include "bzlib_private.h"
23df9de0ebSDavid E. O'Brien 
24df9de0ebSDavid E. O'Brien /*---------------------------------------------------*/
25df9de0ebSDavid E. O'Brien #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
26df9de0ebSDavid E. O'Brien #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
27df9de0ebSDavid E. O'Brien #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
28df9de0ebSDavid E. O'Brien 
29df9de0ebSDavid E. O'Brien #define ADDWEIGHTS(zw1,zw2)                           \
30df9de0ebSDavid E. O'Brien    (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
31df9de0ebSDavid E. O'Brien    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32df9de0ebSDavid E. O'Brien 
33df9de0ebSDavid E. O'Brien #define UPHEAP(z)                                     \
34df9de0ebSDavid E. O'Brien {                                                     \
35df9de0ebSDavid E. O'Brien    Int32 zz, tmp;                                     \
36df9de0ebSDavid E. O'Brien    zz = z; tmp = heap[zz];                            \
37df9de0ebSDavid E. O'Brien    while (weight[tmp] < weight[heap[zz >> 1]]) {      \
38df9de0ebSDavid E. O'Brien       heap[zz] = heap[zz >> 1];                       \
39df9de0ebSDavid E. O'Brien       zz >>= 1;                                       \
40df9de0ebSDavid E. O'Brien    }                                                  \
41df9de0ebSDavid E. O'Brien    heap[zz] = tmp;                                    \
42df9de0ebSDavid E. O'Brien }
43df9de0ebSDavid E. O'Brien 
44df9de0ebSDavid E. O'Brien #define DOWNHEAP(z)                                   \
45df9de0ebSDavid E. O'Brien {                                                     \
46df9de0ebSDavid E. O'Brien    Int32 zz, yy, tmp;                                 \
47df9de0ebSDavid E. O'Brien    zz = z; tmp = heap[zz];                            \
48df9de0ebSDavid E. O'Brien    while (True) {                                     \
49df9de0ebSDavid E. O'Brien       yy = zz << 1;                                   \
50df9de0ebSDavid E. O'Brien       if (yy > nHeap) break;                          \
51df9de0ebSDavid E. O'Brien       if (yy < nHeap &&                               \
52df9de0ebSDavid E. O'Brien           weight[heap[yy+1]] < weight[heap[yy]])      \
53df9de0ebSDavid E. O'Brien          yy++;                                        \
54df9de0ebSDavid E. O'Brien       if (weight[tmp] < weight[heap[yy]]) break;      \
55df9de0ebSDavid E. O'Brien       heap[zz] = heap[yy];                            \
56df9de0ebSDavid E. O'Brien       zz = yy;                                        \
57df9de0ebSDavid E. O'Brien    }                                                  \
58df9de0ebSDavid E. O'Brien    heap[zz] = tmp;                                    \
59df9de0ebSDavid E. O'Brien }
60df9de0ebSDavid E. O'Brien 
61df9de0ebSDavid E. O'Brien 
62df9de0ebSDavid E. O'Brien /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)63df9de0ebSDavid E. O'Brien void BZ2_hbMakeCodeLengths ( UChar *len,
64df9de0ebSDavid E. O'Brien                              Int32 *freq,
65df9de0ebSDavid E. O'Brien                              Int32 alphaSize,
66df9de0ebSDavid E. O'Brien                              Int32 maxLen )
67df9de0ebSDavid E. O'Brien {
68df9de0ebSDavid E. O'Brien    /*--
69df9de0ebSDavid E. O'Brien       Nodes and heap entries run from 1.  Entry 0
70df9de0ebSDavid E. O'Brien       for both the heap and nodes is a sentinel.
71df9de0ebSDavid E. O'Brien    --*/
72df9de0ebSDavid E. O'Brien    Int32 nNodes, nHeap, n1, n2, i, j, k;
73df9de0ebSDavid E. O'Brien    Bool  tooLong;
74df9de0ebSDavid E. O'Brien 
75df9de0ebSDavid E. O'Brien    Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
76df9de0ebSDavid E. O'Brien    Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
77df9de0ebSDavid E. O'Brien    Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
78df9de0ebSDavid E. O'Brien 
79df9de0ebSDavid E. O'Brien    for (i = 0; i < alphaSize; i++)
80df9de0ebSDavid E. O'Brien       weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
81df9de0ebSDavid E. O'Brien 
82df9de0ebSDavid E. O'Brien    while (True) {
83df9de0ebSDavid E. O'Brien 
84df9de0ebSDavid E. O'Brien       nNodes = alphaSize;
85df9de0ebSDavid E. O'Brien       nHeap = 0;
86df9de0ebSDavid E. O'Brien 
87df9de0ebSDavid E. O'Brien       heap[0] = 0;
88df9de0ebSDavid E. O'Brien       weight[0] = 0;
89df9de0ebSDavid E. O'Brien       parent[0] = -2;
90df9de0ebSDavid E. O'Brien 
91df9de0ebSDavid E. O'Brien       for (i = 1; i <= alphaSize; i++) {
92df9de0ebSDavid E. O'Brien          parent[i] = -1;
93df9de0ebSDavid E. O'Brien          nHeap++;
94df9de0ebSDavid E. O'Brien          heap[nHeap] = i;
95df9de0ebSDavid E. O'Brien          UPHEAP(nHeap);
96df9de0ebSDavid E. O'Brien       }
97df9de0ebSDavid E. O'Brien 
98df9de0ebSDavid E. O'Brien       AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
99df9de0ebSDavid E. O'Brien 
100df9de0ebSDavid E. O'Brien       while (nHeap > 1) {
101df9de0ebSDavid E. O'Brien          n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102df9de0ebSDavid E. O'Brien          n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103df9de0ebSDavid E. O'Brien          nNodes++;
104df9de0ebSDavid E. O'Brien          parent[n1] = parent[n2] = nNodes;
105df9de0ebSDavid E. O'Brien          weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106df9de0ebSDavid E. O'Brien          parent[nNodes] = -1;
107df9de0ebSDavid E. O'Brien          nHeap++;
108df9de0ebSDavid E. O'Brien          heap[nHeap] = nNodes;
109df9de0ebSDavid E. O'Brien          UPHEAP(nHeap);
110df9de0ebSDavid E. O'Brien       }
111df9de0ebSDavid E. O'Brien 
112df9de0ebSDavid E. O'Brien       AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113df9de0ebSDavid E. O'Brien 
114df9de0ebSDavid E. O'Brien       tooLong = False;
115df9de0ebSDavid E. O'Brien       for (i = 1; i <= alphaSize; i++) {
116df9de0ebSDavid E. O'Brien          j = 0;
117df9de0ebSDavid E. O'Brien          k = i;
118df9de0ebSDavid E. O'Brien          while (parent[k] >= 0) { k = parent[k]; j++; }
119df9de0ebSDavid E. O'Brien          len[i-1] = j;
120df9de0ebSDavid E. O'Brien          if (j > maxLen) tooLong = True;
121df9de0ebSDavid E. O'Brien       }
122df9de0ebSDavid E. O'Brien 
123df9de0ebSDavid E. O'Brien       if (! tooLong) break;
124df9de0ebSDavid E. O'Brien 
125f7a4f99fSDavid E. O'Brien       /* 17 Oct 04: keep-going condition for the following loop used
126f7a4f99fSDavid E. O'Brien          to be 'i < alphaSize', which missed the last element,
127f7a4f99fSDavid E. O'Brien          theoretically leading to the possibility of the compressor
128f7a4f99fSDavid E. O'Brien          looping.  However, this count-scaling step is only needed if
129f7a4f99fSDavid E. O'Brien          one of the generated Huffman code words is longer than
130f7a4f99fSDavid E. O'Brien          maxLen, which up to and including version 1.0.2 was 20 bits,
131f7a4f99fSDavid E. O'Brien          which is extremely unlikely.  In version 1.0.3 maxLen was
132f7a4f99fSDavid E. O'Brien          changed to 17 bits, which has minimal effect on compression
133f7a4f99fSDavid E. O'Brien          ratio, but does mean this scaling step is used from time to
134f7a4f99fSDavid E. O'Brien          time, enough to verify that it works.
135f7a4f99fSDavid E. O'Brien 
136f7a4f99fSDavid E. O'Brien          This means that bzip2-1.0.3 and later will only produce
137f7a4f99fSDavid E. O'Brien          Huffman codes with a maximum length of 17 bits.  However, in
138f7a4f99fSDavid E. O'Brien          order to preserve backwards compatibility with bitstreams
139f7a4f99fSDavid E. O'Brien          produced by versions pre-1.0.3, the decompressor must still
140f7a4f99fSDavid E. O'Brien          handle lengths of up to 20. */
141f7a4f99fSDavid E. O'Brien 
142f7a4f99fSDavid E. O'Brien       for (i = 1; i <= alphaSize; i++) {
143df9de0ebSDavid E. O'Brien          j = weight[i] >> 8;
144df9de0ebSDavid E. O'Brien          j = 1 + (j / 2);
145df9de0ebSDavid E. O'Brien          weight[i] = j << 8;
146df9de0ebSDavid E. O'Brien       }
147df9de0ebSDavid E. O'Brien    }
148df9de0ebSDavid E. O'Brien }
149df9de0ebSDavid E. O'Brien 
150df9de0ebSDavid E. O'Brien 
151df9de0ebSDavid E. O'Brien /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)152df9de0ebSDavid E. O'Brien void BZ2_hbAssignCodes ( Int32 *code,
153df9de0ebSDavid E. O'Brien                          UChar *length,
154df9de0ebSDavid E. O'Brien                          Int32 minLen,
155df9de0ebSDavid E. O'Brien                          Int32 maxLen,
156df9de0ebSDavid E. O'Brien                          Int32 alphaSize )
157df9de0ebSDavid E. O'Brien {
158df9de0ebSDavid E. O'Brien    Int32 n, vec, i;
159df9de0ebSDavid E. O'Brien 
160df9de0ebSDavid E. O'Brien    vec = 0;
161df9de0ebSDavid E. O'Brien    for (n = minLen; n <= maxLen; n++) {
162df9de0ebSDavid E. O'Brien       for (i = 0; i < alphaSize; i++)
163df9de0ebSDavid E. O'Brien          if (length[i] == n) { code[i] = vec; vec++; };
164df9de0ebSDavid E. O'Brien       vec <<= 1;
165df9de0ebSDavid E. O'Brien    }
166df9de0ebSDavid E. O'Brien }
167df9de0ebSDavid E. O'Brien 
168df9de0ebSDavid E. O'Brien 
169df9de0ebSDavid E. O'Brien /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)170df9de0ebSDavid E. O'Brien void BZ2_hbCreateDecodeTables ( Int32 *limit,
171df9de0ebSDavid E. O'Brien                                 Int32 *base,
172df9de0ebSDavid E. O'Brien                                 Int32 *perm,
173df9de0ebSDavid E. O'Brien                                 UChar *length,
174df9de0ebSDavid E. O'Brien                                 Int32 minLen,
175df9de0ebSDavid E. O'Brien                                 Int32 maxLen,
176df9de0ebSDavid E. O'Brien                                 Int32 alphaSize )
177df9de0ebSDavid E. O'Brien {
178df9de0ebSDavid E. O'Brien    Int32 pp, i, j, vec;
179df9de0ebSDavid E. O'Brien 
180df9de0ebSDavid E. O'Brien    pp = 0;
181df9de0ebSDavid E. O'Brien    for (i = minLen; i <= maxLen; i++)
182df9de0ebSDavid E. O'Brien       for (j = 0; j < alphaSize; j++)
183df9de0ebSDavid E. O'Brien          if (length[j] == i) { perm[pp] = j; pp++; };
184df9de0ebSDavid E. O'Brien 
185df9de0ebSDavid E. O'Brien    for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186df9de0ebSDavid E. O'Brien    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187df9de0ebSDavid E. O'Brien 
188df9de0ebSDavid E. O'Brien    for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189df9de0ebSDavid E. O'Brien 
190df9de0ebSDavid E. O'Brien    for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191df9de0ebSDavid E. O'Brien    vec = 0;
192df9de0ebSDavid E. O'Brien 
193df9de0ebSDavid E. O'Brien    for (i = minLen; i <= maxLen; i++) {
194df9de0ebSDavid E. O'Brien       vec += (base[i+1] - base[i]);
195df9de0ebSDavid E. O'Brien       limit[i] = vec-1;
196df9de0ebSDavid E. O'Brien       vec <<= 1;
197df9de0ebSDavid E. O'Brien    }
198df9de0ebSDavid E. O'Brien    for (i = minLen + 1; i <= maxLen; i++)
199df9de0ebSDavid E. O'Brien       base[i] = ((limit[i-1] + 1) << 1) - base[i];
200df9de0ebSDavid E. O'Brien }
201df9de0ebSDavid E. O'Brien 
202df9de0ebSDavid E. O'Brien 
203df9de0ebSDavid E. O'Brien /*-------------------------------------------------------------*/
204df9de0ebSDavid E. O'Brien /*--- end                                         huffman.c ---*/
205df9de0ebSDavid E. O'Brien /*-------------------------------------------------------------*/
206