1df9de0ebSDavid E. O'Brien
2df9de0ebSDavid E. O'Brien /*-------------------------------------------------------------*/
3df9de0ebSDavid E. O'Brien /*--- Huffman coding low-level stuff ---*/
4df9de0ebSDavid E. O'Brien /*--- huffman.c ---*/
5df9de0ebSDavid E. O'Brien /*-------------------------------------------------------------*/
6df9de0ebSDavid E. O'Brien
71b79bae0SXin LI /* ------------------------------------------------------------------
81b79bae0SXin LI This file is part of bzip2/libbzip2, a program and library for
91b79bae0SXin LI lossless, block-sorting data compression.
10df9de0ebSDavid E. O'Brien
11*51f61fc0SXin LI bzip2/libbzip2 version 1.0.8 of 13 July 2019
12*51f61fc0SXin LI Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13df9de0ebSDavid E. O'Brien
141b79bae0SXin LI Please read the WARNING, DISCLAIMER and PATENTS sections in the
151b79bae0SXin LI README file.
16df9de0ebSDavid E. O'Brien
171b79bae0SXin LI This program is released under the terms of the license contained
181b79bae0SXin LI in the file LICENSE.
191b79bae0SXin LI ------------------------------------------------------------------ */
20df9de0ebSDavid E. O'Brien
21df9de0ebSDavid E. O'Brien
22df9de0ebSDavid E. O'Brien #include "bzlib_private.h"
23df9de0ebSDavid E. O'Brien
24df9de0ebSDavid E. O'Brien /*---------------------------------------------------*/
25df9de0ebSDavid E. O'Brien #define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
26df9de0ebSDavid E. O'Brien #define DEPTHOF(zz1) ((zz1) & 0x000000ff)
27df9de0ebSDavid E. O'Brien #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
28df9de0ebSDavid E. O'Brien
29df9de0ebSDavid E. O'Brien #define ADDWEIGHTS(zw1,zw2) \
30df9de0ebSDavid E. O'Brien (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
31df9de0ebSDavid E. O'Brien (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32df9de0ebSDavid E. O'Brien
33df9de0ebSDavid E. O'Brien #define UPHEAP(z) \
34df9de0ebSDavid E. O'Brien { \
35df9de0ebSDavid E. O'Brien Int32 zz, tmp; \
36df9de0ebSDavid E. O'Brien zz = z; tmp = heap[zz]; \
37df9de0ebSDavid E. O'Brien while (weight[tmp] < weight[heap[zz >> 1]]) { \
38df9de0ebSDavid E. O'Brien heap[zz] = heap[zz >> 1]; \
39df9de0ebSDavid E. O'Brien zz >>= 1; \
40df9de0ebSDavid E. O'Brien } \
41df9de0ebSDavid E. O'Brien heap[zz] = tmp; \
42df9de0ebSDavid E. O'Brien }
43df9de0ebSDavid E. O'Brien
44df9de0ebSDavid E. O'Brien #define DOWNHEAP(z) \
45df9de0ebSDavid E. O'Brien { \
46df9de0ebSDavid E. O'Brien Int32 zz, yy, tmp; \
47df9de0ebSDavid E. O'Brien zz = z; tmp = heap[zz]; \
48df9de0ebSDavid E. O'Brien while (True) { \
49df9de0ebSDavid E. O'Brien yy = zz << 1; \
50df9de0ebSDavid E. O'Brien if (yy > nHeap) break; \
51df9de0ebSDavid E. O'Brien if (yy < nHeap && \
52df9de0ebSDavid E. O'Brien weight[heap[yy+1]] < weight[heap[yy]]) \
53df9de0ebSDavid E. O'Brien yy++; \
54df9de0ebSDavid E. O'Brien if (weight[tmp] < weight[heap[yy]]) break; \
55df9de0ebSDavid E. O'Brien heap[zz] = heap[yy]; \
56df9de0ebSDavid E. O'Brien zz = yy; \
57df9de0ebSDavid E. O'Brien } \
58df9de0ebSDavid E. O'Brien heap[zz] = tmp; \
59df9de0ebSDavid E. O'Brien }
60df9de0ebSDavid E. O'Brien
61df9de0ebSDavid E. O'Brien
62df9de0ebSDavid E. O'Brien /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)63df9de0ebSDavid E. O'Brien void BZ2_hbMakeCodeLengths ( UChar *len,
64df9de0ebSDavid E. O'Brien Int32 *freq,
65df9de0ebSDavid E. O'Brien Int32 alphaSize,
66df9de0ebSDavid E. O'Brien Int32 maxLen )
67df9de0ebSDavid E. O'Brien {
68df9de0ebSDavid E. O'Brien /*--
69df9de0ebSDavid E. O'Brien Nodes and heap entries run from 1. Entry 0
70df9de0ebSDavid E. O'Brien for both the heap and nodes is a sentinel.
71df9de0ebSDavid E. O'Brien --*/
72df9de0ebSDavid E. O'Brien Int32 nNodes, nHeap, n1, n2, i, j, k;
73df9de0ebSDavid E. O'Brien Bool tooLong;
74df9de0ebSDavid E. O'Brien
75df9de0ebSDavid E. O'Brien Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
76df9de0ebSDavid E. O'Brien Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
77df9de0ebSDavid E. O'Brien Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
78df9de0ebSDavid E. O'Brien
79df9de0ebSDavid E. O'Brien for (i = 0; i < alphaSize; i++)
80df9de0ebSDavid E. O'Brien weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
81df9de0ebSDavid E. O'Brien
82df9de0ebSDavid E. O'Brien while (True) {
83df9de0ebSDavid E. O'Brien
84df9de0ebSDavid E. O'Brien nNodes = alphaSize;
85df9de0ebSDavid E. O'Brien nHeap = 0;
86df9de0ebSDavid E. O'Brien
87df9de0ebSDavid E. O'Brien heap[0] = 0;
88df9de0ebSDavid E. O'Brien weight[0] = 0;
89df9de0ebSDavid E. O'Brien parent[0] = -2;
90df9de0ebSDavid E. O'Brien
91df9de0ebSDavid E. O'Brien for (i = 1; i <= alphaSize; i++) {
92df9de0ebSDavid E. O'Brien parent[i] = -1;
93df9de0ebSDavid E. O'Brien nHeap++;
94df9de0ebSDavid E. O'Brien heap[nHeap] = i;
95df9de0ebSDavid E. O'Brien UPHEAP(nHeap);
96df9de0ebSDavid E. O'Brien }
97df9de0ebSDavid E. O'Brien
98df9de0ebSDavid E. O'Brien AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
99df9de0ebSDavid E. O'Brien
100df9de0ebSDavid E. O'Brien while (nHeap > 1) {
101df9de0ebSDavid E. O'Brien n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102df9de0ebSDavid E. O'Brien n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103df9de0ebSDavid E. O'Brien nNodes++;
104df9de0ebSDavid E. O'Brien parent[n1] = parent[n2] = nNodes;
105df9de0ebSDavid E. O'Brien weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106df9de0ebSDavid E. O'Brien parent[nNodes] = -1;
107df9de0ebSDavid E. O'Brien nHeap++;
108df9de0ebSDavid E. O'Brien heap[nHeap] = nNodes;
109df9de0ebSDavid E. O'Brien UPHEAP(nHeap);
110df9de0ebSDavid E. O'Brien }
111df9de0ebSDavid E. O'Brien
112df9de0ebSDavid E. O'Brien AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113df9de0ebSDavid E. O'Brien
114df9de0ebSDavid E. O'Brien tooLong = False;
115df9de0ebSDavid E. O'Brien for (i = 1; i <= alphaSize; i++) {
116df9de0ebSDavid E. O'Brien j = 0;
117df9de0ebSDavid E. O'Brien k = i;
118df9de0ebSDavid E. O'Brien while (parent[k] >= 0) { k = parent[k]; j++; }
119df9de0ebSDavid E. O'Brien len[i-1] = j;
120df9de0ebSDavid E. O'Brien if (j > maxLen) tooLong = True;
121df9de0ebSDavid E. O'Brien }
122df9de0ebSDavid E. O'Brien
123df9de0ebSDavid E. O'Brien if (! tooLong) break;
124df9de0ebSDavid E. O'Brien
125f7a4f99fSDavid E. O'Brien /* 17 Oct 04: keep-going condition for the following loop used
126f7a4f99fSDavid E. O'Brien to be 'i < alphaSize', which missed the last element,
127f7a4f99fSDavid E. O'Brien theoretically leading to the possibility of the compressor
128f7a4f99fSDavid E. O'Brien looping. However, this count-scaling step is only needed if
129f7a4f99fSDavid E. O'Brien one of the generated Huffman code words is longer than
130f7a4f99fSDavid E. O'Brien maxLen, which up to and including version 1.0.2 was 20 bits,
131f7a4f99fSDavid E. O'Brien which is extremely unlikely. In version 1.0.3 maxLen was
132f7a4f99fSDavid E. O'Brien changed to 17 bits, which has minimal effect on compression
133f7a4f99fSDavid E. O'Brien ratio, but does mean this scaling step is used from time to
134f7a4f99fSDavid E. O'Brien time, enough to verify that it works.
135f7a4f99fSDavid E. O'Brien
136f7a4f99fSDavid E. O'Brien This means that bzip2-1.0.3 and later will only produce
137f7a4f99fSDavid E. O'Brien Huffman codes with a maximum length of 17 bits. However, in
138f7a4f99fSDavid E. O'Brien order to preserve backwards compatibility with bitstreams
139f7a4f99fSDavid E. O'Brien produced by versions pre-1.0.3, the decompressor must still
140f7a4f99fSDavid E. O'Brien handle lengths of up to 20. */
141f7a4f99fSDavid E. O'Brien
142f7a4f99fSDavid E. O'Brien for (i = 1; i <= alphaSize; i++) {
143df9de0ebSDavid E. O'Brien j = weight[i] >> 8;
144df9de0ebSDavid E. O'Brien j = 1 + (j / 2);
145df9de0ebSDavid E. O'Brien weight[i] = j << 8;
146df9de0ebSDavid E. O'Brien }
147df9de0ebSDavid E. O'Brien }
148df9de0ebSDavid E. O'Brien }
149df9de0ebSDavid E. O'Brien
150df9de0ebSDavid E. O'Brien
151df9de0ebSDavid E. O'Brien /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)152df9de0ebSDavid E. O'Brien void BZ2_hbAssignCodes ( Int32 *code,
153df9de0ebSDavid E. O'Brien UChar *length,
154df9de0ebSDavid E. O'Brien Int32 minLen,
155df9de0ebSDavid E. O'Brien Int32 maxLen,
156df9de0ebSDavid E. O'Brien Int32 alphaSize )
157df9de0ebSDavid E. O'Brien {
158df9de0ebSDavid E. O'Brien Int32 n, vec, i;
159df9de0ebSDavid E. O'Brien
160df9de0ebSDavid E. O'Brien vec = 0;
161df9de0ebSDavid E. O'Brien for (n = minLen; n <= maxLen; n++) {
162df9de0ebSDavid E. O'Brien for (i = 0; i < alphaSize; i++)
163df9de0ebSDavid E. O'Brien if (length[i] == n) { code[i] = vec; vec++; };
164df9de0ebSDavid E. O'Brien vec <<= 1;
165df9de0ebSDavid E. O'Brien }
166df9de0ebSDavid E. O'Brien }
167df9de0ebSDavid E. O'Brien
168df9de0ebSDavid E. O'Brien
169df9de0ebSDavid E. O'Brien /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)170df9de0ebSDavid E. O'Brien void BZ2_hbCreateDecodeTables ( Int32 *limit,
171df9de0ebSDavid E. O'Brien Int32 *base,
172df9de0ebSDavid E. O'Brien Int32 *perm,
173df9de0ebSDavid E. O'Brien UChar *length,
174df9de0ebSDavid E. O'Brien Int32 minLen,
175df9de0ebSDavid E. O'Brien Int32 maxLen,
176df9de0ebSDavid E. O'Brien Int32 alphaSize )
177df9de0ebSDavid E. O'Brien {
178df9de0ebSDavid E. O'Brien Int32 pp, i, j, vec;
179df9de0ebSDavid E. O'Brien
180df9de0ebSDavid E. O'Brien pp = 0;
181df9de0ebSDavid E. O'Brien for (i = minLen; i <= maxLen; i++)
182df9de0ebSDavid E. O'Brien for (j = 0; j < alphaSize; j++)
183df9de0ebSDavid E. O'Brien if (length[j] == i) { perm[pp] = j; pp++; };
184df9de0ebSDavid E. O'Brien
185df9de0ebSDavid E. O'Brien for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186df9de0ebSDavid E. O'Brien for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187df9de0ebSDavid E. O'Brien
188df9de0ebSDavid E. O'Brien for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189df9de0ebSDavid E. O'Brien
190df9de0ebSDavid E. O'Brien for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191df9de0ebSDavid E. O'Brien vec = 0;
192df9de0ebSDavid E. O'Brien
193df9de0ebSDavid E. O'Brien for (i = minLen; i <= maxLen; i++) {
194df9de0ebSDavid E. O'Brien vec += (base[i+1] - base[i]);
195df9de0ebSDavid E. O'Brien limit[i] = vec-1;
196df9de0ebSDavid E. O'Brien vec <<= 1;
197df9de0ebSDavid E. O'Brien }
198df9de0ebSDavid E. O'Brien for (i = minLen + 1; i <= maxLen; i++)
199df9de0ebSDavid E. O'Brien base[i] = ((limit[i-1] + 1) << 1) - base[i];
200df9de0ebSDavid E. O'Brien }
201df9de0ebSDavid E. O'Brien
202df9de0ebSDavid E. O'Brien
203df9de0ebSDavid E. O'Brien /*-------------------------------------------------------------*/
204df9de0ebSDavid E. O'Brien /*--- end huffman.c ---*/
205df9de0ebSDavid E. O'Brien /*-------------------------------------------------------------*/
206