171e7ee59SPeter Avalos
271e7ee59SPeter Avalos /*-------------------------------------------------------------*/
371e7ee59SPeter Avalos /*--- Huffman coding low-level stuff ---*/
471e7ee59SPeter Avalos /*--- huffman.c ---*/
571e7ee59SPeter Avalos /*-------------------------------------------------------------*/
671e7ee59SPeter Avalos
771e7ee59SPeter Avalos /* ------------------------------------------------------------------
871e7ee59SPeter Avalos This file is part of bzip2/libbzip2, a program and library for
971e7ee59SPeter Avalos lossless, block-sorting data compression.
1071e7ee59SPeter Avalos
11*86954436SDaniel Fojt bzip2/libbzip2 version 1.0.8 of 13 July 2019
12*86954436SDaniel Fojt Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
1371e7ee59SPeter Avalos
1471e7ee59SPeter Avalos Please read the WARNING, DISCLAIMER and PATENTS sections in the
1571e7ee59SPeter Avalos README file.
1671e7ee59SPeter Avalos
1771e7ee59SPeter Avalos This program is released under the terms of the license contained
1871e7ee59SPeter Avalos in the file LICENSE.
1971e7ee59SPeter Avalos ------------------------------------------------------------------ */
2071e7ee59SPeter Avalos
2171e7ee59SPeter Avalos
2271e7ee59SPeter Avalos #include "bzlib_private.h"
2371e7ee59SPeter Avalos
2471e7ee59SPeter Avalos /*---------------------------------------------------*/
2571e7ee59SPeter Avalos #define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
2671e7ee59SPeter Avalos #define DEPTHOF(zz1) ((zz1) & 0x000000ff)
2771e7ee59SPeter Avalos #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
2871e7ee59SPeter Avalos
2971e7ee59SPeter Avalos #define ADDWEIGHTS(zw1,zw2) \
3071e7ee59SPeter Avalos (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
3171e7ee59SPeter Avalos (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
3271e7ee59SPeter Avalos
3371e7ee59SPeter Avalos #define UPHEAP(z) \
3471e7ee59SPeter Avalos { \
3571e7ee59SPeter Avalos Int32 zz, tmp; \
3671e7ee59SPeter Avalos zz = z; tmp = heap[zz]; \
3771e7ee59SPeter Avalos while (weight[tmp] < weight[heap[zz >> 1]]) { \
3871e7ee59SPeter Avalos heap[zz] = heap[zz >> 1]; \
3971e7ee59SPeter Avalos zz >>= 1; \
4071e7ee59SPeter Avalos } \
4171e7ee59SPeter Avalos heap[zz] = tmp; \
4271e7ee59SPeter Avalos }
4371e7ee59SPeter Avalos
4471e7ee59SPeter Avalos #define DOWNHEAP(z) \
4571e7ee59SPeter Avalos { \
4671e7ee59SPeter Avalos Int32 zz, yy, tmp; \
4771e7ee59SPeter Avalos zz = z; tmp = heap[zz]; \
4871e7ee59SPeter Avalos while (True) { \
4971e7ee59SPeter Avalos yy = zz << 1; \
5071e7ee59SPeter Avalos if (yy > nHeap) break; \
5171e7ee59SPeter Avalos if (yy < nHeap && \
5271e7ee59SPeter Avalos weight[heap[yy+1]] < weight[heap[yy]]) \
5371e7ee59SPeter Avalos yy++; \
5471e7ee59SPeter Avalos if (weight[tmp] < weight[heap[yy]]) break; \
5571e7ee59SPeter Avalos heap[zz] = heap[yy]; \
5671e7ee59SPeter Avalos zz = yy; \
5771e7ee59SPeter Avalos } \
5871e7ee59SPeter Avalos heap[zz] = tmp; \
5971e7ee59SPeter Avalos }
6071e7ee59SPeter Avalos
6171e7ee59SPeter Avalos
6271e7ee59SPeter Avalos /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)6371e7ee59SPeter Avalos void BZ2_hbMakeCodeLengths ( UChar *len,
6471e7ee59SPeter Avalos Int32 *freq,
6571e7ee59SPeter Avalos Int32 alphaSize,
6671e7ee59SPeter Avalos Int32 maxLen )
6771e7ee59SPeter Avalos {
6871e7ee59SPeter Avalos /*--
6971e7ee59SPeter Avalos Nodes and heap entries run from 1. Entry 0
7071e7ee59SPeter Avalos for both the heap and nodes is a sentinel.
7171e7ee59SPeter Avalos --*/
7271e7ee59SPeter Avalos Int32 nNodes, nHeap, n1, n2, i, j, k;
7371e7ee59SPeter Avalos Bool tooLong;
7471e7ee59SPeter Avalos
7571e7ee59SPeter Avalos Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
7671e7ee59SPeter Avalos Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
7771e7ee59SPeter Avalos Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
7871e7ee59SPeter Avalos
7971e7ee59SPeter Avalos for (i = 0; i < alphaSize; i++)
8071e7ee59SPeter Avalos weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
8171e7ee59SPeter Avalos
8271e7ee59SPeter Avalos while (True) {
8371e7ee59SPeter Avalos
8471e7ee59SPeter Avalos nNodes = alphaSize;
8571e7ee59SPeter Avalos nHeap = 0;
8671e7ee59SPeter Avalos
8771e7ee59SPeter Avalos heap[0] = 0;
8871e7ee59SPeter Avalos weight[0] = 0;
8971e7ee59SPeter Avalos parent[0] = -2;
9071e7ee59SPeter Avalos
9171e7ee59SPeter Avalos for (i = 1; i <= alphaSize; i++) {
9271e7ee59SPeter Avalos parent[i] = -1;
9371e7ee59SPeter Avalos nHeap++;
9471e7ee59SPeter Avalos heap[nHeap] = i;
9571e7ee59SPeter Avalos UPHEAP(nHeap);
9671e7ee59SPeter Avalos }
9771e7ee59SPeter Avalos
9871e7ee59SPeter Avalos AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
9971e7ee59SPeter Avalos
10071e7ee59SPeter Avalos while (nHeap > 1) {
10171e7ee59SPeter Avalos n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
10271e7ee59SPeter Avalos n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
10371e7ee59SPeter Avalos nNodes++;
10471e7ee59SPeter Avalos parent[n1] = parent[n2] = nNodes;
10571e7ee59SPeter Avalos weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
10671e7ee59SPeter Avalos parent[nNodes] = -1;
10771e7ee59SPeter Avalos nHeap++;
10871e7ee59SPeter Avalos heap[nHeap] = nNodes;
10971e7ee59SPeter Avalos UPHEAP(nHeap);
11071e7ee59SPeter Avalos }
11171e7ee59SPeter Avalos
11271e7ee59SPeter Avalos AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
11371e7ee59SPeter Avalos
11471e7ee59SPeter Avalos tooLong = False;
11571e7ee59SPeter Avalos for (i = 1; i <= alphaSize; i++) {
11671e7ee59SPeter Avalos j = 0;
11771e7ee59SPeter Avalos k = i;
11871e7ee59SPeter Avalos while (parent[k] >= 0) { k = parent[k]; j++; }
11971e7ee59SPeter Avalos len[i-1] = j;
12071e7ee59SPeter Avalos if (j > maxLen) tooLong = True;
12171e7ee59SPeter Avalos }
12271e7ee59SPeter Avalos
12371e7ee59SPeter Avalos if (! tooLong) break;
12471e7ee59SPeter Avalos
12571e7ee59SPeter Avalos /* 17 Oct 04: keep-going condition for the following loop used
12671e7ee59SPeter Avalos to be 'i < alphaSize', which missed the last element,
12771e7ee59SPeter Avalos theoretically leading to the possibility of the compressor
12871e7ee59SPeter Avalos looping. However, this count-scaling step is only needed if
12971e7ee59SPeter Avalos one of the generated Huffman code words is longer than
13071e7ee59SPeter Avalos maxLen, which up to and including version 1.0.2 was 20 bits,
13171e7ee59SPeter Avalos which is extremely unlikely. In version 1.0.3 maxLen was
13271e7ee59SPeter Avalos changed to 17 bits, which has minimal effect on compression
13371e7ee59SPeter Avalos ratio, but does mean this scaling step is used from time to
13471e7ee59SPeter Avalos time, enough to verify that it works.
13571e7ee59SPeter Avalos
13671e7ee59SPeter Avalos This means that bzip2-1.0.3 and later will only produce
13771e7ee59SPeter Avalos Huffman codes with a maximum length of 17 bits. However, in
13871e7ee59SPeter Avalos order to preserve backwards compatibility with bitstreams
13971e7ee59SPeter Avalos produced by versions pre-1.0.3, the decompressor must still
14071e7ee59SPeter Avalos handle lengths of up to 20. */
14171e7ee59SPeter Avalos
14271e7ee59SPeter Avalos for (i = 1; i <= alphaSize; i++) {
14371e7ee59SPeter Avalos j = weight[i] >> 8;
14471e7ee59SPeter Avalos j = 1 + (j / 2);
14571e7ee59SPeter Avalos weight[i] = j << 8;
14671e7ee59SPeter Avalos }
14771e7ee59SPeter Avalos }
14871e7ee59SPeter Avalos }
14971e7ee59SPeter Avalos
15071e7ee59SPeter Avalos
15171e7ee59SPeter Avalos /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)15271e7ee59SPeter Avalos void BZ2_hbAssignCodes ( Int32 *code,
15371e7ee59SPeter Avalos UChar *length,
15471e7ee59SPeter Avalos Int32 minLen,
15571e7ee59SPeter Avalos Int32 maxLen,
15671e7ee59SPeter Avalos Int32 alphaSize )
15771e7ee59SPeter Avalos {
15871e7ee59SPeter Avalos Int32 n, vec, i;
15971e7ee59SPeter Avalos
16071e7ee59SPeter Avalos vec = 0;
16171e7ee59SPeter Avalos for (n = minLen; n <= maxLen; n++) {
16271e7ee59SPeter Avalos for (i = 0; i < alphaSize; i++)
16371e7ee59SPeter Avalos if (length[i] == n) { code[i] = vec; vec++; };
16471e7ee59SPeter Avalos vec <<= 1;
16571e7ee59SPeter Avalos }
16671e7ee59SPeter Avalos }
16771e7ee59SPeter Avalos
16871e7ee59SPeter Avalos
16971e7ee59SPeter Avalos /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)17071e7ee59SPeter Avalos void BZ2_hbCreateDecodeTables ( Int32 *limit,
17171e7ee59SPeter Avalos Int32 *base,
17271e7ee59SPeter Avalos Int32 *perm,
17371e7ee59SPeter Avalos UChar *length,
17471e7ee59SPeter Avalos Int32 minLen,
17571e7ee59SPeter Avalos Int32 maxLen,
17671e7ee59SPeter Avalos Int32 alphaSize )
17771e7ee59SPeter Avalos {
17871e7ee59SPeter Avalos Int32 pp, i, j, vec;
17971e7ee59SPeter Avalos
18071e7ee59SPeter Avalos pp = 0;
18171e7ee59SPeter Avalos for (i = minLen; i <= maxLen; i++)
18271e7ee59SPeter Avalos for (j = 0; j < alphaSize; j++)
18371e7ee59SPeter Avalos if (length[j] == i) { perm[pp] = j; pp++; };
18471e7ee59SPeter Avalos
18571e7ee59SPeter Avalos for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
18671e7ee59SPeter Avalos for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
18771e7ee59SPeter Avalos
18871e7ee59SPeter Avalos for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
18971e7ee59SPeter Avalos
19071e7ee59SPeter Avalos for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
19171e7ee59SPeter Avalos vec = 0;
19271e7ee59SPeter Avalos
19371e7ee59SPeter Avalos for (i = minLen; i <= maxLen; i++) {
19471e7ee59SPeter Avalos vec += (base[i+1] - base[i]);
19571e7ee59SPeter Avalos limit[i] = vec-1;
19671e7ee59SPeter Avalos vec <<= 1;
19771e7ee59SPeter Avalos }
19871e7ee59SPeter Avalos for (i = minLen + 1; i <= maxLen; i++)
19971e7ee59SPeter Avalos base[i] = ((limit[i-1] + 1) << 1) - base[i];
20071e7ee59SPeter Avalos }
20171e7ee59SPeter Avalos
20271e7ee59SPeter Avalos
20371e7ee59SPeter Avalos /*-------------------------------------------------------------*/
20471e7ee59SPeter Avalos /*--- end huffman.c ---*/
20571e7ee59SPeter Avalos /*-------------------------------------------------------------*/
206