1*c12ab3f1Smaya /* $NetBSD: huffman.c,v 1.1.1.3 2019/07/21 11:35:30 maya Exp $ */
24f9a1459Swiz
34f9a1459Swiz
44f9a1459Swiz /*-------------------------------------------------------------*/
54f9a1459Swiz /*--- Huffman coding low-level stuff ---*/
64f9a1459Swiz /*--- huffman.c ---*/
74f9a1459Swiz /*-------------------------------------------------------------*/
84f9a1459Swiz
94f9a1459Swiz /* ------------------------------------------------------------------
104f9a1459Swiz This file is part of bzip2/libbzip2, a program and library for
114f9a1459Swiz lossless, block-sorting data compression.
124f9a1459Swiz
13*c12ab3f1Smaya bzip2/libbzip2 version 1.0.8 of 13 July 2019
14*c12ab3f1Smaya Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
154f9a1459Swiz
164f9a1459Swiz Please read the WARNING, DISCLAIMER and PATENTS sections in the
174f9a1459Swiz README file.
184f9a1459Swiz
194f9a1459Swiz This program is released under the terms of the license contained
204f9a1459Swiz in the file LICENSE.
214f9a1459Swiz ------------------------------------------------------------------ */
224f9a1459Swiz
234f9a1459Swiz
244f9a1459Swiz #include "bzlib_private.h"
254f9a1459Swiz
264f9a1459Swiz /*---------------------------------------------------*/
274f9a1459Swiz #define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
284f9a1459Swiz #define DEPTHOF(zz1) ((zz1) & 0x000000ff)
294f9a1459Swiz #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
304f9a1459Swiz
314f9a1459Swiz #define ADDWEIGHTS(zw1,zw2) \
324f9a1459Swiz (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
334f9a1459Swiz (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
344f9a1459Swiz
354f9a1459Swiz #define UPHEAP(z) \
364f9a1459Swiz { \
374f9a1459Swiz Int32 zz, tmp; \
384f9a1459Swiz zz = z; tmp = heap[zz]; \
394f9a1459Swiz while (weight[tmp] < weight[heap[zz >> 1]]) { \
404f9a1459Swiz heap[zz] = heap[zz >> 1]; \
414f9a1459Swiz zz >>= 1; \
424f9a1459Swiz } \
434f9a1459Swiz heap[zz] = tmp; \
444f9a1459Swiz }
454f9a1459Swiz
464f9a1459Swiz #define DOWNHEAP(z) \
474f9a1459Swiz { \
484f9a1459Swiz Int32 zz, yy, tmp; \
494f9a1459Swiz zz = z; tmp = heap[zz]; \
504f9a1459Swiz while (True) { \
514f9a1459Swiz yy = zz << 1; \
524f9a1459Swiz if (yy > nHeap) break; \
534f9a1459Swiz if (yy < nHeap && \
544f9a1459Swiz weight[heap[yy+1]] < weight[heap[yy]]) \
554f9a1459Swiz yy++; \
564f9a1459Swiz if (weight[tmp] < weight[heap[yy]]) break; \
574f9a1459Swiz heap[zz] = heap[yy]; \
584f9a1459Swiz zz = yy; \
594f9a1459Swiz } \
604f9a1459Swiz heap[zz] = tmp; \
614f9a1459Swiz }
624f9a1459Swiz
634f9a1459Swiz
644f9a1459Swiz /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)654f9a1459Swiz void BZ2_hbMakeCodeLengths ( UChar *len,
664f9a1459Swiz Int32 *freq,
674f9a1459Swiz Int32 alphaSize,
684f9a1459Swiz Int32 maxLen )
694f9a1459Swiz {
704f9a1459Swiz /*--
714f9a1459Swiz Nodes and heap entries run from 1. Entry 0
724f9a1459Swiz for both the heap and nodes is a sentinel.
734f9a1459Swiz --*/
744f9a1459Swiz Int32 nNodes, nHeap, n1, n2, i, j, k;
754f9a1459Swiz Bool tooLong;
764f9a1459Swiz
774f9a1459Swiz Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
784f9a1459Swiz Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
794f9a1459Swiz Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
804f9a1459Swiz
814f9a1459Swiz for (i = 0; i < alphaSize; i++)
824f9a1459Swiz weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
834f9a1459Swiz
844f9a1459Swiz while (True) {
854f9a1459Swiz
864f9a1459Swiz nNodes = alphaSize;
874f9a1459Swiz nHeap = 0;
884f9a1459Swiz
894f9a1459Swiz heap[0] = 0;
904f9a1459Swiz weight[0] = 0;
914f9a1459Swiz parent[0] = -2;
924f9a1459Swiz
934f9a1459Swiz for (i = 1; i <= alphaSize; i++) {
944f9a1459Swiz parent[i] = -1;
954f9a1459Swiz nHeap++;
964f9a1459Swiz heap[nHeap] = i;
974f9a1459Swiz UPHEAP(nHeap);
984f9a1459Swiz }
994f9a1459Swiz
1004f9a1459Swiz AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
1014f9a1459Swiz
1024f9a1459Swiz while (nHeap > 1) {
1034f9a1459Swiz n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
1044f9a1459Swiz n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
1054f9a1459Swiz nNodes++;
1064f9a1459Swiz parent[n1] = parent[n2] = nNodes;
1074f9a1459Swiz weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
1084f9a1459Swiz parent[nNodes] = -1;
1094f9a1459Swiz nHeap++;
1104f9a1459Swiz heap[nHeap] = nNodes;
1114f9a1459Swiz UPHEAP(nHeap);
1124f9a1459Swiz }
1134f9a1459Swiz
1144f9a1459Swiz AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
1154f9a1459Swiz
1164f9a1459Swiz tooLong = False;
1174f9a1459Swiz for (i = 1; i <= alphaSize; i++) {
1184f9a1459Swiz j = 0;
1194f9a1459Swiz k = i;
1204f9a1459Swiz while (parent[k] >= 0) { k = parent[k]; j++; }
1214f9a1459Swiz len[i-1] = j;
1224f9a1459Swiz if (j > maxLen) tooLong = True;
1234f9a1459Swiz }
1244f9a1459Swiz
1254f9a1459Swiz if (! tooLong) break;
1264f9a1459Swiz
1274f9a1459Swiz /* 17 Oct 04: keep-going condition for the following loop used
1284f9a1459Swiz to be 'i < alphaSize', which missed the last element,
1294f9a1459Swiz theoretically leading to the possibility of the compressor
1304f9a1459Swiz looping. However, this count-scaling step is only needed if
1314f9a1459Swiz one of the generated Huffman code words is longer than
1324f9a1459Swiz maxLen, which up to and including version 1.0.2 was 20 bits,
1334f9a1459Swiz which is extremely unlikely. In version 1.0.3 maxLen was
1344f9a1459Swiz changed to 17 bits, which has minimal effect on compression
1354f9a1459Swiz ratio, but does mean this scaling step is used from time to
1364f9a1459Swiz time, enough to verify that it works.
1374f9a1459Swiz
1384f9a1459Swiz This means that bzip2-1.0.3 and later will only produce
1394f9a1459Swiz Huffman codes with a maximum length of 17 bits. However, in
1404f9a1459Swiz order to preserve backwards compatibility with bitstreams
1414f9a1459Swiz produced by versions pre-1.0.3, the decompressor must still
1424f9a1459Swiz handle lengths of up to 20. */
1434f9a1459Swiz
1444f9a1459Swiz for (i = 1; i <= alphaSize; i++) {
1454f9a1459Swiz j = weight[i] >> 8;
1464f9a1459Swiz j = 1 + (j / 2);
1474f9a1459Swiz weight[i] = j << 8;
1484f9a1459Swiz }
1494f9a1459Swiz }
1504f9a1459Swiz }
1514f9a1459Swiz
1524f9a1459Swiz
1534f9a1459Swiz /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)1544f9a1459Swiz void BZ2_hbAssignCodes ( Int32 *code,
1554f9a1459Swiz UChar *length,
1564f9a1459Swiz Int32 minLen,
1574f9a1459Swiz Int32 maxLen,
1584f9a1459Swiz Int32 alphaSize )
1594f9a1459Swiz {
1604f9a1459Swiz Int32 n, vec, i;
1614f9a1459Swiz
1624f9a1459Swiz vec = 0;
1634f9a1459Swiz for (n = minLen; n <= maxLen; n++) {
1644f9a1459Swiz for (i = 0; i < alphaSize; i++)
1654f9a1459Swiz if (length[i] == n) { code[i] = vec; vec++; };
1664f9a1459Swiz vec <<= 1;
1674f9a1459Swiz }
1684f9a1459Swiz }
1694f9a1459Swiz
1704f9a1459Swiz
1714f9a1459Swiz /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)1724f9a1459Swiz void BZ2_hbCreateDecodeTables ( Int32 *limit,
1734f9a1459Swiz Int32 *base,
1744f9a1459Swiz Int32 *perm,
1754f9a1459Swiz UChar *length,
1764f9a1459Swiz Int32 minLen,
1774f9a1459Swiz Int32 maxLen,
1784f9a1459Swiz Int32 alphaSize )
1794f9a1459Swiz {
1804f9a1459Swiz Int32 pp, i, j, vec;
1814f9a1459Swiz
1824f9a1459Swiz pp = 0;
1834f9a1459Swiz for (i = minLen; i <= maxLen; i++)
1844f9a1459Swiz for (j = 0; j < alphaSize; j++)
1854f9a1459Swiz if (length[j] == i) { perm[pp] = j; pp++; };
1864f9a1459Swiz
1874f9a1459Swiz for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
1884f9a1459Swiz for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
1894f9a1459Swiz
1904f9a1459Swiz for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
1914f9a1459Swiz
1924f9a1459Swiz for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
1934f9a1459Swiz vec = 0;
1944f9a1459Swiz
1954f9a1459Swiz for (i = minLen; i <= maxLen; i++) {
1964f9a1459Swiz vec += (base[i+1] - base[i]);
1974f9a1459Swiz limit[i] = vec-1;
1984f9a1459Swiz vec <<= 1;
1994f9a1459Swiz }
2004f9a1459Swiz for (i = minLen + 1; i <= maxLen; i++)
2014f9a1459Swiz base[i] = ((limit[i-1] + 1) << 1) - base[i];
2024f9a1459Swiz }
2034f9a1459Swiz
2044f9a1459Swiz
2054f9a1459Swiz /*-------------------------------------------------------------*/
2064f9a1459Swiz /*--- end huffman.c ---*/
2074f9a1459Swiz /*-------------------------------------------------------------*/
208