xref: /netbsd-src/external/bsd/bzip2/dist/huffman.c (revision c12ab3f1404d3e6320413d6099c78880423d6a49)
1*c12ab3f1Smaya /*	$NetBSD: huffman.c,v 1.1.1.3 2019/07/21 11:35:30 maya Exp $	*/
24f9a1459Swiz 
34f9a1459Swiz 
44f9a1459Swiz /*-------------------------------------------------------------*/
54f9a1459Swiz /*--- Huffman coding low-level stuff                        ---*/
64f9a1459Swiz /*---                                             huffman.c ---*/
74f9a1459Swiz /*-------------------------------------------------------------*/
84f9a1459Swiz 
94f9a1459Swiz /* ------------------------------------------------------------------
104f9a1459Swiz    This file is part of bzip2/libbzip2, a program and library for
114f9a1459Swiz    lossless, block-sorting data compression.
124f9a1459Swiz 
13*c12ab3f1Smaya    bzip2/libbzip2 version 1.0.8 of 13 July 2019
14*c12ab3f1Smaya    Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
154f9a1459Swiz 
164f9a1459Swiz    Please read the WARNING, DISCLAIMER and PATENTS sections in the
174f9a1459Swiz    README file.
184f9a1459Swiz 
194f9a1459Swiz    This program is released under the terms of the license contained
204f9a1459Swiz    in the file LICENSE.
214f9a1459Swiz    ------------------------------------------------------------------ */
224f9a1459Swiz 
234f9a1459Swiz 
244f9a1459Swiz #include "bzlib_private.h"
254f9a1459Swiz 
264f9a1459Swiz /*---------------------------------------------------*/
274f9a1459Swiz #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
284f9a1459Swiz #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
294f9a1459Swiz #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
304f9a1459Swiz 
314f9a1459Swiz #define ADDWEIGHTS(zw1,zw2)                           \
324f9a1459Swiz    (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
334f9a1459Swiz    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
344f9a1459Swiz 
354f9a1459Swiz #define UPHEAP(z)                                     \
364f9a1459Swiz {                                                     \
374f9a1459Swiz    Int32 zz, tmp;                                     \
384f9a1459Swiz    zz = z; tmp = heap[zz];                            \
394f9a1459Swiz    while (weight[tmp] < weight[heap[zz >> 1]]) {      \
404f9a1459Swiz       heap[zz] = heap[zz >> 1];                       \
414f9a1459Swiz       zz >>= 1;                                       \
424f9a1459Swiz    }                                                  \
434f9a1459Swiz    heap[zz] = tmp;                                    \
444f9a1459Swiz }
454f9a1459Swiz 
464f9a1459Swiz #define DOWNHEAP(z)                                   \
474f9a1459Swiz {                                                     \
484f9a1459Swiz    Int32 zz, yy, tmp;                                 \
494f9a1459Swiz    zz = z; tmp = heap[zz];                            \
504f9a1459Swiz    while (True) {                                     \
514f9a1459Swiz       yy = zz << 1;                                   \
524f9a1459Swiz       if (yy > nHeap) break;                          \
534f9a1459Swiz       if (yy < nHeap &&                               \
544f9a1459Swiz           weight[heap[yy+1]] < weight[heap[yy]])      \
554f9a1459Swiz          yy++;                                        \
564f9a1459Swiz       if (weight[tmp] < weight[heap[yy]]) break;      \
574f9a1459Swiz       heap[zz] = heap[yy];                            \
584f9a1459Swiz       zz = yy;                                        \
594f9a1459Swiz    }                                                  \
604f9a1459Swiz    heap[zz] = tmp;                                    \
614f9a1459Swiz }
624f9a1459Swiz 
634f9a1459Swiz 
644f9a1459Swiz /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)654f9a1459Swiz void BZ2_hbMakeCodeLengths ( UChar *len,
664f9a1459Swiz                              Int32 *freq,
674f9a1459Swiz                              Int32 alphaSize,
684f9a1459Swiz                              Int32 maxLen )
694f9a1459Swiz {
704f9a1459Swiz    /*--
714f9a1459Swiz       Nodes and heap entries run from 1.  Entry 0
724f9a1459Swiz       for both the heap and nodes is a sentinel.
734f9a1459Swiz    --*/
744f9a1459Swiz    Int32 nNodes, nHeap, n1, n2, i, j, k;
754f9a1459Swiz    Bool  tooLong;
764f9a1459Swiz 
774f9a1459Swiz    Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
784f9a1459Swiz    Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
794f9a1459Swiz    Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
804f9a1459Swiz 
814f9a1459Swiz    for (i = 0; i < alphaSize; i++)
824f9a1459Swiz       weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
834f9a1459Swiz 
844f9a1459Swiz    while (True) {
854f9a1459Swiz 
864f9a1459Swiz       nNodes = alphaSize;
874f9a1459Swiz       nHeap = 0;
884f9a1459Swiz 
894f9a1459Swiz       heap[0] = 0;
904f9a1459Swiz       weight[0] = 0;
914f9a1459Swiz       parent[0] = -2;
924f9a1459Swiz 
934f9a1459Swiz       for (i = 1; i <= alphaSize; i++) {
944f9a1459Swiz          parent[i] = -1;
954f9a1459Swiz          nHeap++;
964f9a1459Swiz          heap[nHeap] = i;
974f9a1459Swiz          UPHEAP(nHeap);
984f9a1459Swiz       }
994f9a1459Swiz 
1004f9a1459Swiz       AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
1014f9a1459Swiz 
1024f9a1459Swiz       while (nHeap > 1) {
1034f9a1459Swiz          n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
1044f9a1459Swiz          n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
1054f9a1459Swiz          nNodes++;
1064f9a1459Swiz          parent[n1] = parent[n2] = nNodes;
1074f9a1459Swiz          weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
1084f9a1459Swiz          parent[nNodes] = -1;
1094f9a1459Swiz          nHeap++;
1104f9a1459Swiz          heap[nHeap] = nNodes;
1114f9a1459Swiz          UPHEAP(nHeap);
1124f9a1459Swiz       }
1134f9a1459Swiz 
1144f9a1459Swiz       AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
1154f9a1459Swiz 
1164f9a1459Swiz       tooLong = False;
1174f9a1459Swiz       for (i = 1; i <= alphaSize; i++) {
1184f9a1459Swiz          j = 0;
1194f9a1459Swiz          k = i;
1204f9a1459Swiz          while (parent[k] >= 0) { k = parent[k]; j++; }
1214f9a1459Swiz          len[i-1] = j;
1224f9a1459Swiz          if (j > maxLen) tooLong = True;
1234f9a1459Swiz       }
1244f9a1459Swiz 
1254f9a1459Swiz       if (! tooLong) break;
1264f9a1459Swiz 
1274f9a1459Swiz       /* 17 Oct 04: keep-going condition for the following loop used
1284f9a1459Swiz          to be 'i < alphaSize', which missed the last element,
1294f9a1459Swiz          theoretically leading to the possibility of the compressor
1304f9a1459Swiz          looping.  However, this count-scaling step is only needed if
1314f9a1459Swiz          one of the generated Huffman code words is longer than
1324f9a1459Swiz          maxLen, which up to and including version 1.0.2 was 20 bits,
1334f9a1459Swiz          which is extremely unlikely.  In version 1.0.3 maxLen was
1344f9a1459Swiz          changed to 17 bits, which has minimal effect on compression
1354f9a1459Swiz          ratio, but does mean this scaling step is used from time to
1364f9a1459Swiz          time, enough to verify that it works.
1374f9a1459Swiz 
1384f9a1459Swiz          This means that bzip2-1.0.3 and later will only produce
1394f9a1459Swiz          Huffman codes with a maximum length of 17 bits.  However, in
1404f9a1459Swiz          order to preserve backwards compatibility with bitstreams
1414f9a1459Swiz          produced by versions pre-1.0.3, the decompressor must still
1424f9a1459Swiz          handle lengths of up to 20. */
1434f9a1459Swiz 
1444f9a1459Swiz       for (i = 1; i <= alphaSize; i++) {
1454f9a1459Swiz          j = weight[i] >> 8;
1464f9a1459Swiz          j = 1 + (j / 2);
1474f9a1459Swiz          weight[i] = j << 8;
1484f9a1459Swiz       }
1494f9a1459Swiz    }
1504f9a1459Swiz }
1514f9a1459Swiz 
1524f9a1459Swiz 
1534f9a1459Swiz /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)1544f9a1459Swiz void BZ2_hbAssignCodes ( Int32 *code,
1554f9a1459Swiz                          UChar *length,
1564f9a1459Swiz                          Int32 minLen,
1574f9a1459Swiz                          Int32 maxLen,
1584f9a1459Swiz                          Int32 alphaSize )
1594f9a1459Swiz {
1604f9a1459Swiz    Int32 n, vec, i;
1614f9a1459Swiz 
1624f9a1459Swiz    vec = 0;
1634f9a1459Swiz    for (n = minLen; n <= maxLen; n++) {
1644f9a1459Swiz       for (i = 0; i < alphaSize; i++)
1654f9a1459Swiz          if (length[i] == n) { code[i] = vec; vec++; };
1664f9a1459Swiz       vec <<= 1;
1674f9a1459Swiz    }
1684f9a1459Swiz }
1694f9a1459Swiz 
1704f9a1459Swiz 
1714f9a1459Swiz /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)1724f9a1459Swiz void BZ2_hbCreateDecodeTables ( Int32 *limit,
1734f9a1459Swiz                                 Int32 *base,
1744f9a1459Swiz                                 Int32 *perm,
1754f9a1459Swiz                                 UChar *length,
1764f9a1459Swiz                                 Int32 minLen,
1774f9a1459Swiz                                 Int32 maxLen,
1784f9a1459Swiz                                 Int32 alphaSize )
1794f9a1459Swiz {
1804f9a1459Swiz    Int32 pp, i, j, vec;
1814f9a1459Swiz 
1824f9a1459Swiz    pp = 0;
1834f9a1459Swiz    for (i = minLen; i <= maxLen; i++)
1844f9a1459Swiz       for (j = 0; j < alphaSize; j++)
1854f9a1459Swiz          if (length[j] == i) { perm[pp] = j; pp++; };
1864f9a1459Swiz 
1874f9a1459Swiz    for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
1884f9a1459Swiz    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
1894f9a1459Swiz 
1904f9a1459Swiz    for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
1914f9a1459Swiz 
1924f9a1459Swiz    for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
1934f9a1459Swiz    vec = 0;
1944f9a1459Swiz 
1954f9a1459Swiz    for (i = minLen; i <= maxLen; i++) {
1964f9a1459Swiz       vec += (base[i+1] - base[i]);
1974f9a1459Swiz       limit[i] = vec-1;
1984f9a1459Swiz       vec <<= 1;
1994f9a1459Swiz    }
2004f9a1459Swiz    for (i = minLen + 1; i <= maxLen; i++)
2014f9a1459Swiz       base[i] = ((limit[i-1] + 1) << 1) - base[i];
2024f9a1459Swiz }
2034f9a1459Swiz 
2044f9a1459Swiz 
2054f9a1459Swiz /*-------------------------------------------------------------*/
2064f9a1459Swiz /*--- end                                         huffman.c ---*/
2074f9a1459Swiz /*-------------------------------------------------------------*/
208