10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
70Sstevel@tonic-gate * with the License.
80Sstevel@tonic-gate *
90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate * See the License for the specific language governing permissions
120Sstevel@tonic-gate * and limitations under the License.
130Sstevel@tonic-gate *
140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate *
200Sstevel@tonic-gate * CDDL HEADER END
210Sstevel@tonic-gate */
220Sstevel@tonic-gate /*
23*239Sceastha * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
240Sstevel@tonic-gate * Use is subject to license terms.
250Sstevel@tonic-gate */
260Sstevel@tonic-gate
270Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
280Sstevel@tonic-gate /* All Rights Reserved */
290Sstevel@tonic-gate
300Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
310Sstevel@tonic-gate
320Sstevel@tonic-gate
330Sstevel@tonic-gate #include <unistd.h>
340Sstevel@tonic-gate #include <stdlib.h>
350Sstevel@tonic-gate #include <stdio.h>
360Sstevel@tonic-gate
370Sstevel@tonic-gate #define BYTE 8
380Sstevel@tonic-gate #define QW 1 /* width of bas-q digit in bits */
390Sstevel@tonic-gate
400Sstevel@tonic-gate /*
410Sstevel@tonic-gate * this stuff should be local and hidden; it was made
420Sstevel@tonic-gate * accessible outside for dirty reasons: 20% faster spell
430Sstevel@tonic-gate */
440Sstevel@tonic-gate #include "huff.h"
450Sstevel@tonic-gate struct huff huffcode;
460Sstevel@tonic-gate
470Sstevel@tonic-gate /*
480Sstevel@tonic-gate * Infinite Huffman code
490Sstevel@tonic-gate *
500Sstevel@tonic-gate * Let the messages be exponentially distributed with ratio r:
510Sstevel@tonic-gate * P {message k} = r^k*(1-r), k = 0, 1, ...
520Sstevel@tonic-gate * Let the messages be coded in base q, and suppose
530Sstevel@tonic-gate * r^n = 1/q
540Sstevel@tonic-gate * If each decade(base q) contains n codes, then
550Sstevel@tonic-gate * the messages assigned to each decade will be q times
560Sstevel@tonic-gate * as probable as the next. Moreover the code for the tail of
570Sstevel@tonic-gate * the distribution after truncating one decade should look
580Sstevel@tonic-gate * just like the original, but longer by one leading digit q-1.
590Sstevel@tonic-gate * q(z+n) = z + (q-1)q^w
600Sstevel@tonic-gate * where z is first code of decade, w is width of code, in shortest
610Sstevel@tonic-gate * full decade. Examples, base 2:
620Sstevel@tonic-gate * r^1 = 1/2 r^5 = 1/2
630Sstevel@tonic-gate * 0 0110
640Sstevel@tonic-gate * 10 0111
650Sstevel@tonic-gate * 110 1000
660Sstevel@tonic-gate * 1110 1001
670Sstevel@tonic-gate * ... 1010
680Sstevel@tonic-gate * 10110
690Sstevel@tonic-gate * w = 1, z = 0 w = 4, z = 0110
700Sstevel@tonic-gate * Rewriting slightly
710Sstevel@tonic-gate * (q-1)z + q*n = (q-1)q^w
720Sstevel@tonic-gate * whence z is a multiple of q and n is a multiple of q-1. Let
730Sstevel@tonic-gate * z = cq, n = d(q-1)
740Sstevel@tonic-gate * We pick w to be the least integer such that
750Sstevel@tonic-gate * d = n/(q-1) <= q^(w-1)
760Sstevel@tonic-gate * Then solve for c
770Sstevel@tonic-gate * c = q^(w-1) - d
780Sstevel@tonic-gate * If c is not zero, the first decade may be preceded by
790Sstevel@tonic-gate * even shorter (w-1)-digit codes 0, 1, ..., c-1. Thus
800Sstevel@tonic-gate * the example code with r^5 = 1/2 becomes
810Sstevel@tonic-gate * 000
820Sstevel@tonic-gate * 001
830Sstevel@tonic-gate * 010
840Sstevel@tonic-gate * 0110
850Sstevel@tonic-gate * 0111
860Sstevel@tonic-gate * 1000
870Sstevel@tonic-gate * 1001
880Sstevel@tonic-gate * 1010
890Sstevel@tonic-gate * 10110
900Sstevel@tonic-gate * ...
910Sstevel@tonic-gate * 110110
920Sstevel@tonic-gate * ...
930Sstevel@tonic-gate * The expected number of base-q digits in a codeword is then
940Sstevel@tonic-gate * w - 1 + r^c/(1-r^n)
950Sstevel@tonic-gate * The present routines require q to be a power of 2
960Sstevel@tonic-gate */
970Sstevel@tonic-gate /*
980Sstevel@tonic-gate * There is a lot of hanky-panky with left justification against
990Sstevel@tonic-gate * sign instead of simple left justification because
1000Sstevel@tonic-gate * unsigned long is not available
1010Sstevel@tonic-gate */
1020Sstevel@tonic-gate #define L (BYTE*(sizeof (long))-1) /* length of signless long */
1030Sstevel@tonic-gate #define MASK (~((unsigned long)1<<L)) /* mask out sign */
1040Sstevel@tonic-gate
1050Sstevel@tonic-gate /*
1060Sstevel@tonic-gate * decode the prefix of word y (which is left justified against sign)
1070Sstevel@tonic-gate * place mesage number into place pointed to by kp
1080Sstevel@tonic-gate * return length (in bits) of decoded prefix or 0 if code is out of
1090Sstevel@tonic-gate * range
1100Sstevel@tonic-gate */
1110Sstevel@tonic-gate int
decode(long y,long * pk)1120Sstevel@tonic-gate decode(long y, long *pk)
1130Sstevel@tonic-gate {
114*239Sceastha int l;
1150Sstevel@tonic-gate long v;
1160Sstevel@tonic-gate if (y < cs) {
1170Sstevel@tonic-gate *pk = y >> (long)(L+QW-w);
1180Sstevel@tonic-gate return (w-QW);
1190Sstevel@tonic-gate }
1200Sstevel@tonic-gate for (l = w, v = v0; y >= qcs;
1210Sstevel@tonic-gate y = ((unsigned long)y << QW) & MASK, v += n)
1220Sstevel@tonic-gate if ((l += QW) > L)
1230Sstevel@tonic-gate return (0);
1240Sstevel@tonic-gate *pk = v + (y>>(long)(L-w));
1250Sstevel@tonic-gate return (l);
1260Sstevel@tonic-gate }
1270Sstevel@tonic-gate
1280Sstevel@tonic-gate /*
1290Sstevel@tonic-gate * encode message k and put result (right justified) into
1300Sstevel@tonic-gate * place pointed to by py.
1310Sstevel@tonic-gate * return length (in bits) of result,
1320Sstevel@tonic-gate * or 0 if code is too long
1330Sstevel@tonic-gate */
1340Sstevel@tonic-gate
1350Sstevel@tonic-gate int
encode(long k,long * py)1360Sstevel@tonic-gate encode(long k, long *py)
1370Sstevel@tonic-gate {
138*239Sceastha int l;
1390Sstevel@tonic-gate long y;
1400Sstevel@tonic-gate if (k < c) {
1410Sstevel@tonic-gate *py = k;
1420Sstevel@tonic-gate return (w-QW);
1430Sstevel@tonic-gate }
1440Sstevel@tonic-gate for (k -= c, y = 1, l = w; k >= n; k -= n, y <<= QW)
1450Sstevel@tonic-gate if ((l += QW) > L)
1460Sstevel@tonic-gate return (0);
1470Sstevel@tonic-gate *py = ((y-1)<<w) + cq + k;
1480Sstevel@tonic-gate return (l);
1490Sstevel@tonic-gate }
1500Sstevel@tonic-gate
1510Sstevel@tonic-gate
1520Sstevel@tonic-gate /*
1530Sstevel@tonic-gate * Initialization code, given expected value of k
1540Sstevel@tonic-gate * E(k) = r/(1-r) = a
1550Sstevel@tonic-gate * and given base width b
1560Sstevel@tonic-gate * return expected length of coded messages
1570Sstevel@tonic-gate */
1580Sstevel@tonic-gate static struct qlog {
1590Sstevel@tonic-gate long p;
1600Sstevel@tonic-gate double u;
1610Sstevel@tonic-gate } z;
1620Sstevel@tonic-gate
1630Sstevel@tonic-gate static struct qlog
qlog(double x,double y,long p,double u)1640Sstevel@tonic-gate qlog(double x, double y, long p, double u) /* find smallest p so x^p<=y */
1650Sstevel@tonic-gate {
1660Sstevel@tonic-gate
1670Sstevel@tonic-gate if (u/x <= y) {
1680Sstevel@tonic-gate z.p = 0;
1690Sstevel@tonic-gate z.u = 1;
1700Sstevel@tonic-gate } else {
1710Sstevel@tonic-gate z = qlog(x, y, p+p, u*u);
1720Sstevel@tonic-gate if (u*z.u/x > y) {
1730Sstevel@tonic-gate z.p += p;
1740Sstevel@tonic-gate z.u *= u;
1750Sstevel@tonic-gate }
1760Sstevel@tonic-gate }
1770Sstevel@tonic-gate return (z);
1780Sstevel@tonic-gate }
1790Sstevel@tonic-gate
1800Sstevel@tonic-gate double
huff(float a)1810Sstevel@tonic-gate huff(float a)
1820Sstevel@tonic-gate {
183*239Sceastha int i, q;
1840Sstevel@tonic-gate long d, j;
1850Sstevel@tonic-gate double r = a/(1.0 + a);
1860Sstevel@tonic-gate double rc, rq;
1870Sstevel@tonic-gate
1880Sstevel@tonic-gate for (i = 0, q = 1, rq = r; i < QW; i++, q *= 2, rq *= rq)
1890Sstevel@tonic-gate continue;
1900Sstevel@tonic-gate rq /= r; /* rq = r^(q-1) */
1910Sstevel@tonic-gate (void) qlog(rq, 1./q, 1L, rq);
1920Sstevel@tonic-gate d = z.p;
1930Sstevel@tonic-gate n = d*(q-1);
1940Sstevel@tonic-gate if (n != d * (q - 1))
1950Sstevel@tonic-gate abort(); /* time to make n long */
1960Sstevel@tonic-gate for (w = QW, j = 1; j < d; w += QW, j *= q)
1970Sstevel@tonic-gate continue;
1980Sstevel@tonic-gate c = j - d;
1990Sstevel@tonic-gate cq = c*q;
2000Sstevel@tonic-gate cs = cq<<(L-w);
2010Sstevel@tonic-gate qcs = (((long)(q-1)<<w) + cq) << (L-QW-w);
2020Sstevel@tonic-gate v0 = c - cq;
2030Sstevel@tonic-gate for (i = 0, rc = 1; i < c; i++, rc *= r) /* rc = r^c */
2040Sstevel@tonic-gate continue;
2050Sstevel@tonic-gate return (w + QW*(rc/(1-z.u) - 1));
2060Sstevel@tonic-gate }
2070Sstevel@tonic-gate
2080Sstevel@tonic-gate void
whuff(void)2090Sstevel@tonic-gate whuff(void)
2100Sstevel@tonic-gate {
2110Sstevel@tonic-gate (void) fwrite((char *) & huffcode, sizeof (huffcode), 1, stdout);
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate
2140Sstevel@tonic-gate int
rhuff(FILE * f)2150Sstevel@tonic-gate rhuff(FILE *f)
2160Sstevel@tonic-gate {
2170Sstevel@tonic-gate return (read(fileno(f), (char *)&huffcode, sizeof (huffcode)) ==
2180Sstevel@tonic-gate sizeof (huffcode));
2190Sstevel@tonic-gate }
220