1 /* Copyright (C) 1994, 1996, 1998 Aladdin Enterprises. All rights reserved.
2
3 This software is provided AS-IS with no warranty, either express or
4 implied.
5
6 This software is distributed under license and may not be copied,
7 modified or distributed except as expressly authorized under the terms
8 of the license contained in the file LICENSE in this distribution.
9
10 For more information about licensing, please refer to
11 http://www.ghostscript.com/licensing/. For information on
12 commercial licensing, go to http://www.artifex.com/licensing/ or
13 contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14 San Rafael, CA 94903, U.S.A., +1(415)492-9861.
15 */
16
17 /* $Id: shcgen.c,v 1.4 2002/02/21 22:24:54 giles Exp $ */
18 /* Generate (bounded) Huffman code definitions from frequencies, */
19 /* and tables from definitions. */
20 #include "memory_.h"
21 #include "stdio_.h"
22 #include <stdlib.h> /* for qsort */
23 #include "gdebug.h"
24 #include "gserror.h"
25 #include "gserrors.h"
26 #include "gsmemory.h"
27 #include "scommon.h"
28 #include "shc.h"
29 #include "shcgen.h"
30
31 /* ------ Frequency -> definition procedure ------ */
32
33 /* Define a node for the Huffman code tree. */
34 typedef struct count_node_s count_node;
35 struct count_node_s {
36 long freq; /* frequency of value */
37 uint value; /* data value being encoded */
38 uint code_length; /* length of Huffman code */
39 count_node *next; /* next node in freq-sorted list */
40 count_node *left; /* left child in tree (smaller code_length) */
41 count_node *right; /* right child in tree (greater code_length) */
42 };
43
44 #ifdef DEBUG
45 # define debug_print_nodes(nodes, n, tag, lengths)\
46 if ( gs_debug_c('W') ) print_nodes_proc(nodes, n, tag, lengths);
47 private void
print_nodes_proc(const count_node * nodes,int n,const char * tag,int lengths)48 print_nodes_proc(const count_node * nodes, int n, const char *tag, int lengths)
49 {
50 int i;
51
52 dlprintf1("[w]---------------- %s ----------------\n", tag);
53 for (i = 0; i < n; ++i)
54 dlprintf7("[w]node %d: f=%ld v=%d len=%d N=%d L=%d R=%d\n",
55 i, nodes[i].freq, nodes[i].value, nodes[i].code_length,
56 (nodes[i].next == 0 ? -1 : (int)(nodes[i].next - nodes)),
57 (nodes[i].left == 0 ? -1 : (int)(nodes[i].left - nodes)),
58 (nodes[i].right == 0 ? -1 : (int)(nodes[i].right - nodes)));
59 for (i = lengths; i > 0;) {
60 int j = i;
61 int len = nodes[--j].code_length;
62
63 while (j > 0 && nodes[j - 1].code_length == len)
64 --j;
65 dlprintf2("[w]%d codes of length %d\n", i - j, len);
66 i = j;
67 }
68 }
69 #else
70 # define debug_print_nodes(nodes, n, tag, lengths) DO_NOTHING
71 #endif
72
73 /* Node comparison procedures for sorting. */
74 #define pn1 ((const count_node *)p1)
75 #define pn2 ((const count_node *)p2)
76 /* Sort by decreasing frequency. */
77 private int
compare_freqs(const void * p1,const void * p2)78 compare_freqs(const void *p1, const void *p2)
79 {
80 long diff = pn2->freq - pn1->freq;
81
82 return (diff < 0 ? -1 : diff > 0 ? 1 : 0);
83 }
84 /* Sort by increasing code length, and secondarily by decreasing frequency. */
85 private int
compare_code_lengths(const void * p1,const void * p2)86 compare_code_lengths(const void *p1, const void *p2)
87 {
88 int diff = pn1->code_length - pn2->code_length;
89
90 return (diff < 0 ? -1 : diff > 0 ? 1 : compare_freqs(p1, p2));
91 }
92 /* Sort by increasing code value. */
93 private int
compare_values(const void * p1,const void * p2)94 compare_values(const void *p1, const void *p2)
95 {
96 return (pn1->value < pn2->value ? -1 :
97 pn1->value > pn2->value ? 1 : 0);
98 }
99 #undef pn1
100 #undef pn2
101
102 /* Adjust code lengths so that none of them exceeds max_length. */
103 /* We break this out just to help organize the code; it's only called */
104 /* from one place in hc_compute. */
105 private void
hc_limit_code_lengths(count_node * nodes,uint num_values,int max_length)106 hc_limit_code_lengths(count_node * nodes, uint num_values, int max_length)
107 {
108 int needed; /* # of max_length codes we need to free up */
109 count_node *longest = nodes + num_values;
110
111 { /* Compute the number of additional max_length codes */
112 /* we need to make available. */
113 int length = longest[-1].code_length;
114 int next_length;
115 int avail = 0;
116
117 while ((next_length = longest[-1].code_length) > max_length) {
118 avail >>= length - next_length;
119 length = next_length;
120 (--longest)->code_length = max_length;
121 ++avail;
122 }
123 needed = (nodes + num_values - longest) -
124 (avail >>= (length - max_length));
125 if_debug2('W', "[w]avail=%d, needed=%d\n",
126 avail, needed);
127 }
128 /* Skip over all max_length codes. */
129 while (longest[-1].code_length == max_length)
130 --longest;
131
132 /*
133 * To make available a code of length N, suppose that the next
134 * shortest used code is of length M.
135 * We take the lowest-frequency code of length M and change it
136 * to M+1; we then have to compensate by reducing the length of
137 * some of the highest-frequency codes of length N, as follows:
138 * M new lengths for codes of length N
139 * --- -----------
140 * N-1 (none)
141 * N-2 N-1
142 * <N-2 M+2, M+2, N-1
143 * In the present situation, N = max_length.
144 */
145 for (; needed > 0; --needed) { /* longest points to the first code of length max_length. */
146 /* Since codes are sorted by increasing code length, */
147 /* longest-1 is the desired code of length M. */
148 int M1 = ++(longest[-1].code_length);
149
150 switch (max_length - M1) {
151 case 0: /* M == N-1 */
152 --longest;
153 break;
154 case 1: /* M == N-2 */
155 longest++->code_length = M1;
156 break;
157 default:
158 longest->code_length = M1 + 1;
159 longest[1].code_length = M1 + 1;
160 longest[2].code_length--;
161 longest += 3;
162 }
163 }
164 }
165
166 /* Compute an optimal Huffman code from an input data set. */
167 /* The client must have set all the elements of *def. */
168 int
hc_compute(hc_definition * def,const long * freqs,gs_memory_t * mem)169 hc_compute(hc_definition * def, const long *freqs, gs_memory_t * mem)
170 {
171 uint num_values = def->num_values;
172 count_node *nodes =
173 (count_node *) gs_alloc_byte_array(mem, num_values * 2 - 1,
174 sizeof(count_node), "hc_compute");
175 int i;
176 count_node *lowest;
177 count_node *comb;
178
179 if (nodes == 0)
180 return_error(gs_error_VMerror);
181
182 /* Create leaf nodes for the input data. */
183 for (i = 0; i < num_values; ++i)
184 nodes[i].freq = freqs[i], nodes[i].value = i;
185
186 /* Create a list sorted by increasing frequency. */
187 /* Also initialize the tree structure. */
188 qsort(nodes, num_values, sizeof(count_node), compare_freqs);
189 for (i = 0; i < num_values; ++i)
190 nodes[i].next = &nodes[i - 1],
191 nodes[i].code_length = 0,
192 nodes[i].left = nodes[i].right = 0;
193 nodes[0].next = 0;
194 debug_print_nodes(nodes, num_values, "after sort", 0);
195
196 /* Construct the Huffman code tree. */
197 for (lowest = &nodes[num_values - 1], comb = &nodes[num_values];;
198 ++comb
199 ) {
200 count_node *pn1 = lowest;
201 count_node *pn2 = pn1->next;
202 long freq = pn1->freq + pn2->freq;
203
204 /* Create a parent for the two lowest-frequency nodes. */
205 lowest = pn2->next;
206 comb->freq = freq;
207 if (pn1->code_length <= pn2->code_length)
208 comb->left = pn1, comb->right = pn2,
209 comb->code_length = pn2->code_length + 1;
210 else
211 comb->left = pn2, comb->right = pn1,
212 comb->code_length = pn1->code_length + 1;
213 if (lowest == 0) /* no nodes left to combine */
214 break;
215 /* Insert comb in the sorted list. */
216 if (freq < lowest->freq)
217 comb->next = lowest, lowest = comb;
218 else {
219 count_node *here = lowest;
220
221 while (here->next != 0 && freq >= here->next->freq)
222 here = here->next;
223 comb->next = here->next;
224 here->next = comb;
225 }
226 }
227
228 /* comb (i.e., &nodes[num_values * 2 - 2] is the root of the tree. */
229 /* Note that the left and right children of an interior node */
230 /* were constructed before, and therefore have lower indices */
231 /* in the nodes array than, the parent node. Thus we can assign */
232 /* the code lengths (node depths) in a single descending-order */
233 /* sweep. */
234 comb++->code_length = 0;
235 while (comb > nodes + num_values) {
236 --comb;
237 comb->left->code_length = comb->right->code_length =
238 comb->code_length + 1;
239 }
240 debug_print_nodes(nodes, num_values * 2 - 1, "after combine", 0);
241
242 /* Sort the leaves again by code length. */
243 qsort(nodes, num_values, sizeof(count_node), compare_code_lengths);
244 debug_print_nodes(nodes, num_values, "after re-sort", num_values);
245
246 /* Limit the code length to def->num_counts. */
247 hc_limit_code_lengths(nodes, num_values, def->num_counts);
248 debug_print_nodes(nodes, num_values, "after limit", num_values);
249
250 /* Sort within each code length by increasing code value. */
251 /* This doesn't affect data compression, but it makes */
252 /* the code definition itself compress better using our */
253 /* incremental encoding. */
254 for (i = num_values; i > 0;) {
255 int j = i;
256 int len = nodes[--j].code_length;
257
258 while (j > 0 && nodes[j - 1].code_length == len)
259 --j;
260 qsort(&nodes[j], i - j, sizeof(count_node), compare_values);
261 i = j;
262 }
263
264 /* Extract the definition from the nodes. */
265 memset(def->counts, 0, sizeof(*def->counts) * (def->num_counts + 1));
266 for (i = 0; i < num_values; ++i) {
267 def->values[i] = nodes[i].value;
268 def->counts[nodes[i].code_length]++;
269 }
270
271 /* All done, release working storage. */
272 gs_free_object(mem, nodes, "hc_compute");
273 return 0;
274 }
275
276 /* ------ Byte string <-> definition procedures ------ */
277
278 /*
279 * We define a compressed representation for (well-behaved) definitions
280 * as a byte string. A "well-behaved" definition is one where if
281 * code values A and B have the same code length and A < B,
282 * A precedes B in the values table of the definition, and hence
283 * A's encoding lexicographically precedes B's.
284 *
285 * The successive bytes in the compressed string give the code lengths for
286 * runs of decoded values, in the form nnnnllll where nnnn is the number of
287 * consecutive values -1 and llll is the code length -1.
288 */
289
290 /* Convert a definition to a byte string. */
291 /* The caller must provide the byte string, of length def->num_values. */
292 /* Assume (do not check) that the definition is well-behaved. */
293 /* Return the actual length of the string. */
294 int
hc_bytes_from_definition(byte * dbytes,const hc_definition * def)295 hc_bytes_from_definition(byte * dbytes, const hc_definition * def)
296 {
297 int i, j;
298 byte *bp = dbytes;
299 const byte *lp = dbytes;
300 const byte *end = dbytes + def->num_values;
301 const ushort *values = def->values;
302
303 /* Temporarily use the output string as a map from */
304 /* values to code lengths. */
305 for (i = 1; i <= def->num_counts; i++)
306 for (j = 0; j < def->counts[i]; j++)
307 bp[*values++] = i;
308
309 /* Now construct the actual string. */
310 while (lp < end) {
311 const byte *vp;
312 byte len = *lp;
313
314 for (vp = lp + 1; vp < end && vp < lp + 16 && *vp == len;)
315 vp++;
316 *bp++ = ((vp - lp - 1) << 4) + (len - 1);
317 lp = vp;
318 }
319
320 return bp - dbytes;
321 }
322
323 /* Extract num_counts and num_values from a byte string. */
324 void
hc_sizes_from_bytes(hc_definition * def,const byte * dbytes,int num_bytes)325 hc_sizes_from_bytes(hc_definition * def, const byte * dbytes, int num_bytes)
326 {
327 uint num_counts = 0, num_values = 0;
328 int i;
329
330 for (i = 0; i < num_bytes; i++) {
331 int n = (dbytes[i] >> 4) + 1;
332 int l = (dbytes[i] & 15) + 1;
333
334 if (l > num_counts)
335 num_counts = l;
336 num_values += n;
337 }
338 def->num_counts = num_counts;
339 def->num_values = num_values;
340 }
341
342 /* Convert a byte string back to a definition. */
343 /* The caller must initialize *def, including allocating counts and values. */
344 void
hc_definition_from_bytes(hc_definition * def,const byte * dbytes)345 hc_definition_from_bytes(hc_definition * def, const byte * dbytes)
346 {
347 int v, i;
348 ushort counts[max_hc_length + 1];
349
350 /* Make a first pass to set the counts for each code length. */
351 memset(counts, 0, sizeof(counts[0]) * (def->num_counts + 1));
352 for (i = 0, v = 0; v < def->num_values; i++) {
353 int n = (dbytes[i] >> 4) + 1;
354 int l = (dbytes[i] & 15) + 1;
355
356 counts[l] += n;
357 v += n;
358 }
359
360 /* Now fill in the definition. */
361 memcpy(def->counts, counts, sizeof(counts[0]) * (def->num_counts + 1));
362 for (i = 1, v = 0; i <= def->num_counts; i++) {
363 uint prev = counts[i];
364
365 counts[i] = v;
366 v += prev;
367 }
368 for (i = 0, v = 0; v < def->num_values; i++) {
369 int n = (dbytes[i] >> 4) + 1;
370 int l = (dbytes[i] & 15) + 1;
371 int j;
372
373 for (j = 0; j < n; n++)
374 def->values[counts[l]++] = v++;
375 }
376 }
377
378 /* ------ Definition -> table procedures ------ */
379
380 /* Generate the encoding table from the definition. */
381 /* The size of the encode array is def->num_values. */
382 void
hc_make_encoding(hce_code * encode,const hc_definition * def)383 hc_make_encoding(hce_code * encode, const hc_definition * def)
384 {
385 uint next = 0;
386 const ushort *pvalue = def->values;
387 uint i, k;
388
389 for (i = 1; i <= def->num_counts; i++) {
390 for (k = 0; k < def->counts[i]; k++, pvalue++, next++) {
391 hce_code *pce = encode + *pvalue;
392
393 pce->code = next;
394 pce->code_length = i;
395 }
396 next <<= 1;
397 }
398 }
399
400 /* We decode in two steps, first indexing into a table with */
401 /* a fixed number of bits from the source, and then indexing into */
402 /* an auxiliary table if necessary. (See shc.h for details.) */
403
404 /* Calculate the size of the decoding table. */
405 uint
hc_sizeof_decoding(const hc_definition * def,int initial_bits)406 hc_sizeof_decoding(const hc_definition * def, int initial_bits)
407 {
408 uint size = 1 << initial_bits;
409 uint carry = 0, mask = (uint) ~ 1;
410 uint i;
411
412 for (i = initial_bits + 1; i <= def->num_counts;
413 i++, carry <<= 1, mask <<= 1
414 ) {
415 carry += def->counts[i];
416 size += carry & mask;
417 carry &= ~mask;
418 }
419 return size;
420 }
421
422 /* Generate the decoding tables. */
423 void
hc_make_decoding(hcd_code * decode,const hc_definition * def,int initial_bits)424 hc_make_decoding(hcd_code * decode, const hc_definition * def,
425 int initial_bits)
426 { /* Make entries for single-dispatch codes. */
427 {
428 hcd_code *pcd = decode;
429 const ushort *pvalue = def->values;
430 uint i, k, d;
431
432 for (i = 0; i <= initial_bits; i++) {
433 for (k = 0; k < def->counts[i]; k++, pvalue++) {
434 for (d = 1 << (initial_bits - i); d > 0;
435 d--, pcd++
436 )
437 pcd->value = *pvalue,
438 pcd->code_length = i;
439 }
440 }
441 }
442 /* Make entries for two-dispatch codes. */
443 /* By working backward, we can do this more easily */
444 /* in a single pass. */
445 {
446 uint dsize = hc_sizeof_decoding(def, initial_bits);
447 hcd_code *pcd = decode + (1 << initial_bits);
448 hcd_code *pcd2 = decode + dsize;
449 const ushort *pvalue = def->values + def->num_values;
450 uint entries_left = 0, slots_left = 0, mult_shift = 0;
451 uint i = def->num_counts + 1, j;
452
453 for (;;) {
454 if (slots_left == 0) {
455 if (entries_left != 0) {
456 slots_left = 1 << (i - initial_bits);
457 mult_shift = 0;
458 continue;
459 }
460 if (--i <= initial_bits)
461 break;
462 entries_left = def->counts[i];
463 continue;
464 }
465 if (entries_left == 0) {
466 entries_left = def->counts[--i];
467 mult_shift++;
468 continue;
469 }
470 --entries_left, --pvalue;
471 for (j = 1 << mult_shift; j > 0; j--) {
472 --pcd2;
473 pcd2->value = *pvalue;
474 pcd2->code_length = i - initial_bits;
475 }
476 if ((slots_left -= 1 << mult_shift) == 0) {
477 --pcd;
478 pcd->value = pcd2 - decode;
479 pcd->code_length = i + mult_shift;
480 }
481 }
482 }
483 }
484