xref: /plan9/sys/src/cmd/bzip2/lib/bzcompress.c (revision 59cc4ca53493a3c6d2349fe2b7f7c40f7dce7294)
1 /*
2  * THIS FILE IS NOT IDENTICAL TO THE ORIGINAL
3  * FROM THE BZIP2 DISTRIBUTION.
4  *
5  * It has been modified, mainly to break the library
6  * into smaller pieces.
7  *
8  * Russ Cox
9  * rsc@plan9.bell-labs.com
10  * July 2000
11  */
12 
13 /*-------------------------------------------------------------*/
14 /*--- Library top-level functions.                          ---*/
15 /*---                                               bzlib.c ---*/
16 /*-------------------------------------------------------------*/
17 
18 /*--
19   This file is a part of bzip2 and/or libbzip2, a program and
20   library for lossless, block-sorting data compression.
21 
22   Copyright (C) 1996-2000 Julian R Seward.  All rights reserved.
23 
24   Redistribution and use in source and binary forms, with or without
25   modification, are permitted provided that the following conditions
26   are met:
27 
28   1. Redistributions of source code must retain the above copyright
29      notice, this list of conditions and the following disclaimer.
30 
31   2. The origin of this software must not be misrepresented; you must
32      not claim that you wrote the original software.  If you use this
33      software in a product, an acknowledgment in the product
34      documentation would be appreciated but is not required.
35 
36   3. Altered source versions must be plainly marked as such, and must
37      not be misrepresented as being the original software.
38 
39   4. The name of the author may not be used to endorse or promote
40      products derived from this software without specific prior written
41      permission.
42 
43   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
44   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
45   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
47   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
49   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
50   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
51   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
52   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
53   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 
55   Julian Seward, Cambridge, UK.
56   jseward@acm.org
57   bzip2/libbzip2 version 1.0 of 21 March 2000
58 
59   This program is based on (at least) the work of:
60      Mike Burrows
61      David Wheeler
62      Peter Fenwick
63      Alistair Moffat
64      Radford Neal
65      Ian H. Witten
66      Robert Sedgewick
67      Jon L. Bentley
68 
69   For more information on these sources, see the manual.
70 --*/
71 
72 /*--
73    CHANGES
74    ~~~~~~~
75    0.9.0 -- original version.
76 
77    0.9.0a/b -- no changes in this file.
78 
79    0.9.0c
80       * made zero-length BZ_FLUSH work correctly in bzCompress().
81       * fixed bzWrite/bzRead to ignore zero-length requests.
82       * fixed bzread to correctly handle read requests after EOF.
83       * wrong parameter order in call to bzDecompressInit in
84         bzBuffToBuffDecompress.  Fixed.
85 --*/
86 
87 #include "os.h"
88 #include "bzlib.h"
89 #include "bzlib_private.h"
90 
91 /*---------------------------------------------------*/
92 /*--- Compression stuff                           ---*/
93 /*---------------------------------------------------*/
94 
95 /*---------------------------------------------------*/
96 static
prepare_new_block(EState * s)97 void prepare_new_block ( EState* s )
98 {
99    Int32 i;
100    s->nblock = 0;
101    s->numZ = 0;
102    s->state_out_pos = 0;
103    BZ_INITIALISE_CRC ( s->blockCRC );
104    for (i = 0; i < 256; i++) s->inUse[i] = False;
105    s->blockNo++;
106 }
107 
108 
109 /*---------------------------------------------------*/
110 static
init_RL(EState * s)111 void init_RL ( EState* s )
112 {
113    s->state_in_ch  = 256;
114    s->state_in_len = 0;
115 }
116 
117 
118 static
isempty_RL(EState * s)119 Bool isempty_RL ( EState* s )
120 {
121    if (s->state_in_ch < 256 && s->state_in_len > 0)
122       return False; else
123       return True;
124 }
125 
126 
127 /*---------------------------------------------------*/
BZ_API(BZ2_bzCompressInit)128 int BZ_API(BZ2_bzCompressInit)
129                     ( bz_stream* strm,
130                      int        blockSize100k,
131                      int        verbosity,
132                      int        workFactor )
133 {
134    Int32   n;
135    EState* s;
136 
137    if (!bz_config_ok()) return BZ_CONFIG_ERROR;
138 
139    if (strm == NULL ||
140        blockSize100k < 1 || blockSize100k > 9 ||
141        workFactor < 0 || workFactor > 250)
142      return BZ_PARAM_ERROR;
143 
144    if (workFactor == 0) workFactor = 30;
145    if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
146    if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
147 
148    s = BZALLOC( sizeof(EState) );
149    if (s == NULL) return BZ_MEM_ERROR;
150    s->strm = strm;
151 
152    s->arr1 = NULL;
153    s->arr2 = NULL;
154    s->ftab = NULL;
155 
156    n       = 100000 * blockSize100k;
157    s->arr1 = BZALLOC( n                  * sizeof(UInt32) );
158    s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) );
159    s->ftab = BZALLOC( 65537              * sizeof(UInt32) );
160 
161    if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) {
162       if (s->arr1 != NULL) BZFREE(s->arr1);
163       if (s->arr2 != NULL) BZFREE(s->arr2);
164       if (s->ftab != NULL) BZFREE(s->ftab);
165       if (s       != NULL) BZFREE(s);
166       return BZ_MEM_ERROR;
167    }
168 
169    s->blockNo           = 0;
170    s->state             = BZ_S_INPUT;
171    s->mode              = BZ_M_RUNNING;
172    s->combinedCRC       = 0;
173    s->blockSize100k     = blockSize100k;
174    s->nblockMAX         = 100000 * blockSize100k - 19;
175    s->verbosity         = verbosity;
176    s->workFactor        = workFactor;
177 
178    s->block             = (UChar*)s->arr2;
179    s->mtfv              = (UInt16*)s->arr1;
180    s->zbits             = NULL;
181    s->ptr               = (UInt32*)s->arr1;
182 
183    strm->state          = s;
184    strm->total_in_lo32  = 0;
185    strm->total_in_hi32  = 0;
186    strm->total_out_lo32 = 0;
187    strm->total_out_hi32 = 0;
188    init_RL ( s );
189    prepare_new_block ( s );
190    return BZ_OK;
191 }
192 
193 
194 /*---------------------------------------------------*/
195 static
add_pair_to_block(EState * s)196 void add_pair_to_block ( EState* s )
197 {
198    Int32 i;
199    UChar ch = (UChar)(s->state_in_ch);
200    for (i = 0; i < s->state_in_len; i++) {
201       BZ_UPDATE_CRC( s->blockCRC, ch );
202    }
203    s->inUse[s->state_in_ch] = True;
204    switch (s->state_in_len) {
205       case 1:
206          s->block[s->nblock] = (UChar)ch; s->nblock++;
207          break;
208       case 2:
209          s->block[s->nblock] = (UChar)ch; s->nblock++;
210          s->block[s->nblock] = (UChar)ch; s->nblock++;
211          break;
212       case 3:
213          s->block[s->nblock] = (UChar)ch; s->nblock++;
214          s->block[s->nblock] = (UChar)ch; s->nblock++;
215          s->block[s->nblock] = (UChar)ch; s->nblock++;
216          break;
217       default:
218          s->inUse[s->state_in_len-4] = True;
219          s->block[s->nblock] = (UChar)ch; s->nblock++;
220          s->block[s->nblock] = (UChar)ch; s->nblock++;
221          s->block[s->nblock] = (UChar)ch; s->nblock++;
222          s->block[s->nblock] = (UChar)ch; s->nblock++;
223          s->block[s->nblock] = ((UChar)(s->state_in_len-4));
224          s->nblock++;
225          break;
226    }
227 }
228 
229 
230 /*---------------------------------------------------*/
231 static
flush_RL(EState * s)232 void flush_RL ( EState* s )
233 {
234    if (s->state_in_ch < 256) add_pair_to_block ( s );
235    init_RL ( s );
236 }
237 
238 
239 /*---------------------------------------------------*/
240 #define ADD_CHAR_TO_BLOCK(zs,zchh0)               \
241 {                                                 \
242    UInt32 zchh = (UInt32)(zchh0);                 \
243    /*-- fast track the common case --*/           \
244    if (zchh != zs->state_in_ch &&                 \
245        zs->state_in_len == 1) {                   \
246       UChar ch = (UChar)(zs->state_in_ch);        \
247       BZ_UPDATE_CRC( zs->blockCRC, ch );          \
248       zs->inUse[zs->state_in_ch] = True;          \
249       zs->block[zs->nblock] = (UChar)ch;          \
250       zs->nblock++;                               \
251       zs->state_in_ch = zchh;                     \
252    }                                              \
253    else                                           \
254    /*-- general, uncommon cases --*/              \
255    if (zchh != zs->state_in_ch ||                 \
256       zs->state_in_len == 255) {                  \
257       if (zs->state_in_ch < 256)                  \
258          add_pair_to_block ( zs );                \
259       zs->state_in_ch = zchh;                     \
260       zs->state_in_len = 1;                       \
261    } else {                                       \
262       zs->state_in_len++;                         \
263    }                                              \
264 }
265 
266 
267 /*---------------------------------------------------*/
268 static
copy_input_until_stop(EState * s)269 Bool copy_input_until_stop ( EState* s )
270 {
271    Bool progress_in = False;
272 
273    if (s->mode == BZ_M_RUNNING) {
274 
275       /*-- fast track the common case --*/
276       while (True) {
277          /*-- block full? --*/
278          if (s->nblock >= s->nblockMAX) break;
279          /*-- no input? --*/
280          if (s->strm->avail_in == 0) break;
281          progress_in = True;
282          ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
283          s->strm->next_in++;
284          s->strm->avail_in--;
285          s->strm->total_in_lo32++;
286          if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
287       }
288 
289    } else {
290 
291       /*-- general, uncommon case --*/
292       while (True) {
293          /*-- block full? --*/
294          if (s->nblock >= s->nblockMAX) break;
295          /*-- no input? --*/
296          if (s->strm->avail_in == 0) break;
297          /*-- flush/finish end? --*/
298          if (s->avail_in_expect == 0) break;
299          progress_in = True;
300          ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
301          s->strm->next_in++;
302          s->strm->avail_in--;
303          s->strm->total_in_lo32++;
304          if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
305          s->avail_in_expect--;
306       }
307    }
308    return progress_in;
309 }
310 
311 
312 /*---------------------------------------------------*/
313 static
copy_output_until_stop(EState * s)314 Bool copy_output_until_stop ( EState* s )
315 {
316    Bool progress_out = False;
317 
318    while (True) {
319 
320       /*-- no output space? --*/
321       if (s->strm->avail_out == 0) break;
322 
323       /*-- block done? --*/
324       if (s->state_out_pos >= s->numZ) break;
325 
326       progress_out = True;
327       *(s->strm->next_out) = s->zbits[s->state_out_pos];
328       s->state_out_pos++;
329       s->strm->avail_out--;
330       s->strm->next_out++;
331       s->strm->total_out_lo32++;
332       if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
333    }
334 
335    return progress_out;
336 }
337 
338 
339 /*---------------------------------------------------*/
340 static
handle_compress(bz_stream * strm)341 Bool handle_compress ( bz_stream* strm )
342 {
343    Bool progress_in  = False;
344    Bool progress_out = False;
345    EState* s = strm->state;
346 
347    while (True) {
348 
349       if (s->state == BZ_S_OUTPUT) {
350          progress_out |= copy_output_until_stop ( s );
351          if (s->state_out_pos < s->numZ) break;
352          if (s->mode == BZ_M_FINISHING &&
353              s->avail_in_expect == 0 &&
354              isempty_RL(s)) break;
355          prepare_new_block ( s );
356          s->state = BZ_S_INPUT;
357          if (s->mode == BZ_M_FLUSHING &&
358              s->avail_in_expect == 0 &&
359              isempty_RL(s)) break;
360       }
361 
362       if (s->state == BZ_S_INPUT) {
363          progress_in |= copy_input_until_stop ( s );
364          if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
365             flush_RL ( s );
366             BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
367             s->state = BZ_S_OUTPUT;
368          }
369          else
370          if (s->nblock >= s->nblockMAX) {
371             BZ2_compressBlock ( s, False );
372             s->state = BZ_S_OUTPUT;
373          }
374          else
375          if (s->strm->avail_in == 0) {
376             break;
377          }
378       }
379 
380    }
381 
382    return progress_in || progress_out;
383 }
384 
385 /*---------------------------------------------------*/
BZ_API(BZ2_bzCompress)386 int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
387 {
388    Bool progress;
389    EState* s;
390    if (strm == NULL) return BZ_PARAM_ERROR;
391    s = strm->state;
392    if (s == NULL) return BZ_PARAM_ERROR;
393    if (s->strm != strm) return BZ_PARAM_ERROR;
394 
395    preswitch:
396    switch (s->mode) {
397 
398       case BZ_M_IDLE:
399          return BZ_SEQUENCE_ERROR;
400 
401       case BZ_M_RUNNING:
402          if (action == BZ_RUN) {
403             progress = handle_compress ( strm );
404             return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;
405          }
406          else
407 	 if (action == BZ_FLUSH) {
408             s->avail_in_expect = strm->avail_in;
409             s->mode = BZ_M_FLUSHING;
410             goto preswitch;
411          }
412          else
413          if (action == BZ_FINISH) {
414             s->avail_in_expect = strm->avail_in;
415             s->mode = BZ_M_FINISHING;
416             goto preswitch;
417          }
418          else
419             return BZ_PARAM_ERROR;
420 
421       case BZ_M_FLUSHING:
422          if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
423          if (s->avail_in_expect != s->strm->avail_in)
424             return BZ_SEQUENCE_ERROR;
425          progress = handle_compress ( strm );
426          if (!progress) return BZ_SEQUENCE_ERROR;	//rsc added
427          if (s->avail_in_expect > 0 || !isempty_RL(s) ||
428              s->state_out_pos < s->numZ) return BZ_FLUSH_OK;
429          s->mode = BZ_M_RUNNING;
430          return BZ_RUN_OK;
431 
432       case BZ_M_FINISHING:
433          if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
434          if (s->avail_in_expect != s->strm->avail_in)
435             return BZ_SEQUENCE_ERROR;
436          progress = handle_compress ( strm );
437          if (!progress) return BZ_SEQUENCE_ERROR;
438          if (s->avail_in_expect > 0 || !isempty_RL(s) ||
439              s->state_out_pos < s->numZ) return BZ_FINISH_OK;
440          s->mode = BZ_M_IDLE;
441          return BZ_STREAM_END;
442    }
443    return BZ_OK; /*--not reached--*/
444 }
445 
446 
447 /*---------------------------------------------------*/
BZ_API(BZ2_bzCompressEnd)448 int BZ_API(BZ2_bzCompressEnd)  ( bz_stream *strm )
449 {
450    EState* s;
451    if (strm == NULL) return BZ_PARAM_ERROR;
452    s = strm->state;
453    if (s == NULL) return BZ_PARAM_ERROR;
454    if (s->strm != strm) return BZ_PARAM_ERROR;
455 
456    if (s->arr1 != NULL) BZFREE(s->arr1);
457    if (s->arr2 != NULL) BZFREE(s->arr2);
458    if (s->ftab != NULL) BZFREE(s->ftab);
459    BZFREE(strm->state);
460 
461    strm->state = NULL;
462 
463    return BZ_OK;
464 }
465 
466