xref: /netbsd-src/external/bsd/bzip2/dist/bzip2recover.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: bzip2recover.c,v 1.3 2012/05/07 00:45:47 wiz Exp $	*/
2 
3 /*-----------------------------------------------------------*/
4 /*--- Block recoverer program for bzip2                   ---*/
5 /*---                                      bzip2recover.c ---*/
6 /*-----------------------------------------------------------*/
7 
8 /* ------------------------------------------------------------------
9    This file is part of bzip2/libbzip2, a program and library for
10    lossless, block-sorting data compression.
11 
12    bzip2/libbzip2 version 1.0.6 of 6 September 2010
13    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14 
15    Please read the WARNING, DISCLAIMER and PATENTS sections in the
16    README file.
17 
18    This program is released under the terms of the license contained
19    in the file LICENSE.
20    ------------------------------------------------------------------ */
21 
22 /* This program is a complete hack and should be rewritten properly.
23 	 It isn't very complicated. */
24 
25 #include <inttypes.h>
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 
32 /* This program records bit locations in the file to be recovered.
33    That means that if 64-bit ints are not supported, we will not
34    be able to recover .bz2 files over 512MB (2^32 bits) long.
35    On GNU supported platforms, we take advantage of the 64-bit
36    int support to circumvent this problem.  Ditto MSVC.
37 
38    This change occurred in version 1.0.2; all prior versions have
39    the 512MB limitation.
40 */
/* Integer type used for bit offsets into the input file, together
   with the matching printf conversion.  The first branch is selected
   unconditionally; the others are kept as inactive fallbacks. */
#if 1
typedef uint64_t MaybeUInt64;
#define MaybeUInt64_FMT "%" PRIu64
#elif defined(_MSC_VER)
typedef unsigned __int64 MaybeUInt64;
#define MaybeUInt64_FMT "%I64u"
#else
typedef unsigned int MaybeUInt64;
#define MaybeUInt64_FMT "%u"
#endif
53 
/* Scalar aliases used throughout this file. */
typedef unsigned int  UInt32;
typedef int           Int32;
typedef unsigned char UChar;
typedef char          Char;

/* Boolean type and its two values. */
typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)
61 
62 
63 #define BZ_MAX_FILENAME 2000
64 
65 Char inFileName[BZ_MAX_FILENAME];
66 Char outFileName[BZ_MAX_FILENAME];
67 Char progName[BZ_MAX_FILENAME];
68 
69 MaybeUInt64 bytesOut = 0;
70 MaybeUInt64 bytesIn  = 0;
71 
72 /*---------------------------------------------------*/
73 /*--- Bit stream I/O                              ---*/
74 /*---------------------------------------------------*/
75 
76 typedef
77    struct {
78       FILE*  handle;
79       Int32  buffer;
80       Int32  buffLive;
81       Char   mode;
82    }
83    BitStream;
84 
85 static void readError ( void );
86 static void writeError ( void );
87 static void mallocFail ( Int32 n );
88 static BitStream* bsOpenReadStream ( FILE* stream );
89 static BitStream* bsOpenWriteStream ( FILE* stream );
90 static void bsPutBit ( BitStream* bs, Int32 bit );
91 static Int32 bsGetBit ( BitStream* bs );
92 static void bsClose ( BitStream* bs );
93 static void bsPutUChar ( BitStream* bs, UChar c );
94 static void bsPutUInt32 ( BitStream* bs, UInt32 c );
95 static Bool endsInBz2 ( Char* name );
96 static void tooManyBlocks ( Int32 max_handled_blocks );
97 
98 
99 /*---------------------------------------------------*/
100 /*--- Header bytes                                ---*/
101 /*---------------------------------------------------*/
102 
/* ASCII codes of the stream signature bytes.  BZ_HDR_0 is the digit
   base: the writer emits BZ_HDR_0 + 9 as the fourth header byte. */
#define BZ_HDR_B 0x42   /* 'B' */
#define BZ_HDR_Z 0x5a   /* 'Z' */
#define BZ_HDR_h 0x68   /* 'h' */
#define BZ_HDR_0 0x30   /* '0' */
107 
108 
109 /*---------------------------------------------------*/
110 /*--- I/O errors                                  ---*/
111 /*---------------------------------------------------*/
112 
113 /*---------------------------------------------*/
114 __dead static void readError ( void )
115 {
116    fprintf ( stderr,
117              "%s: I/O error reading `%s', possible reason follows.\n",
118             progName, inFileName );
119    perror ( progName );
120    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
121              progName );
122    exit ( 1 );
123 }
124 
125 
126 /*---------------------------------------------*/
127 __dead static void writeError ( void )
128 {
129    fprintf ( stderr,
130              "%s: I/O error reading `%s', possible reason follows.\n",
131             progName, inFileName );
132    perror ( progName );
133    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
134              progName );
135    exit ( 1 );
136 }
137 
138 
139 /*---------------------------------------------*/
140 __dead static void mallocFail ( Int32 n )
141 {
142    fprintf ( stderr,
143              "%s: malloc failed on request for %d bytes.\n",
144             progName, n );
145    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
146              progName );
147    exit ( 1 );
148 }
149 
150 
151 /*---------------------------------------------*/
152 __dead static void tooManyBlocks ( Int32 max_handled_blocks )
153 {
154    fprintf ( stderr,
155              "%s: `%s' appears to contain more than %d blocks\n",
156             progName, inFileName, max_handled_blocks );
157    fprintf ( stderr,
158              "%s: and cannot be handled.  To fix, increase\n",
159              progName );
160    fprintf ( stderr,
161              "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
162              progName );
163    exit ( 1 );
164 }
165 
166 
167 
168 /*---------------------------------------------*/
169 static BitStream* bsOpenReadStream ( FILE* stream )
170 {
171    BitStream *bs = malloc ( sizeof(BitStream) );
172    if (bs == NULL) mallocFail ( sizeof(BitStream) );
173    bs->handle = stream;
174    bs->buffer = 0;
175    bs->buffLive = 0;
176    bs->mode = 'r';
177    return bs;
178 }
179 
180 
181 /*---------------------------------------------*/
182 static BitStream* bsOpenWriteStream ( FILE* stream )
183 {
184    BitStream *bs = malloc ( sizeof(BitStream) );
185    if (bs == NULL) mallocFail ( sizeof(BitStream) );
186    bs->handle = stream;
187    bs->buffer = 0;
188    bs->buffLive = 0;
189    bs->mode = 'w';
190    return bs;
191 }
192 
193 
194 /*---------------------------------------------*/
195 static void bsPutBit ( BitStream* bs, Int32 bit )
196 {
197    if (bs->buffLive == 8) {
198       Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
199       if (retVal == EOF) writeError();
200       bytesOut++;
201       bs->buffLive = 1;
202       bs->buffer = bit & 0x1;
203    } else {
204       bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
205       bs->buffLive++;
206    };
207 }
208 
209 
210 /*---------------------------------------------*/
211 /*--
212    Returns 0 or 1, or 2 to indicate EOF.
213 --*/
214 static Int32 bsGetBit ( BitStream* bs )
215 {
216    if (bs->buffLive > 0) {
217       bs->buffLive --;
218       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
219    } else {
220       Int32 retVal = getc ( bs->handle );
221       if ( retVal == EOF ) {
222          if (errno != 0) readError();
223          return 2;
224       }
225       bs->buffLive = 7;
226       bs->buffer = retVal;
227       return ( ((bs->buffer) >> 7) & 0x1 );
228    }
229 }
230 
231 
232 /*---------------------------------------------*/
233 static void bsClose ( BitStream* bs )
234 {
235    Int32 retVal;
236 
237    if ( bs->mode == 'w' ) {
238       while ( bs->buffLive < 8 ) {
239          bs->buffLive++;
240          bs->buffer <<= 1;
241       };
242       retVal = putc ( (UChar) (bs->buffer), bs->handle );
243       if (retVal == EOF) writeError();
244       bytesOut++;
245       retVal = fflush ( bs->handle );
246       if (retVal == EOF) writeError();
247    }
248    retVal = fclose ( bs->handle );
249    if (retVal == EOF) {
250       if (bs->mode == 'w') writeError(); else readError();
251    }
252    free ( bs );
253 }
254 
255 
256 /*---------------------------------------------*/
257 static void bsPutUChar ( BitStream* bs, UChar c )
258 {
259    Int32 i;
260    for (i = 7; i >= 0; i--)
261       bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
262 }
263 
264 
265 /*---------------------------------------------*/
266 static void bsPutUInt32 ( BitStream* bs, UInt32 c )
267 {
268    Int32 i;
269 
270    for (i = 31; i >= 0; i--)
271       bsPutBit ( bs, (c >> i) & 0x1 );
272 }
273 
274 
275 /*---------------------------------------------*/
276 static Bool endsInBz2 ( Char* name )
277 {
278    Int32 n = strlen ( name );
279    if (n <= 4) return False;
280    return
281       (name[n-4] == '.' &&
282        name[n-3] == 'b' &&
283        name[n-2] == 'z' &&
284        name[n-1] == '2');
285 }
286 
287 
288 /*---------------------------------------------------*/
289 /*---                                             ---*/
290 /*---------------------------------------------------*/
291 
292 /* This logic isn't really right when it comes to Cygwin. */
/* Character that separates directory components in a path. */
#if defined(_WIN32)
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif
298 
/* 48-bit marker that begins a block, split into its upper 16 bits
   (HI) and lower 32 bits (LO). */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

/* 48-bit end marker, split the same way. */
#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL
304 
/* Capacity of the block-boundary tables.  Increase if necessary;
   however, a .bz2 file with > 50000 blocks would have an uncompressed
   size of at least 40GB, so the chances are low that you will need
   to raise this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000
310 
311 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
312 MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
313 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
314 MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
315 
316 Int32 main ( Int32 argc, Char** argv )
317 {
318    FILE*       inFile;
319    FILE*       outFile;
320    BitStream*  bsIn, *bsWr;
321    Int32       b, wrBlock, currBlock, rbCtr;
322    MaybeUInt64 bitsRead;
323 
324    UInt32      buffHi, buffLo, blockCRC;
325    Char*       p;
326 
327    strcpy ( progName, argv[0] );
328    inFileName[0] = outFileName[0] = 0;
329 
330    fprintf ( stderr,
331              "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" );
332 
333    if (argc != 2) {
334       fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
335                         progName, progName );
336       switch (sizeof(MaybeUInt64)) {
337          case 8:
338             fprintf(stderr,
339                     "\trestrictions on size of recovered file: None\n");
340             break;
341          case 4:
342             fprintf(stderr,
343                     "\trestrictions on size of recovered file: 512 MB\n");
344             fprintf(stderr,
345                     "\tto circumvent, recompile with MaybeUInt64 as an\n"
346                     "\tunsigned 64-bit int.\n");
347             break;
348          default:
349             fprintf(stderr,
350                     "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
351                     "configuration error.\n");
352             break;
353       }
354       exit(1);
355    }
356 
357    if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
358       fprintf ( stderr,
359                 "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
360                 progName, (int)strlen(argv[1]) );
361       exit(1);
362    }
363 
364    strcpy ( inFileName, argv[1] );
365 
366    inFile = fopen ( inFileName, "rb" );
367    if (inFile == NULL) {
368       fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
369       exit(1);
370    }
371 
372    bsIn = bsOpenReadStream ( inFile );
373    fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
374 
375    bitsRead = 0;
376    buffHi = buffLo = 0;
377    currBlock = 0;
378    bStart[currBlock] = 0;
379 
380    rbCtr = 0;
381 
382    while (True) {
383       b = bsGetBit ( bsIn );
384       bitsRead++;
385       if (b == 2) {
386          if (bitsRead >= bStart[currBlock] &&
387             (bitsRead - bStart[currBlock]) >= 40) {
388             bEnd[currBlock] = bitsRead-1;
389             if (currBlock > 0)
390                fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
391                                  " to " MaybeUInt64_FMT " (incomplete)\n",
392                          currBlock,  bStart[currBlock], bEnd[currBlock] );
393          } else
394             currBlock--;
395          break;
396       }
397       buffHi = (buffHi << 1) | (buffLo >> 31);
398       buffLo = (buffLo << 1) | (b & 1);
399       if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
400              && buffLo == BLOCK_HEADER_LO)
401            ||
402            ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
403              && buffLo == BLOCK_ENDMARK_LO)
404          ) {
405          if (bitsRead > 49) {
406             bEnd[currBlock] = bitsRead-49;
407          } else {
408             bEnd[currBlock] = 0;
409          }
410          if (currBlock > 0 &&
411 	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
412             fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
413                               " to " MaybeUInt64_FMT "\n",
414                       rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
415             rbStart[rbCtr] = bStart[currBlock];
416             rbEnd[rbCtr] = bEnd[currBlock];
417             rbCtr++;
418          }
419          if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
420             tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
421          currBlock++;
422 
423          bStart[currBlock] = bitsRead;
424       }
425    }
426 
427    bsClose ( bsIn );
428 
429    /*-- identified blocks run from 1 to rbCtr inclusive. --*/
430 
431    if (rbCtr < 1) {
432       fprintf ( stderr,
433                 "%s: sorry, I couldn't find any block boundaries.\n",
434                 progName );
435       exit(1);
436    };
437 
438    fprintf ( stderr, "%s: splitting into blocks\n", progName );
439 
440    inFile = fopen ( inFileName, "rb" );
441    if (inFile == NULL) {
442       fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
443       exit(1);
444    }
445    bsIn = bsOpenReadStream ( inFile );
446 
447    /*-- placate gcc's dataflow analyser --*/
448    blockCRC = 0; bsWr = 0;
449 
450    bitsRead = 0;
451    outFile = NULL;
452    wrBlock = 0;
453    while (True) {
454       b = bsGetBit(bsIn);
455       if (b == 2) break;
456       buffHi = (buffHi << 1) | (buffLo >> 31);
457       buffLo = (buffLo << 1) | (b & 1);
458       if (bitsRead == 47+rbStart[wrBlock])
459          blockCRC = (buffHi << 16) | (buffLo >> 16);
460 
461       if (outFile != NULL && bitsRead >= rbStart[wrBlock]
462                           && bitsRead <= rbEnd[wrBlock]) {
463          bsPutBit ( bsWr, b );
464       }
465 
466       bitsRead++;
467 
468       if (bitsRead == rbEnd[wrBlock]+1) {
469          if (outFile != NULL) {
470             bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
471             bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
472             bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
473             bsPutUInt32 ( bsWr, blockCRC );
474             bsClose ( bsWr );
475          }
476          if (wrBlock >= rbCtr) break;
477          wrBlock++;
478       } else
479       if (bitsRead == rbStart[wrBlock]) {
480          /* Create the output file name, correctly handling leading paths.
481             (31.10.2001 by Sergey E. Kusikov) */
482          Char* split;
483          Int32 ofs, k;
484          for (k = 0; k < BZ_MAX_FILENAME; k++)
485             outFileName[k] = 0;
486          strcpy (outFileName, inFileName);
487          split = strrchr (outFileName, BZ_SPLIT_SYM);
488          if (split == NULL) {
489             split = outFileName;
490          } else {
491             ++split;
492 	 }
493 	 /* Now split points to the start of the basename. */
494          ofs  = split - outFileName;
495          sprintf (split, "rec%5d", wrBlock+1);
496          for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
497          strcat (outFileName, inFileName + ofs);
498 
499          if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
500 
501          fprintf ( stderr, "   writing block %d to `%s' ...\n",
502                            wrBlock+1, outFileName );
503 
504          outFile = fopen ( outFileName, "wb" );
505          if (outFile == NULL) {
506             fprintf ( stderr, "%s: can't write `%s'\n",
507                       progName, outFileName );
508             exit(1);
509          }
510          bsWr = bsOpenWriteStream ( outFile );
511          bsPutUChar ( bsWr, BZ_HDR_B );
512          bsPutUChar ( bsWr, BZ_HDR_Z );
513          bsPutUChar ( bsWr, BZ_HDR_h );
514          bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
515          bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
516          bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
517          bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
518       }
519    }
520 
521    fprintf ( stderr, "%s: finished\n", progName );
522    return 0;
523 }
524 
525 
526 
527 /*-----------------------------------------------------------*/
528 /*--- end                                  bzip2recover.c ---*/
529 /*-----------------------------------------------------------*/
530