xref: /netbsd-src/external/bsd/bzip2/dist/bzip2recover.c (revision c4f47eb4fbbce2c384f784d0295d2f037ed4819c)
1 /*	$NetBSD: bzip2recover.c,v 1.5 2019/07/21 11:52:14 maya Exp $	*/
2 
3 /*-----------------------------------------------------------*/
4 /*--- Block recoverer program for bzip2                   ---*/
5 /*---                                      bzip2recover.c ---*/
6 /*-----------------------------------------------------------*/
7 
8 /* ------------------------------------------------------------------
9    This file is part of bzip2/libbzip2, a program and library for
10    lossless, block-sorting data compression.
11 
12    bzip2/libbzip2 version 1.0.8 of 13 July 2019
13    Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
14 
15    Please read the WARNING, DISCLAIMER and PATENTS sections in the
16    README file.
17 
18    This program is released under the terms of the license contained
19    in the file LICENSE.
20    ------------------------------------------------------------------ */
21 
22 /* This program is a complete hack and should be rewritten properly.
23 	 It isn't very complicated. */
24 
25 #include <inttypes.h>
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 
32 /* This program records bit locations in the file to be recovered.
33    That means that if 64-bit ints are not supported, we will not
34    be able to recover .bz2 files over 512MB (2^32 bits) long.
35    On GNU supported platforms, we take advantage of the 64-bit
36    int support to circumvent this problem.  Ditto MSVC.
37 
38    This change occurred in version 1.0.2; all prior versions have
39    the 512MB limitation.
40 */
/* Type used for every bit offset into the input file, together with the
   printf conversion that prints it.  The first branch is unconditionally
   selected; the remaining branches are kept as the historical fallbacks
   (MSVC's native 64-bit type, then a plain unsigned int). */
#if 1
typedef uint64_t MaybeUInt64;
#  define MaybeUInt64_FMT "%" PRIu64
#elif defined(_MSC_VER)
typedef unsigned __int64 MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
typedef unsigned int MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
53 
/* Short aliases for the primitive types used throughout this file. */
typedef unsigned int  UInt32;
typedef int           Int32;
typedef unsigned char UChar;
typedef char          Char;

/* Boolean stored in a single unsigned byte, with its two values. */
typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)
61 
62 
63 #define BZ_MAX_FILENAME 2000
64 
65 Char inFileName[BZ_MAX_FILENAME];
66 Char outFileName[BZ_MAX_FILENAME];
67 Char progName[BZ_MAX_FILENAME];
68 
69 MaybeUInt64 bytesOut = 0;
70 MaybeUInt64 bytesIn  = 0;
71 
72 /*---------------------------------------------------*/
73 /*--- Bit stream I/O                              ---*/
74 /*---------------------------------------------------*/
75 
76 typedef
77    struct {
78       FILE*  handle;
79       Int32  buffer;
80       Int32  buffLive;
81       Char   mode;
82    }
83    BitStream;
84 
85 static void readError ( void );
86 static void writeError ( void );
87 static void mallocFail ( Int32 n );
88 static BitStream* bsOpenReadStream ( FILE* stream );
89 static BitStream* bsOpenWriteStream ( FILE* stream );
90 static void bsPutBit ( BitStream* bs, Int32 bit );
91 static Int32 bsGetBit ( BitStream* bs );
92 static void bsClose ( BitStream* bs );
93 static void bsPutUChar ( BitStream* bs, UChar c );
94 static void bsPutUInt32 ( BitStream* bs, UInt32 c );
95 static Bool endsInBz2 ( Char* name );
96 static void tooManyBlocks ( Int32 max_handled_blocks );
97 
98 
99 /*---------------------------------------------------*/
100 /*--- Header bytes                                ---*/
101 /*---------------------------------------------------*/
102 
/* Byte values of the stream header; the ASCII character each one encodes
   is shown alongside.  The block-size digit is BZ_HDR_0 plus 1..9. */
#define BZ_HDR_B 0x42   /* 'B' */
#define BZ_HDR_Z 0x5a   /* 'Z' */
#define BZ_HDR_h 0x68   /* 'h' */
#define BZ_HDR_0 0x30   /* '0' */
107 
108 
109 /*---------------------------------------------------*/
110 /*--- I/O errors                                  ---*/
111 /*---------------------------------------------------*/
112 
113 /*---------------------------------------------*/
readError(void)114 __dead static void readError ( void )
115 {
116    fprintf ( stderr,
117              "%s: I/O error reading `%s', possible reason follows.\n",
118             progName, inFileName );
119    perror ( progName );
120    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
121              progName );
122    exit ( 1 );
123 }
124 
125 
126 /*---------------------------------------------*/
writeError(void)127 __dead static void writeError ( void )
128 {
129    fprintf ( stderr,
130              "%s: I/O error reading `%s', possible reason follows.\n",
131             progName, inFileName );
132    perror ( progName );
133    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
134              progName );
135    exit ( 1 );
136 }
137 
138 
139 /*---------------------------------------------*/
mallocFail(Int32 n)140 __dead static void mallocFail ( Int32 n )
141 {
142    fprintf ( stderr,
143              "%s: malloc failed on request for %d bytes.\n",
144             progName, n );
145    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
146              progName );
147    exit ( 1 );
148 }
149 
150 
151 /*---------------------------------------------*/
tooManyBlocks(Int32 max_handled_blocks)152 __dead static void tooManyBlocks ( Int32 max_handled_blocks )
153 {
154    fprintf ( stderr,
155              "%s: `%s' appears to contain more than %d blocks\n",
156             progName, inFileName, max_handled_blocks );
157    fprintf ( stderr,
158              "%s: and cannot be handled.  To fix, increase\n",
159              progName );
160    fprintf ( stderr,
161              "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
162              progName );
163    exit ( 1 );
164 }
165 
166 
167 
168 /*---------------------------------------------*/
bsOpenReadStream(FILE * stream)169 static BitStream* bsOpenReadStream ( FILE* stream )
170 {
171    BitStream *bs = malloc ( sizeof(BitStream) );
172    if (bs == NULL) mallocFail ( sizeof(BitStream) );
173    bs->handle = stream;
174    bs->buffer = 0;
175    bs->buffLive = 0;
176    bs->mode = 'r';
177    return bs;
178 }
179 
180 
181 /*---------------------------------------------*/
bsOpenWriteStream(FILE * stream)182 static BitStream* bsOpenWriteStream ( FILE* stream )
183 {
184    BitStream *bs = malloc ( sizeof(BitStream) );
185    if (bs == NULL) mallocFail ( sizeof(BitStream) );
186    bs->handle = stream;
187    bs->buffer = 0;
188    bs->buffLive = 0;
189    bs->mode = 'w';
190    return bs;
191 }
192 
193 
194 /*---------------------------------------------*/
/*--
   Append the low bit of `bit' to the write stream `bs'.

   Bits accumulate in bs->buffer, most significant first.  A full byte
   is only written out when the next bit arrives (or in bsClose), at
   which point bytesOut is incremented and the new bit starts a fresh
   byte.  A failed putc is reported through writeError().
--*/
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   const Int32 lowBit = bit & 0x1;

   if (bs->buffLive != 8) {
      /* Room left in the current byte: shift the new bit in. */
      bs->buffer = ( (bs->buffer << 1) | lowBit );
      bs->buffLive++;
      return;
   }

   /* The buffered byte is complete: emit it, then restart with this bit. */
   if (putc ( (UChar) bs->buffer, bs->handle ) == EOF) writeError();
   bytesOut++;
   bs->buffer = lowBit;
   bs->buffLive = 1;
}
208 
209 
210 /*---------------------------------------------*/
211 /*--
212    Returns 0 or 1, or 2 to indicate EOF.
213 --*/
bsGetBit(BitStream * bs)214 static Int32 bsGetBit ( BitStream* bs )
215 {
216    if (bs->buffLive > 0) {
217       bs->buffLive --;
218       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
219    } else {
220       Int32 retVal = getc ( bs->handle );
221       if ( retVal == EOF ) {
222          if (errno != 0) readError();
223          return 2;
224       }
225       bs->buffLive = 7;
226       bs->buffer = retVal;
227       return ( ((bs->buffer) >> 7) & 0x1 );
228    }
229 }
230 
231 
232 /*---------------------------------------------*/
/*--
   Finish with the stream `bs': close its FILE and free the BitStream.

   For a writer, the pending bits are first shifted left until the byte
   is full (padding with zero bits), that byte is written, bytesOut is
   incremented and the FILE is flushed.  Any failure is reported through
   writeError() for a writer or readError() for a reader.
--*/
static void bsClose ( BitStream* bs )
{
   const Bool writing = (bs->mode == 'w');

   if (writing) {
      /* Left-justify whatever bits are pending in the final byte. */
      for ( ; bs->buffLive < 8; bs->buffLive++)
         bs->buffer <<= 1;

      if (putc ( (UChar) (bs->buffer), bs->handle ) == EOF) writeError();
      bytesOut++;
      if (fflush ( bs->handle ) == EOF) writeError();
   }

   if (fclose ( bs->handle ) == EOF) {
      if (writing)
         writeError();
      else
         readError();
   }
   free ( bs );
}
254 
255 
256 /*---------------------------------------------*/
/*--
   Write the eight bits of `c' to `bs', most significant bit first.
--*/
static void bsPutUChar ( BitStream* bs, UChar c )
{
   UInt32 mask;

   /* Walk a single-bit mask from bit 7 down to bit 0. */
   for (mask = 0x80; mask != 0; mask >>= 1)
      bsPutBit ( bs, (((UInt32) c) & mask) ? 1 : 0 );
}
263 
264 
265 /*---------------------------------------------*/
/*--
   Write the thirty-two bits of `c' to `bs', most significant bit first.
--*/
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   UInt32 mask;

   /* Walk a single-bit mask from bit 31 down to bit 0. */
   for (mask = 0x80000000U; mask != 0; mask >>= 1)
      bsPutBit ( bs, (c & mask) ? 1 : 0 );
}
273 
274 
275 /*---------------------------------------------*/
endsInBz2(Char * name)276 static Bool endsInBz2 ( Char* name )
277 {
278    Int32 n = strlen ( name );
279    if (n <= 4) return False;
280    return
281       (name[n-4] == '.' &&
282        name[n-3] == 'b' &&
283        name[n-2] == 'z' &&
284        name[n-1] == '2');
285 }
286 
287 
288 /*---------------------------------------------------*/
289 /*---                                             ---*/
290 /*---------------------------------------------------*/
291 
292 /* This logic isn't really right when it comes to Cygwin. */
/* Character that separates directory components in a path name. */
#if defined(_WIN32)
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif
298 
/* The two 48-bit markers searched for in the input, each split into its
   upper 16 bits (HI) and lower 32 bits (LO). */

/* Marker that introduces a compressed block: 0x314159265359. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

/* Marker that ends the stream: 0x177245385090. */
#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL
304 
305 /* Increase if necessary.  However, a .bz2 file with > 50000 blocks
306    would have an uncompressed size of at least 40GB, so the chances
307    are low you'll need to up this.
308 */
309 #define BZ_MAX_HANDLED_BLOCKS 50000
310 
311 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
312 MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
313 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
314 MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
315 
/*--
   Program entry point.  Expects exactly one argument: the name of the
   damaged .bz2 file.

   Pass 1 reads the file bit by bit, looking for the 48-bit block-header
   and end-of-stream markers.  The bits lying between two consecutive
   markers form a candidate range; every candidate after the first that
   is at least 130 bits long is recorded in rbStart[]/rbEnd[].

   Pass 2 reads the file again and copies each recorded range into its
   own file, named by inserting "recNNNNN" in front of the input's base
   name (and appending ".bz2" if it is missing).  Each output consists
   of a fresh "BZh9" stream header and block-header marker, the copied
   bits, the end-of-stream marker, and the first 32 bits of the range
   repeated as the stream CRC.

   Returns 0 on success.  Every failure path exits with status 1.
--*/
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       b, wrBlock, currBlock, rbCtr;
   MaybeUInt64 bitsRead;

   UInt32      buffHi, buffLo, blockCRC;

   strncpy ( progName, argv[0], BZ_MAX_FILENAME-1);
   progName[BZ_MAX_FILENAME-1]='\0';
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr,
             "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      switch (sizeof(MaybeUInt64)) {
         case 8:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: None\n");
            break;
         case 4:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: 512 MB\n");
            fprintf(stderr,
                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
                    "\tunsigned 64-bit int.\n");
            break;
         default:
            fprintf(stderr,
                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                    "configuration error.\n");
            break;
      }
      exit(1);
   }

   /* Leave 20 bytes of headroom in the name buffers: the output name
      adds "recNNNNN" and possibly ".bz2" to the input name. */
   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
                progName, (int)strlen(argv[1]) );
      exit(1);
   }

   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   /*-- Pass 1: locate the markers. --*/

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /* End of input: keep the trailing range only if it holds at
            least 40 bits, otherwise drop the candidate. */
         if (bitsRead >= bStart[currBlock] &&
            (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
                                 " to " MaybeUInt64_FMT " (incomplete)\n",
                         currBlock,  bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }

      /* buffHi:buffLo is a 64-bit shift register of the latest bits. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /* The marker occupies the last 48 bits read, so the current
            range ends on the bit just before it. */
         if (bitsRead > 49) {
            bEnd[currBlock] = bitsRead-49;
         } else {
            bEnd[currBlock] = 0;
         }
         /* The offsets are unsigned: when two markers are adjacent the
            end lies before the start and the subtraction would wrap to
            a huge value, so require end >= start before measuring. */
         if (currBlock > 0 &&
             bEnd[currBlock] >= bStart[currBlock] &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
                              " to " MaybeUInt64_FMT "\n",
                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         /* Check before advancing: the tables hold exactly
            BZ_MAX_HANDLED_BLOCKS entries, so the index about to be
            used must stay below that. */
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS - 1)
            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   };

   /*-- Pass 2: copy each recorded range to its own file. --*/

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = 0;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);

      /* 48 bits into the range, the first 32 of them sit in bits
         47..16 of the shift register; keep them as the CRC. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         /* Range finished: append the end-of-stream marker and CRC. */
         if (outFile != NULL) {
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            outFile = NULL;
         }
         if (wrBlock >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* Build the output name: the directory part of the input name,
            then "rec" and the zero-padded block number, then the input's
            base name.  (Leading-path handling: 31.10.2001 by Sergey E.
            Kusikov.) */
         const Char* base = strrchr ( inFileName, BZ_SPLIT_SYM );
         base = (base == NULL) ? inFileName : base + 1;
         snprintf ( outFileName, sizeof(outFileName), "%.*srec%05d%s",
                    (int)(base - inFileName), inFileName,
                    wrBlock+1, base );

         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, "   writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         /* Start the output with "BZh9" and the block-header marker. */
         bsWr = bsOpenWriteStream ( outFile );
         bsPutUChar ( bsWr, BZ_HDR_B );
         bsPutUChar ( bsWr, BZ_HDR_Z );
         bsPutUChar ( bsWr, BZ_HDR_h );
         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   /* Release the second-pass reader and its FILE. */
   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
526 
527 
528 
529 /*-----------------------------------------------------------*/
530 /*--- end                                  bzip2recover.c ---*/
531 /*-----------------------------------------------------------*/
532