1 /* $NetBSD: bzip2recover.c,v 1.5 2019/07/21 11:52:14 maya Exp $ */
2
3 /*-----------------------------------------------------------*/
4 /*--- Block recoverer program for bzip2 ---*/
5 /*--- bzip2recover.c ---*/
6 /*-----------------------------------------------------------*/
7
8 /* ------------------------------------------------------------------
9 This file is part of bzip2/libbzip2, a program and library for
10 lossless, block-sorting data compression.
11
12 bzip2/libbzip2 version 1.0.8 of 13 July 2019
13 Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
14
15 Please read the WARNING, DISCLAIMER and PATENTS sections in the
16 README file.
17
18 This program is released under the terms of the license contained
19 in the file LICENSE.
20 ------------------------------------------------------------------ */
21
22 /* This program is a complete hack and should be rewritten properly.
23 It isn't very complicated. */
24
25 #include <inttypes.h>
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31
32 /* This program records bit locations in the file to be recovered.
33 That means that if 64-bit ints are not supported, we will not
34 be able to recover .bz2 files over 512MB (2^32 bits) long.
35 On GNU supported platforms, we take advantage of the 64-bit
36 int support to circumvent this problem. Ditto MSVC.
37
38 This change occurred in version 1.0.2; all prior versions have
39 the 512MB limitation.
40 */
/* Type used for bit offsets into the input file.  The first branch is
   always the one compiled; the other two are retained as fallbacks for
   toolchains without <inttypes.h>. */
#if 1
typedef uint64_t MaybeUInt64;
#  define MaybeUInt64_FMT "%" PRIu64
#elif defined(_MSC_VER)
typedef unsigned __int64 MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
typedef unsigned int MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif

/* Short aliases for the basic types used throughout this file. */
typedef char           Char;
typedef unsigned char  UChar;
typedef int            Int32;
typedef unsigned int   UInt32;

/* Boolean type and its two values. */
typedef unsigned char  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)


/* Size, in bytes, of each file-name buffer below. */
#define BZ_MAX_FILENAME 2000

Char progName   [BZ_MAX_FILENAME];   /* argv[0], used as a message prefix */
Char inFileName [BZ_MAX_FILENAME];   /* path of the damaged input file    */
Char outFileName[BZ_MAX_FILENAME];   /* path of the block being written   */

/* bytesOut is incremented for every byte emitted by the bit writer;
   bytesIn is not modified by the code visible in this file. */
MaybeUInt64 bytesIn  = 0;
MaybeUInt64 bytesOut = 0;
71
72 /*---------------------------------------------------*/
73 /*--- Bit stream I/O ---*/
74 /*---------------------------------------------------*/
75
76 typedef
77 struct {
78 FILE* handle;
79 Int32 buffer;
80 Int32 buffLive;
81 Char mode;
82 }
83 BitStream;
84
85 static void readError ( void );
86 static void writeError ( void );
87 static void mallocFail ( Int32 n );
88 static BitStream* bsOpenReadStream ( FILE* stream );
89 static BitStream* bsOpenWriteStream ( FILE* stream );
90 static void bsPutBit ( BitStream* bs, Int32 bit );
91 static Int32 bsGetBit ( BitStream* bs );
92 static void bsClose ( BitStream* bs );
93 static void bsPutUChar ( BitStream* bs, UChar c );
94 static void bsPutUInt32 ( BitStream* bs, UInt32 c );
95 static Bool endsInBz2 ( Char* name );
96 static void tooManyBlocks ( Int32 max_handled_blocks );
97
98
99 /*---------------------------------------------------*/
100 /*--- Header bytes ---*/
101 /*---------------------------------------------------*/
102
/* Stream header bytes, given as ASCII codes.  The fourth header byte is
   formed by the caller as BZ_HDR_0 plus a digit. */
#define BZ_HDR_B   0x42   /* 'B' */
#define BZ_HDR_Z   0x5a   /* 'Z' */
#define BZ_HDR_h   0x68   /* 'h' */
#define BZ_HDR_0   0x30   /* '0' */
107
108
109 /*---------------------------------------------------*/
110 /*--- I/O errors ---*/
111 /*---------------------------------------------------*/
112
113 /*---------------------------------------------*/
readError(void)114 __dead static void readError ( void )
115 {
116 fprintf ( stderr,
117 "%s: I/O error reading `%s', possible reason follows.\n",
118 progName, inFileName );
119 perror ( progName );
120 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
121 progName );
122 exit ( 1 );
123 }
124
125
126 /*---------------------------------------------*/
writeError(void)127 __dead static void writeError ( void )
128 {
129 fprintf ( stderr,
130 "%s: I/O error reading `%s', possible reason follows.\n",
131 progName, inFileName );
132 perror ( progName );
133 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
134 progName );
135 exit ( 1 );
136 }
137
138
139 /*---------------------------------------------*/
mallocFail(Int32 n)140 __dead static void mallocFail ( Int32 n )
141 {
142 fprintf ( stderr,
143 "%s: malloc failed on request for %d bytes.\n",
144 progName, n );
145 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
146 progName );
147 exit ( 1 );
148 }
149
150
151 /*---------------------------------------------*/
tooManyBlocks(Int32 max_handled_blocks)152 __dead static void tooManyBlocks ( Int32 max_handled_blocks )
153 {
154 fprintf ( stderr,
155 "%s: `%s' appears to contain more than %d blocks\n",
156 progName, inFileName, max_handled_blocks );
157 fprintf ( stderr,
158 "%s: and cannot be handled. To fix, increase\n",
159 progName );
160 fprintf ( stderr,
161 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
162 progName );
163 exit ( 1 );
164 }
165
166
167
168 /*---------------------------------------------*/
bsOpenReadStream(FILE * stream)169 static BitStream* bsOpenReadStream ( FILE* stream )
170 {
171 BitStream *bs = malloc ( sizeof(BitStream) );
172 if (bs == NULL) mallocFail ( sizeof(BitStream) );
173 bs->handle = stream;
174 bs->buffer = 0;
175 bs->buffLive = 0;
176 bs->mode = 'r';
177 return bs;
178 }
179
180
181 /*---------------------------------------------*/
bsOpenWriteStream(FILE * stream)182 static BitStream* bsOpenWriteStream ( FILE* stream )
183 {
184 BitStream *bs = malloc ( sizeof(BitStream) );
185 if (bs == NULL) mallocFail ( sizeof(BitStream) );
186 bs->handle = stream;
187 bs->buffer = 0;
188 bs->buffLive = 0;
189 bs->mode = 'w';
190 return bs;
191 }
192
193
194 /*---------------------------------------------*/
/* Append the low bit of `bit' to the write stream.  Bits accumulate in
   bs->buffer; once eight are pending, the next call first emits them as
   one byte (counted in bytesOut) and then starts a new byte with the
   incoming bit.  A failed putc() ends the program via writeError(). */
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   const Int32 lowBit = bit & 0x1;

   if (bs->buffLive != 8) {
      /* Still room in the current byte: shift the new bit in. */
      bs->buffer = ( (bs->buffer << 1) | lowBit );
      bs->buffLive++;
      return;
   }

   /* A full byte is pending: flush it before storing the new bit. */
   if (putc ( (UChar) bs->buffer, bs->handle ) == EOF) writeError();
   bytesOut++;
   bs->buffer   = lowBit;
   bs->buffLive = 1;
}
208
209
210 /*---------------------------------------------*/
211 /*--
212 Returns 0 or 1, or 2 to indicate EOF.
213 --*/
bsGetBit(BitStream * bs)214 static Int32 bsGetBit ( BitStream* bs )
215 {
216 if (bs->buffLive > 0) {
217 bs->buffLive --;
218 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
219 } else {
220 Int32 retVal = getc ( bs->handle );
221 if ( retVal == EOF ) {
222 if (errno != 0) readError();
223 return 2;
224 }
225 bs->buffLive = 7;
226 bs->buffer = retVal;
227 return ( ((bs->buffer) >> 7) & 0x1 );
228 }
229 }
230
231
232 /*---------------------------------------------*/
/* Finish with a bit stream: close the underlying FILE and free bs.
   For a write stream the pending bits are first padded with zero bits
   up to a whole byte, that byte is written (and counted in bytesOut)
   and the stream is flushed.  Any failure ends the program through
   writeError() or readError(), according to the stream's mode. */
static void bsClose ( BitStream* bs )
{
   const Bool writing = (bs->mode == 'w');

   if (writing) {
      /* Left-align whatever bits are pending within the final byte. */
      for ( ; bs->buffLive < 8; bs->buffLive++) {
         bs->buffer <<= 1;
      }
      if (putc ( (UChar) (bs->buffer), bs->handle ) == EOF) writeError();
      bytesOut++;
      if (fflush ( bs->handle ) == EOF) writeError();
   }

   if (fclose ( bs->handle ) == EOF) {
      if (writing) writeError(); else readError();
   }
   free ( bs );
}
254
255
256 /*---------------------------------------------*/
/* Write the eight bits of c to the stream, most significant bit first. */
static void bsPutUChar ( BitStream* bs, UChar c )
{
   UInt32 mask;

   for (mask = 0x80; mask != 0; mask >>= 1)
      bsPutBit ( bs, (c & mask) ? 1 : 0 );
}
263
264
265 /*---------------------------------------------*/
/* Write the 32 bits of c to the stream, most significant bit first. */
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   UInt32 mask;

   for (mask = 0x80000000U; mask != 0; mask >>= 1)
      bsPutBit ( bs, (c & mask) ? 1 : 0 );
}
273
274
275 /*---------------------------------------------*/
endsInBz2(Char * name)276 static Bool endsInBz2 ( Char* name )
277 {
278 Int32 n = strlen ( name );
279 if (n <= 4) return False;
280 return
281 (name[n-4] == '.' &&
282 name[n-3] == 'b' &&
283 name[n-2] == 'z' &&
284 name[n-1] == '2');
285 }
286
287
288 /*---------------------------------------------------*/
289 /*--- ---*/
290 /*---------------------------------------------------*/
291
292 /* This logic isn't really right when it comes to Cygwin. */
/* Directory separator used to find where the basename starts. */
#if defined(_WIN32)
#  define BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit markers searched for in the input, each split into its upper
   16 bits (_HI) and lower 32 bits (_LO). */
#define BLOCK_HEADER_HI   0x00003141UL
#define BLOCK_HEADER_LO   0x59265359UL

#define BLOCK_ENDMARK_HI  0x00001772UL
#define BLOCK_ENDMARK_LO  0x45385090UL

/* Capacity of the block-boundary tables.  Increase if necessary.
   However, a .bz2 file with > 50000 blocks would have an uncompressed
   size of at least 40GB, so the chances are low you'll need to up
   this. */
#define BZ_MAX_HANDLED_BLOCKS 50000
310
311 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
312 MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
313 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
314 MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
315
/* Program entry point: split a damaged .bz2 file into one small .bz2
   file per recoverable block.

   Expects exactly one argument, the path of the damaged file.  Works in
   two passes over the input:

     1. Scan bit by bit for the 48-bit block-header and end-of-stream
        markers, recording where each candidate block starts and ends
        (bStart/bEnd).  Candidates of at least 130 bits are kept in
        rbStart/rbEnd.
     2. Re-read the input and copy each kept block into its own file,
        named by inserting "recNNNNN" in front of the input's basename.
        Each output gets a fresh stream header and block header, then
        the block's bits, an end-of-stream marker and the block CRC.

   Returns 0 on success; every failure path exits with status 1. */
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       b, wrBlock, currBlock, rbCtr;
   MaybeUInt64 bitsRead;

   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;

   strncpy ( progName, argv[0], BZ_MAX_FILENAME-1);
   progName[BZ_MAX_FILENAME-1]='\0';
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr,
             "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      switch (sizeof(MaybeUInt64)) {
         case 8:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: None\n");
            break;
         case 4:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: 512 MB\n");
            fprintf(stderr,
                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
                    "\tunsigned 64-bit int.\n");
            break;
         default:
            fprintf(stderr,
                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                    "configuration error.\n");
            break;
      }
      exit(1);
   }

   /* Leave 20 bytes of headroom: the output name adds "recNNNNN" and
      possibly ".bz2" to the input name, and the copies below are
      unbounded. */
   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
                progName, (int)strlen(argv[1]) );
      exit(1);
   }

   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   /*-- pass 1: locate the block markers. --*/
   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         if (bitsRead >= bStart[currBlock] &&
            (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                                 " to " MaybeUInt64_FMT " (incomplete)\n",
                         currBlock,  bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }
      /* buffHi:buffLo is a 64-bit shift register holding the most
         recent bits; only its low 48 bits are compared. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         if (bitsRead > 49) {
            bEnd[currBlock] = bitsRead-49;
         } else {
            bEnd[currBlock] = 0;
         }
         /* The positions are unsigned, so make sure the end is not
            before the start ahead of subtracting; otherwise two
            markers close together would wrap around and pass the
            size test. */
         if (currBlock > 0 &&
             bEnd[currBlock] >= bStart[currBlock] &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                              " to " MaybeUInt64_FMT "\n",
                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         /* currBlock is about to be incremented and used as an index,
            so it must stay below BZ_MAX_HANDLED_BLOCKS afterwards. */
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS-1)
            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   };

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = 0;

   /*-- pass 2: copy each kept block into its own file. --*/
   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      /* 48 bits into the block, the low 32 bits of the 48-bit window
         are the block's CRC. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         if (outFile != NULL) {
            /* Terminate the output with an end-of-stream marker and
               the CRC captured above. */
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            outFile = NULL;
         }
         if (wrBlock >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* Create the output file name, correctly handling leading paths.
            (31.10.2001 by Sergey E. Kusikov) */
         Char* split;
         Int32 ofs, k;
         for (k = 0; k < BZ_MAX_FILENAME; k++)
            outFileName[k] = 0;
         strcpy (outFileName, inFileName);
         split = strrchr (outFileName, BZ_SPLIT_SYM);
         if (split == NULL) {
            split = outFileName;
         } else {
            ++split;
         }
         /* Now split points to the start of the basename. */
         ofs  = split - outFileName;
         sprintf (split, "rec%5d", wrBlock+1);
         /* "%5d" pads with spaces; turn them into leading zeros. */
         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
         strcat (outFileName, inFileName + ofs);

         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, " writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         /* Stream header "BZh9" followed by the block-header marker. */
         bsPutUChar ( bsWr, BZ_HDR_B );
         bsPutUChar ( bsWr, BZ_HDR_Z );
         bsPutUChar ( bsWr, BZ_HDR_h );
         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
526
527
528
529 /*-----------------------------------------------------------*/
530 /*--- end bzip2recover.c ---*/
531 /*-----------------------------------------------------------*/
532