1
2 /*-----------------------------------------------------------*/
3 /*--- Block recoverer program for bzip2 ---*/
4 /*--- bzip2recover.c ---*/
5 /*-----------------------------------------------------------*/
6
7 /*--
8 This program is bzip2recover, a program to attempt data
9 salvage from damaged files created by the accompanying
10 bzip2-1.0 program.
11
12 Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
13
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions
16 are met:
17
18 1. Redistributions of source code must retain the above copyright
19 notice, this list of conditions and the following disclaimer.
20
21 2. The origin of this software must not be misrepresented; you must
22 not claim that you wrote the original software. If you use this
23 software in a product, an acknowledgment in the product
24 documentation would be appreciated but is not required.
25
26 3. Altered source versions must be plainly marked as such, and must
27 not be misrepresented as being the original software.
28
29 4. The name of the author may not be used to endorse or promote
30 products derived from this software without specific prior written
31 permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
39 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
41 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
42 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
45 Julian Seward, Cambridge, UK.
46 jseward@acm.org
47 bzip2/libbzip2 version 1.0 of 21 March 2000
48 --*/
49
50 /*--
51 This program is a complete hack and should be rewritten
52 properly. It isn't very complicated.
53 --*/
54
55 #include <stdio.h>
56 #include <errno.h>
57 #include <stdlib.h>
58 #include <string.h>
59
/*-- Basic scalar types used throughout this file.  The marker
     matching in main() shifts UInt32 values by 31 and 16 bits and
     compares them against 32-bit constants, so unsigned int appears
     to be assumed to be exactly 32 bits wide. --*/
60 typedef unsigned int UInt32;
61 typedef int Int32;
62 typedef unsigned char UChar;
63 typedef char Char;
/*-- Boolean held in one unsigned char; True and False are its
     only intended values. --*/
64 typedef unsigned char Bool;
65 #define True ((Bool)1)
66 #define False ((Bool)0)
67
68
/*-- Fixed-size name buffers.  main() fills progName from argv[0]
     and inFileName from argv[1], and rebuilds outFileName for every
     recovered block.  The names appear in the diagnostics printed
     by the error reporters and by main(). --*/
69 Char inFileName[2000];
70 Char outFileName[2000];
71 Char progName[2000];
72
/*-- bytesOut is incremented for every byte emitted by bsPutBit()
     and bsClose().  bytesIn is initialised here but is not updated
     anywhere in the visible code. --*/
73 UInt32 bytesOut = 0;
74 UInt32 bytesIn = 0;
75
76
77 /*---------------------------------------------------*/
78 /*--- I/O errors ---*/
79 /*---------------------------------------------------*/
80
81 /*---------------------------------------------*/
readError(void)82 void readError ( void )
83 {
84 fprintf ( stderr,
85 "%s: I/O error reading `%s', possible reason follows.\n",
86 progName, inFileName );
87 perror ( progName );
88 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
89 progName );
90 exit ( 1 );
91 }
92
93
94 /*---------------------------------------------*/
writeError(void)95 void writeError ( void )
96 {
97 fprintf ( stderr,
98 "%s: I/O error reading `%s', possible reason follows.\n",
99 progName, inFileName );
100 perror ( progName );
101 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
102 progName );
103 exit ( 1 );
104 }
105
106
107 /*---------------------------------------------*/
/*--
   Report an allocation failure and terminate.

   n is the size, in bytes, of the request that failed; it is only
   used in the diagnostic.  Prints the failure and the "may be
   incomplete" warning to stderr, then exits with status 1.
   Never returns.
--*/
void mallocFail ( Int32 n )
{
   /* Both lines go out through one call; the text is unchanged. */
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n"
             "%s: warning: output file(s) may be incomplete.\n",
             progName, n,
             progName );
   exit ( 1 );
}
117
118
119 /*---------------------------------------------------*/
120 /*--- Bit stream I/O ---*/
121 /*---------------------------------------------------*/
122
/*-- State needed to read or write a stdio stream one bit at a
     time.  Created by bsOpenReadStream()/bsOpenWriteStream() and
     released by bsClose(). --*/
123 typedef
124 struct {
/* Underlying stdio stream; bsClose() closes it. */
125 FILE* handle;
/* Reading: the byte most recently fetched by bsGetBit().
   Writing: bits accumulated so far, newest in the low-order position. */
126 Int32 buffer;
/* Reading: how many bits of buffer have not been handed out yet.
   Writing: how many bits have been accumulated in buffer. */
127 Int32 buffLive;
/* 'r' or 'w', set by the open routine and consulted by bsClose(). */
128 Char mode;
129 }
130 BitStream;
131
132
133 /*---------------------------------------------*/
bsOpenReadStream(FILE * stream)134 BitStream* bsOpenReadStream ( FILE* stream )
135 {
136 BitStream *bs = malloc ( sizeof(BitStream) );
137 if (bs == NULL) mallocFail ( sizeof(BitStream) );
138 bs->handle = stream;
139 bs->buffer = 0;
140 bs->buffLive = 0;
141 bs->mode = 'r';
142 return bs;
143 }
144
145
146 /*---------------------------------------------*/
bsOpenWriteStream(FILE * stream)147 BitStream* bsOpenWriteStream ( FILE* stream )
148 {
149 BitStream *bs = malloc ( sizeof(BitStream) );
150 if (bs == NULL) mallocFail ( sizeof(BitStream) );
151 bs->handle = stream;
152 bs->buffer = 0;
153 bs->buffLive = 0;
154 bs->mode = 'w';
155 return bs;
156 }
157
158
159 /*---------------------------------------------*/
/*--
   Append one bit to a write stream.

   Only the least significant bit of `bit' is used.  Bits are
   shifted into bs->buffer from the right.  A full byte is not
   written when its eighth bit arrives but on the following call,
   which then starts a new byte with the incoming bit.  A failed
   putc() is reported through writeError(); bytesOut counts every
   byte written.
--*/
void bsPutBit ( BitStream* bs, Int32 bit )
{
   const Int32 lowBit = bit & 0x1;

   /* Room left in the current byte: just shift the bit in. */
   if (bs->buffLive != 8) {
      bs->buffer = (bs->buffer << 1) | lowBit;
      bs->buffLive++;
      return;
   }

   /* Buffer holds a complete byte: emit it, then start afresh. */
   if (putc ( (UChar) bs->buffer, bs->handle ) == EOF) writeError();
   bytesOut++;
   bs->buffer   = lowBit;
   bs->buffLive = 1;
}
173
174
175 /*---------------------------------------------*/
176 /*--
177 Returns 0 or 1, or 2 to indicate EOF.
178 --*/
bsGetBit(BitStream * bs)179 Int32 bsGetBit ( BitStream* bs )
180 {
181 if (bs->buffLive > 0) {
182 bs->buffLive --;
183 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
184 } else {
185 Int32 retVal = getc ( bs->handle );
186 if ( retVal == EOF ) {
187 if (errno != 0) readError();
188 return 2;
189 }
190 bs->buffLive = 7;
191 bs->buffer = retVal;
192 return ( ((bs->buffer) >> 7) & 0x1 );
193 }
194 }
195
196
197 /*---------------------------------------------*/
/*--
   Finish with a BitStream: flush it if it is a write stream, close
   the underlying FILE, and free the BitStream itself.

   For mode 'w' the buffered bits are shifted left until the buffer
   counts eight bits, so the final byte is padded with zero bits on
   the right; that byte is always written, bytesOut is incremented,
   and the stream is flushed.  Failures of putc()/fflush() go to
   writeError(); a failing fclose() goes to writeError() or
   readError() according to the stream's mode.
--*/
void bsClose ( BitStream* bs )
{
   Int32 status;
   Int32 writing = (bs->mode == 'w');

   if (writing) {
      /* Pad the last byte out to eight bits. */
      for ( ; bs->buffLive < 8; bs->buffLive++)
         bs->buffer <<= 1;

      status = putc ( (UChar) (bs->buffer), bs->handle );
      if (status == EOF) writeError();
      bytesOut++;

      status = fflush ( bs->handle );
      if (status == EOF) writeError();
   }

   status = fclose ( bs->handle );
   if (status == EOF) {
      if (writing) writeError(); else readError();
   }

   free ( bs );
}
219
220
221 /*---------------------------------------------*/
/*--
   Write the eight bits of c to the stream, most significant
   bit first, by way of bsPutBit().
--*/
void bsPutUChar ( BitStream* bs, UChar c )
{
   const UInt32 value = (UInt32) c;
   Int32        shift = 8;

   /* shift takes the values 7, 6, ..., 0 inside the loop body. */
   while (shift-- > 0)
      bsPutBit ( bs, (value >> shift) & 0x1 );
}
228
229
230 /*---------------------------------------------*/
/*--
   Write bits 31 down to 0 of c to the stream, most significant
   bit first, by way of bsPutBit().
--*/
void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   UInt32 mask;

   /* Walk a single-bit mask from bit 31 down to bit 0. */
   for (mask = 0x80000000U; mask != 0; mask >>= 1)
      bsPutBit ( bs, (c & mask) != 0 );
}
238
239
240 /*---------------------------------------------*/
endsInBz2(Char * name)241 Bool endsInBz2 ( Char* name )
242 {
243 Int32 n = strlen ( name );
244 if (n <= 4) return False;
245 return
246 (name[n-4] == '.' &&
247 name[n-3] == 'b' &&
248 name[n-2] == 'z' &&
249 name[n-1] == '2');
250 }
251
252
253 /*---------------------------------------------------*/
254 /*--- ---*/
255 /*---------------------------------------------------*/
256
/*-- 48-bit marker patterns, each split into a high 16-bit part and
     a low 32-bit part to match the buffHi/buffLo pair that main()
     shifts incoming bits through.  main() treats an occurrence of
     either pattern as a block boundary.  It also writes the bytes
     31 41 59 26 53 59 in front of each recovered block and the
     bytes 17 72 45 38 50 90 after it. --*/
257 #define BLOCK_HEADER_HI 0x00003141UL
258 #define BLOCK_HEADER_LO 0x59265359UL
259
260 #define BLOCK_ENDMARK_HI 0x00001772UL
261 #define BLOCK_ENDMARK_LO 0x45385090UL
262
263
/*-- Bit offsets recorded by the scanning pass in main(); each table
     holds 20000 entries.
     bStart/bEnd:   every region between two markers, indexed by
                    main()'s currBlock counter.
     rbStart/rbEnd: the regions main() accepts for recovery (not the
                    first region, and at least 130 bits long),
                    indexed from 0 by rbCtr; these are the ranges
                    copied out by the second pass. --*/
264 UInt32 bStart[20000];
265 UInt32 bEnd[20000];
266 UInt32 rbStart[20000];
267 UInt32 rbEnd[20000];
268
/*--
   Entry point: bzip2recover damaged_file_name

   Pass 1 reads the input bit by bit through a 48-bit shift
   register (buffHi:buffLo) and records, in bStart[]/bEnd[], the
   bit ranges lying between occurrences of the block-header and
   end-of-stream markers.  Ranges of at least 130 bits (other than
   the region before the first marker) are copied to
   rbStart[]/rbEnd[] as recoverable blocks.

   Pass 2 re-reads the input and, for every recoverable block,
   writes a file consisting of "BZh9", the block header marker, the
   block's bits, the end-of-stream marker and the block's CRC (the
   32 bits that follow the block header in the input).

   Output files are named recNNNN<basename>, with ".bz2" appended
   if missing, and are created next to the input file.

   Returns 0 on success.  Exits with status 1 on a usage error, an
   over-long file name, a file that cannot be opened, too many
   blocks, no blocks found, or (via the helpers) any I/O or
   allocation failure.

   Bit offsets are kept in UInt32 variables, so positions beyond
   what that type can represent are not handled.
--*/
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn;
   BitStream*  bsWr;
   Int32       currBlock, b, wrBlock;
   UInt32      bitsRead;
   Int32       rbCtr;
   Int32       ofs;

   /* Capacity of the block tables, and the longest input name that
      still leaves room in outFileName[] for "rec", the block
      number, ".bz2" and the terminating NUL. */
   const Int32 maxBlocks  = (Int32) (sizeof bStart / sizeof bStart[0]);
   const Int32 maxNameLen = (Int32) sizeof inFileName - 20;

   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;
   Char*       split;

   /* argv[0] may legitimately be absent; fall back to a fixed name. */
   strncpy ( progName,
             (argc > 0 && argv[0] != NULL) ? argv[0] : "bzip2recover",
             sizeof progName );
   progName[sizeof progName-1] = 0;
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                progName, progName );
      exit(1);
   }

   /* Refuse names that would overflow inFileName[]/outFileName[]. */
   if (strlen ( argv[1] ) >= (size_t) maxNameLen) {
      fprintf ( stderr,
                "%s: supplied file name is too long (>= %d chars).\n",
                progName, maxNameLen );
      exit(1);
   }
   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   /*-- Pass 1: locate the markers. --*/

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /* End of input: note a trailing, unterminated region if it
            is at least 40 bits long; otherwise discard it. */
         if (bitsRead >= bStart[currBlock] &&
             (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, " block %d runs from %u to %u (incomplete)\n",
                         currBlock, bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }

      /* Shift the new bit into the 48-bit window. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);

      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /* The region being tracked ends just before this marker. */
         if (bitsRead > 49)
            bEnd[currBlock] = bitsRead-49; else
            bEnd[currBlock] = 0;

         if (currBlock > 0 &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, " block %d runs from %u to %u\n",
                      rbCtr+1, bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr]   = bEnd[currBlock];
            rbCtr++;
         }

         /* Stay inside the fixed-size tables.  rbCtr never exceeds
            currBlock, so this bound protects all four arrays. */
         currBlock++;
         if (currBlock >= maxBlocks) {
            fprintf ( stderr,
                      "%s: `%s' appears to contain more than %d blocks\n",
                      progName, inFileName, maxBlocks );
            fprintf ( stderr, "%s: and cannot be handled.\n", progName );
            exit(1);
         }

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   }

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   /*-- Pass 2: copy each recoverable block to its own file. --*/

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   blockCRC = 0;
   bsWr     = NULL;
   outFile  = NULL;
   buffHi = buffLo = 0;
   bitsRead = 0;
   wrBlock  = 0;

   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);

      /* 48 bits into the block the window's upper 32 bits are the
         block's first 32 bits, which are its stored CRC. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         /* Block finished: append the end-of-stream marker and CRC. */
         if (outFile != NULL) {
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            /* bsClose() closed the FILE and freed the BitStream. */
            outFile = NULL;
            bsWr    = NULL;
         }
         /* Stop after the last block rather than stepping on to a
            table entry that was never filled in. */
         if (wrBlock+1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* About to read the block's first bit: create its file.
            The name is built by inserting "recNNNN" in front of
            the base name, so the directory part of the input path
            is preserved. */
         strcpy ( outFileName, inFileName );
         split = strrchr ( outFileName, '/' );
         if (split == NULL) split = outFileName; else split++;
         ofs = (Int32) (split - outFileName);

         sprintf ( split, "rec%4d", wrBlock+1 );
         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
         strcat ( outFileName, inFileName + ofs );
         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, " writing block %d to `%s' ...\n",
                   wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         bsPutUChar ( bsWr, 'B' );  bsPutUChar ( bsWr, 'Z' );
         bsPutUChar ( bsWr, 'h' );  bsPutUChar ( bsWr, '9' );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
431
432
433
434 /*-----------------------------------------------------------*/
435 /*--- end bzip2recover.c ---*/
436 /*-----------------------------------------------------------*/
437