1 /*- 2 * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org) 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 #include "archive_endian.h" 28 29 #ifdef HAVE_ERRNO_H 30 #include <errno.h> 31 #endif 32 #include <time.h> 33 #ifdef HAVE_ZLIB_H 34 #include <zlib.h> /* crc32 */ 35 #endif 36 #ifdef HAVE_LIMITS_H 37 #include <limits.h> 38 #endif 39 40 #include "archive.h" 41 #ifndef HAVE_ZLIB_H 42 #include "archive_crc32.h" 43 #endif 44 45 #include "archive_entry.h" 46 #include "archive_entry_locale.h" 47 #include "archive_ppmd7_private.h" 48 #include "archive_entry_private.h" 49 50 #ifdef HAVE_BLAKE2_H 51 #include <blake2.h> 52 #else 53 #include "archive_blake2.h" 54 #endif 55 56 /*#define CHECK_CRC_ON_SOLID_SKIP*/ 57 /*#define DONT_FAIL_ON_CRC_ERROR*/ 58 /*#define DEBUG*/ 59 60 #define rar5_min(a, b) (((a) > (b)) ? (b) : (a)) 61 #define rar5_max(a, b) (((a) > (b)) ? (a) : (b)) 62 #define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*X))) 63 64 #if defined DEBUG 65 #define DEBUG_CODE if(1) 66 #define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0) 67 #else 68 #define DEBUG_CODE if(0) 69 #endif 70 71 /* Real RAR5 magic number is: 72 * 73 * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00 74 * "Rar!→•☺·\x00" 75 * 76 * Retrieved with `rar5_signature()` by XOR'ing it with 0xA1, because I don't 77 * want to put this magic sequence in each binary that uses libarchive, so 78 * applications that scan through the file for this marker won't trigger on 79 * this "false" one. 80 * 81 * The array itself is decrypted in `rar5_init` function. */ 82 83 static unsigned char rar5_signature_xor[] = { 243, 192, 211, 128, 187, 166, 160, 161 }; 84 static const size_t g_unpack_window_size = 0x20000; 85 86 /* These could have been static const's, but they aren't, because of 87 * Visual Studio. */ 88 #define MAX_NAME_IN_CHARS 2048 89 #define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS) 90 91 struct file_header { 92 ssize_t bytes_remaining; 93 ssize_t unpacked_size; 94 int64_t last_offset; /* Used in sanity checks. */ 95 int64_t last_size; /* Used in sanity checks. */ 96 97 uint8_t solid : 1; /* Is this a solid stream? */ 98 uint8_t service : 1; /* Is this file a service data? */ 99 uint8_t eof : 1; /* Did we finish unpacking the file? */ 100 uint8_t dir : 1; /* Is this file entry a directory? */ 101 102 /* Optional time fields. */ 103 uint64_t e_mtime; 104 uint64_t e_ctime; 105 uint64_t e_atime; 106 uint32_t e_unix_ns; 107 108 /* Optional hash fields. */ 109 uint32_t stored_crc32; 110 uint32_t calculated_crc32; 111 uint8_t blake2sp[32]; 112 blake2sp_state b2state; 113 char has_blake2; 114 115 /* Optional redir fields */ 116 uint64_t redir_type; 117 uint64_t redir_flags; 118 119 ssize_t solid_window_size; /* Used in file format check. */ 120 }; 121 122 enum EXTRA { 123 EX_CRYPT = 0x01, 124 EX_HASH = 0x02, 125 EX_HTIME = 0x03, 126 EX_VERSION = 0x04, 127 EX_REDIR = 0x05, 128 EX_UOWNER = 0x06, 129 EX_SUBDATA = 0x07 130 }; 131 132 #define REDIR_SYMLINK_IS_DIR 1 133 134 enum REDIR_TYPE { 135 REDIR_TYPE_NONE = 0, 136 REDIR_TYPE_UNIXSYMLINK = 1, 137 REDIR_TYPE_WINSYMLINK = 2, 138 REDIR_TYPE_JUNCTION = 3, 139 REDIR_TYPE_HARDLINK = 4, 140 REDIR_TYPE_FILECOPY = 5, 141 }; 142 143 #define OWNER_USER_NAME 0x01 144 #define OWNER_GROUP_NAME 0x02 145 #define OWNER_USER_UID 0x04 146 #define OWNER_GROUP_GID 0x08 147 #define OWNER_MAXNAMELEN 256 148 149 enum FILTER_TYPE { 150 FILTER_DELTA = 0, /* Generic pattern. */ 151 FILTER_E8 = 1, /* Intel x86 code. */ 152 FILTER_E8E9 = 2, /* Intel x86 code. */ 153 FILTER_ARM = 3, /* ARM code. */ 154 FILTER_AUDIO = 4, /* Audio filter, not used in RARv5. */ 155 FILTER_RGB = 5, /* Color palette, not used in RARv5. */ 156 FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. */ 157 FILTER_PPM = 7, /* Predictive pattern matching, not used in 158 RARv5. */ 159 FILTER_NONE = 8, 160 }; 161 162 struct filter_info { 163 int type; 164 int channels; 165 int pos_r; 166 167 int64_t block_start; 168 ssize_t block_length; 169 uint16_t width; 170 }; 171 172 struct data_ready { 173 char used; 174 const uint8_t* buf; 175 size_t size; 176 int64_t offset; 177 }; 178 179 struct cdeque { 180 uint16_t beg_pos; 181 uint16_t end_pos; 182 uint16_t cap_mask; 183 uint16_t size; 184 size_t* arr; 185 }; 186 187 struct decode_table { 188 uint32_t size; 189 int32_t decode_len[16]; 190 uint32_t decode_pos[16]; 191 uint32_t quick_bits; 192 uint8_t quick_len[1 << 10]; 193 uint16_t quick_num[1 << 10]; 194 uint16_t decode_num[306]; 195 }; 196 197 struct comp_state { 198 /* Flag used to specify if unpacker needs to reinitialize the 199 uncompression context. */ 200 uint8_t initialized : 1; 201 202 /* Flag used when applying filters. */ 203 uint8_t all_filters_applied : 1; 204 205 /* Flag used to skip file context reinitialization, used when unpacker 206 is skipping through different multivolume archives. */ 207 uint8_t switch_multivolume : 1; 208 209 /* Flag used to specify if unpacker has processed the whole data block 210 or just a part of it. */ 211 uint8_t block_parsing_finished : 1; 212 213 /* Flag used to indicate that a previous file using this buffer was 214 encrypted, meaning no data in the buffer can be trusted */ 215 uint8_t data_encrypted : 1; 216 217 signed int notused : 3; 218 219 int flags; /* Uncompression flags. */ 220 int method; /* Uncompression algorithm method. */ 221 int version; /* Uncompression algorithm version. */ 222 ssize_t window_size; /* Size of window_buf. */ 223 uint8_t* window_buf; /* Circular buffer used during 224 decompression. */ 225 uint8_t* filtered_buf; /* Buffer used when applying filters. */ 226 const uint8_t* block_buf; /* Buffer used when merging blocks. */ 227 ssize_t window_mask; /* Convenience field; window_size - 1. */ 228 int64_t write_ptr; /* This amount of data has been unpacked 229 in the window buffer. */ 230 int64_t last_write_ptr; /* This amount of data has been stored in 231 the output file. */ 232 int64_t last_unstore_ptr; /* Counter of bytes extracted during 233 unstoring. This is separate from 234 last_write_ptr because of how SERVICE 235 base blocks are handled during skipping 236 in solid multiarchive archives. */ 237 int64_t solid_offset; /* Additional offset inside the window 238 buffer, used in unpacking solid 239 archives. */ 240 ssize_t cur_block_size; /* Size of current data block. */ 241 int last_len; /* Flag used in lzss decompression. */ 242 243 /* Decode tables used during lzss uncompression. */ 244 245 #define HUFF_BC 20 246 struct decode_table bd; /* huffman bit lengths */ 247 #define HUFF_NC 306 248 struct decode_table ld; /* literals */ 249 #define HUFF_DC 64 250 struct decode_table dd; /* distances */ 251 #define HUFF_LDC 16 252 struct decode_table ldd; /* lower bits of distances */ 253 #define HUFF_RC 44 254 struct decode_table rd; /* repeating distances */ 255 #define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC) 256 257 /* Circular deque for storing filters. */ 258 struct cdeque filters; 259 int64_t last_block_start; /* Used for sanity checking. */ 260 ssize_t last_block_length; /* Used for sanity checking. */ 261 262 /* Distance cache used during lzss uncompression. */ 263 int dist_cache[4]; 264 265 /* Data buffer stack. */ 266 struct data_ready dready[2]; 267 }; 268 269 /* Bit reader state. */ 270 struct bit_reader { 271 int8_t bit_addr; /* Current bit pointer inside current byte. */ 272 int in_addr; /* Current byte pointer. */ 273 }; 274 275 /* RARv5 block header structure. Use bf_* functions to get values from 276 * block_flags_u8 field. I.e. bf_byte_count, etc. */ 277 struct compressed_block_header { 278 /* block_flags_u8 contain fields encoded in little-endian bitfield: 279 * 280 * - table present flag (shr 7, and 1), 281 * - last block flag (shr 6, and 1), 282 * - byte_count (shr 3, and 7), 283 * - bit_size (shr 0, and 7). 284 */ 285 uint8_t block_flags_u8; 286 uint8_t block_cksum; 287 }; 288 289 /* RARv5 main header structure. */ 290 struct main_header { 291 /* Does the archive contain solid streams? */ 292 uint8_t solid : 1; 293 294 /* If this a multi-file archive? */ 295 uint8_t volume : 1; 296 uint8_t endarc : 1; 297 uint8_t notused : 5; 298 299 unsigned int vol_no; 300 }; 301 302 struct generic_header { 303 uint8_t split_after : 1; 304 uint8_t split_before : 1; 305 uint8_t padding : 6; 306 int size; 307 int last_header_id; 308 }; 309 310 struct multivolume { 311 unsigned int expected_vol_no; 312 uint8_t* push_buf; 313 }; 314 315 /* Main context structure. */ 316 struct rar5 { 317 int header_initialized; 318 319 /* Set to 1 if current file is positioned AFTER the magic value 320 * of the archive file. This is used in header reading functions. */ 321 int skipped_magic; 322 323 /* Set to not zero if we're in skip mode (either by calling 324 * rar5_data_skip function or when skipping over solid streams). 325 * Set to 0 when in * extraction mode. This is used during checksum 326 * calculation functions. */ 327 int skip_mode; 328 329 /* Set to not zero if we're in block merging mode (i.e. when switching 330 * to another file in multivolume archive, last block from 1st archive 331 * needs to be merged with 1st block from 2nd archive). This flag 332 * guards against recursive use of the merging function, which doesn't 333 * support recursive calls. */ 334 int merge_mode; 335 336 /* An offset to QuickOpen list. This is not supported by this unpacker, 337 * because we're focusing on streaming interface. QuickOpen is designed 338 * to make things quicker for non-stream interfaces, so it's not our 339 * use case. */ 340 uint64_t qlist_offset; 341 342 /* An offset to additional Recovery data. This is not supported by this 343 * unpacker. Recovery data are additional Reed-Solomon codes that could 344 * be used to calculate bytes that are missing in archive or are 345 * corrupted. */ 346 uint64_t rr_offset; 347 348 /* Various context variables grouped to different structures. */ 349 struct generic_header generic; 350 struct main_header main; 351 struct comp_state cstate; 352 struct file_header file; 353 struct bit_reader bits; 354 struct multivolume vol; 355 356 /* The header of currently processed RARv5 block. Used in main 357 * decompression logic loop. */ 358 struct compressed_block_header last_block_hdr; 359 360 /* 361 * Custom field to denote that this archive contains encrypted entries 362 */ 363 int has_encrypted_entries; 364 int headers_are_encrypted; 365 }; 366 367 /* Forward function declarations. */ 368 369 static void rar5_signature(char *buf); 370 static int verify_global_checksums(struct archive_read* a); 371 static int rar5_read_data_skip(struct archive_read *a); 372 static int push_data_ready(struct archive_read* a, struct rar5* rar, 373 const uint8_t* buf, size_t size, int64_t offset); 374 static void clear_data_ready_stack(struct rar5* rar); 375 376 /* CDE_xxx = Circular Double Ended (Queue) return values. */ 377 enum CDE_RETURN_VALUES { 378 CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS, 379 }; 380 381 /* Clears the contents of this circular deque. */ 382 static void cdeque_clear(struct cdeque* d) { 383 d->size = 0; 384 d->beg_pos = 0; 385 d->end_pos = 0; 386 } 387 388 /* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32, 389 * 64, 256, etc. When the user will add another item above current capacity, 390 * the circular deque will overwrite the oldest entry. */ 391 static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) { 392 if(d == NULL || max_capacity_power_of_2 == 0) 393 return CDE_PARAM; 394 395 d->cap_mask = max_capacity_power_of_2 - 1; 396 d->arr = NULL; 397 398 if((max_capacity_power_of_2 & d->cap_mask) != 0) 399 return CDE_PARAM; 400 401 cdeque_clear(d); 402 d->arr = malloc(sizeof(void*) * max_capacity_power_of_2); 403 404 return d->arr ? CDE_OK : CDE_ALLOC; 405 } 406 407 /* Return the current size (not capacity) of circular deque `d`. */ 408 static size_t cdeque_size(struct cdeque* d) { 409 return d->size; 410 } 411 412 /* Returns the first element of current circular deque. Note that this function 413 * doesn't perform any bounds checking. If you need bounds checking, use 414 * `cdeque_front()` function instead. */ 415 static void cdeque_front_fast(struct cdeque* d, void** value) { 416 *value = (void*) d->arr[d->beg_pos]; 417 } 418 419 /* Returns the first element of current circular deque. This function 420 * performs bounds checking. */ 421 static int cdeque_front(struct cdeque* d, void** value) { 422 if(d->size > 0) { 423 cdeque_front_fast(d, value); 424 return CDE_OK; 425 } else 426 return CDE_OUT_OF_BOUNDS; 427 } 428 429 /* Pushes a new element into the end of this circular deque object. If current 430 * size will exceed capacity, the oldest element will be overwritten. */ 431 static int cdeque_push_back(struct cdeque* d, void* item) { 432 if(d == NULL) 433 return CDE_PARAM; 434 435 if(d->size == d->cap_mask + 1) 436 return CDE_OUT_OF_BOUNDS; 437 438 d->arr[d->end_pos] = (size_t) item; 439 d->end_pos = (d->end_pos + 1) & d->cap_mask; 440 d->size++; 441 442 return CDE_OK; 443 } 444 445 /* Pops a front element of this circular deque object and returns its value. 446 * This function doesn't perform any bounds checking. */ 447 static void cdeque_pop_front_fast(struct cdeque* d, void** value) { 448 *value = (void*) d->arr[d->beg_pos]; 449 d->beg_pos = (d->beg_pos + 1) & d->cap_mask; 450 d->size--; 451 } 452 453 /* Pops a front element of this circular deque object and returns its value. 454 * This function performs bounds checking. */ 455 static int cdeque_pop_front(struct cdeque* d, void** value) { 456 if(!d || !value) 457 return CDE_PARAM; 458 459 if(d->size == 0) 460 return CDE_OUT_OF_BOUNDS; 461 462 cdeque_pop_front_fast(d, value); 463 return CDE_OK; 464 } 465 466 /* Convenience function to cast filter_info** to void **. */ 467 static void** cdeque_filter_p(struct filter_info** f) { 468 return (void**) (size_t) f; 469 } 470 471 /* Convenience function to cast filter_info* to void *. */ 472 static void* cdeque_filter(struct filter_info* f) { 473 return (void**) (size_t) f; 474 } 475 476 /* Destroys this circular deque object. Deallocates the memory of the 477 * collection buffer, but doesn't deallocate the memory of any pointer passed 478 * to this deque as a value. */ 479 static void cdeque_free(struct cdeque* d) { 480 if(!d) 481 return; 482 483 if(!d->arr) 484 return; 485 486 free(d->arr); 487 488 d->arr = NULL; 489 d->beg_pos = -1; 490 d->end_pos = -1; 491 d->cap_mask = 0; 492 } 493 494 static inline 495 uint8_t bf_bit_size(const struct compressed_block_header* hdr) { 496 return hdr->block_flags_u8 & 7; 497 } 498 499 static inline 500 uint8_t bf_byte_count(const struct compressed_block_header* hdr) { 501 return (hdr->block_flags_u8 >> 3) & 7; 502 } 503 504 static inline 505 uint8_t bf_is_table_present(const struct compressed_block_header* hdr) { 506 return (hdr->block_flags_u8 >> 7) & 1; 507 } 508 509 static inline 510 uint8_t bf_is_last_block(const struct compressed_block_header* hdr) { 511 return (hdr->block_flags_u8 >> 6) & 1; 512 } 513 514 static inline struct rar5* get_context(struct archive_read* a) { 515 return (struct rar5*) a->format->data; 516 } 517 518 /* Convenience functions used by filter implementations. */ 519 static void circular_memcpy(uint8_t* dst, uint8_t* window, const ssize_t mask, 520 int64_t start, int64_t end) 521 { 522 if((start & mask) > (end & mask)) { 523 ssize_t len1 = mask + 1 - (start & mask); 524 ssize_t len2 = end & mask; 525 526 memcpy(dst, &window[start & mask], len1); 527 memcpy(dst + len1, window, len2); 528 } else { 529 memcpy(dst, &window[start & mask], (size_t) (end - start)); 530 } 531 } 532 533 static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) { 534 uint8_t linear_buf[4]; 535 circular_memcpy(linear_buf, rar->cstate.window_buf, 536 rar->cstate.window_mask, offset, offset + 4); 537 return archive_le32dec(linear_buf); 538 } 539 540 static void write_filter_data(struct rar5* rar, uint32_t offset, 541 uint32_t value) 542 { 543 archive_le32enc(&rar->cstate.filtered_buf[offset], value); 544 } 545 546 /* Allocates a new filter descriptor and adds it to the filter array. */ 547 static struct filter_info* add_new_filter(struct rar5* rar) { 548 struct filter_info* f = calloc(1, sizeof(*f)); 549 550 if(!f) { 551 return NULL; 552 } 553 554 cdeque_push_back(&rar->cstate.filters, cdeque_filter(f)); 555 return f; 556 } 557 558 static int run_delta_filter(struct rar5* rar, struct filter_info* flt) { 559 int i; 560 ssize_t dest_pos, src_pos = 0; 561 562 for(i = 0; i < flt->channels; i++) { 563 uint8_t prev_byte = 0; 564 for(dest_pos = i; 565 dest_pos < flt->block_length; 566 dest_pos += flt->channels) 567 { 568 uint8_t byte; 569 570 byte = rar->cstate.window_buf[ 571 (rar->cstate.solid_offset + flt->block_start + 572 src_pos) & rar->cstate.window_mask]; 573 574 prev_byte -= byte; 575 rar->cstate.filtered_buf[dest_pos] = prev_byte; 576 src_pos++; 577 } 578 } 579 580 return ARCHIVE_OK; 581 } 582 583 static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt, 584 int extended) 585 { 586 const uint32_t file_size = 0x1000000; 587 ssize_t i; 588 589 circular_memcpy(rar->cstate.filtered_buf, 590 rar->cstate.window_buf, rar->cstate.window_mask, 591 rar->cstate.solid_offset + flt->block_start, 592 rar->cstate.solid_offset + flt->block_start + flt->block_length); 593 594 for(i = 0; i < flt->block_length - 4;) { 595 uint8_t b = rar->cstate.window_buf[ 596 (rar->cstate.solid_offset + flt->block_start + 597 i++) & rar->cstate.window_mask]; 598 599 /* 600 * 0xE8 = x86's call <relative_addr_uint32> (function call) 601 * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump) 602 */ 603 if(b == 0xE8 || (extended && b == 0xE9)) { 604 605 uint32_t addr; 606 uint32_t offset = (i + flt->block_start) % file_size; 607 608 addr = read_filter_data(rar, 609 (uint32_t)(rar->cstate.solid_offset + 610 flt->block_start + i) & rar->cstate.window_mask); 611 612 if(addr & 0x80000000) { 613 if(((addr + offset) & 0x80000000) == 0) { 614 write_filter_data(rar, (uint32_t)i, 615 addr + file_size); 616 } 617 } else { 618 if((addr - file_size) & 0x80000000) { 619 uint32_t naddr = addr - offset; 620 write_filter_data(rar, (uint32_t)i, 621 naddr); 622 } 623 } 624 625 i += 4; 626 } 627 } 628 629 return ARCHIVE_OK; 630 } 631 632 static int run_arm_filter(struct rar5* rar, struct filter_info* flt) { 633 ssize_t i = 0; 634 uint32_t offset; 635 636 circular_memcpy(rar->cstate.filtered_buf, 637 rar->cstate.window_buf, rar->cstate.window_mask, 638 rar->cstate.solid_offset + flt->block_start, 639 rar->cstate.solid_offset + flt->block_start + flt->block_length); 640 641 for(i = 0; i < flt->block_length - 3; i += 4) { 642 uint8_t* b = &rar->cstate.window_buf[ 643 (rar->cstate.solid_offset + 644 flt->block_start + i + 3) & rar->cstate.window_mask]; 645 646 if(*b == 0xEB) { 647 /* 0xEB = ARM's BL (branch + link) instruction. */ 648 offset = read_filter_data(rar, 649 (rar->cstate.solid_offset + flt->block_start + i) & 650 (uint32_t)rar->cstate.window_mask) & 0x00ffffff; 651 652 offset -= (uint32_t) ((i + flt->block_start) / 4); 653 offset = (offset & 0x00ffffff) | 0xeb000000; 654 write_filter_data(rar, (uint32_t)i, offset); 655 } 656 } 657 658 return ARCHIVE_OK; 659 } 660 661 static int run_filter(struct archive_read* a, struct filter_info* flt) { 662 int ret; 663 struct rar5* rar = get_context(a); 664 665 clear_data_ready_stack(rar); 666 free(rar->cstate.filtered_buf); 667 668 rar->cstate.filtered_buf = malloc(flt->block_length); 669 if(!rar->cstate.filtered_buf) { 670 archive_set_error(&a->archive, ENOMEM, 671 "Can't allocate memory for filter data."); 672 return ARCHIVE_FATAL; 673 } 674 675 switch(flt->type) { 676 case FILTER_DELTA: 677 ret = run_delta_filter(rar, flt); 678 break; 679 680 case FILTER_E8: 681 /* fallthrough */ 682 case FILTER_E8E9: 683 ret = run_e8e9_filter(rar, flt, 684 flt->type == FILTER_E8E9); 685 break; 686 687 case FILTER_ARM: 688 ret = run_arm_filter(rar, flt); 689 break; 690 691 default: 692 archive_set_error(&a->archive, 693 ARCHIVE_ERRNO_FILE_FORMAT, 694 "Unsupported filter type: 0x%x", flt->type); 695 return ARCHIVE_FATAL; 696 } 697 698 if(ret != ARCHIVE_OK) { 699 /* Filter has failed. */ 700 return ret; 701 } 702 703 if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf, 704 flt->block_length, rar->cstate.last_write_ptr)) 705 { 706 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 707 "Stack overflow when submitting unpacked data"); 708 709 return ARCHIVE_FATAL; 710 } 711 712 rar->cstate.last_write_ptr += flt->block_length; 713 return ARCHIVE_OK; 714 } 715 716 /* The `push_data` function submits the selected data range to the user. 717 * Next call of `use_data` will use the pointer, size and offset arguments 718 * that are specified here. These arguments are pushed to the FIFO stack here, 719 * and popped from the stack by the `use_data` function. */ 720 static void push_data(struct archive_read* a, struct rar5* rar, 721 const uint8_t* buf, int64_t idx_begin, int64_t idx_end) 722 { 723 const ssize_t wmask = rar->cstate.window_mask; 724 const ssize_t solid_write_ptr = (rar->cstate.solid_offset + 725 rar->cstate.last_write_ptr) & wmask; 726 727 idx_begin += rar->cstate.solid_offset; 728 idx_end += rar->cstate.solid_offset; 729 730 /* Check if our unpacked data is wrapped inside the window circular 731 * buffer. If it's not wrapped, it can be copied out by using 732 * a single memcpy, but when it's wrapped, we need to copy the first 733 * part with one memcpy, and the second part with another memcpy. */ 734 735 if((idx_begin & wmask) > (idx_end & wmask)) { 736 /* The data is wrapped (begin offset sis bigger than end 737 * offset). */ 738 const ssize_t frag1_size = rar->cstate.window_size - 739 (idx_begin & wmask); 740 const ssize_t frag2_size = idx_end & wmask; 741 742 /* Copy the first part of the buffer first. */ 743 push_data_ready(a, rar, buf + solid_write_ptr, frag1_size, 744 rar->cstate.last_write_ptr); 745 746 /* Copy the second part of the buffer. */ 747 push_data_ready(a, rar, buf, frag2_size, 748 rar->cstate.last_write_ptr + frag1_size); 749 750 rar->cstate.last_write_ptr += frag1_size + frag2_size; 751 } else { 752 /* Data is not wrapped, so we can just use one call to copy the 753 * data. */ 754 push_data_ready(a, rar, 755 buf + solid_write_ptr, (idx_end - idx_begin) & wmask, 756 rar->cstate.last_write_ptr); 757 758 rar->cstate.last_write_ptr += idx_end - idx_begin; 759 } 760 } 761 762 /* Convenience function that submits the data to the user. It uses the 763 * unpack window buffer as a source location. */ 764 static void push_window_data(struct archive_read* a, struct rar5* rar, 765 int64_t idx_begin, int64_t idx_end) 766 { 767 push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end); 768 } 769 770 static int apply_filters(struct archive_read* a) { 771 struct filter_info* flt; 772 struct rar5* rar = get_context(a); 773 int ret; 774 775 rar->cstate.all_filters_applied = 0; 776 777 /* Get the first filter that can be applied to our data. The data 778 * needs to be fully unpacked before the filter can be run. */ 779 if(CDE_OK == cdeque_front(&rar->cstate.filters, 780 cdeque_filter_p(&flt))) { 781 /* Check if our unpacked data fully covers this filter's 782 * range. */ 783 if(rar->cstate.write_ptr > flt->block_start && 784 rar->cstate.write_ptr >= flt->block_start + 785 flt->block_length) { 786 /* Check if we have some data pending to be written 787 * right before the filter's start offset. */ 788 if(rar->cstate.last_write_ptr == flt->block_start) { 789 /* Run the filter specified by descriptor 790 * `flt`. */ 791 ret = run_filter(a, flt); 792 if(ret != ARCHIVE_OK) { 793 /* Filter failure, return error. */ 794 return ret; 795 } 796 797 /* Filter descriptor won't be needed anymore 798 * after it's used, * so remove it from the 799 * filter list and free its memory. */ 800 (void) cdeque_pop_front(&rar->cstate.filters, 801 cdeque_filter_p(&flt)); 802 803 free(flt); 804 } else { 805 /* We can't run filters yet, dump the memory 806 * right before the filter. */ 807 push_window_data(a, rar, 808 rar->cstate.last_write_ptr, 809 flt->block_start); 810 } 811 812 /* Return 'filter applied or not needed' state to the 813 * caller. */ 814 return ARCHIVE_RETRY; 815 } 816 } 817 818 rar->cstate.all_filters_applied = 1; 819 return ARCHIVE_OK; 820 } 821 822 static void dist_cache_push(struct rar5* rar, int value) { 823 int* q = rar->cstate.dist_cache; 824 825 q[3] = q[2]; 826 q[2] = q[1]; 827 q[1] = q[0]; 828 q[0] = value; 829 } 830 831 static int dist_cache_touch(struct rar5* rar, int idx) { 832 int* q = rar->cstate.dist_cache; 833 int i, dist = q[idx]; 834 835 for(i = idx; i > 0; i--) 836 q[i] = q[i - 1]; 837 838 q[0] = dist; 839 return dist; 840 } 841 842 static void free_filters(struct rar5* rar) { 843 struct cdeque* d = &rar->cstate.filters; 844 845 /* Free any remaining filters. All filters should be naturally 846 * consumed by the unpacking function, so remaining filters after 847 * unpacking normally mean that unpacking wasn't successful. 848 * But still of course we shouldn't leak memory in such case. */ 849 850 /* cdeque_size() is a fast operation, so we can use it as a loop 851 * expression. */ 852 while(cdeque_size(d) > 0) { 853 struct filter_info* f = NULL; 854 855 /* Pop_front will also decrease the collection's size. */ 856 if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f))) 857 free(f); 858 } 859 860 cdeque_clear(d); 861 862 /* Also clear out the variables needed for sanity checking. */ 863 rar->cstate.last_block_start = 0; 864 rar->cstate.last_block_length = 0; 865 } 866 867 static void reset_file_context(struct rar5* rar) { 868 memset(&rar->file, 0, sizeof(rar->file)); 869 blake2sp_init(&rar->file.b2state, 32); 870 871 if(rar->main.solid) { 872 rar->cstate.solid_offset += rar->cstate.write_ptr; 873 } else { 874 rar->cstate.solid_offset = 0; 875 } 876 877 rar->cstate.write_ptr = 0; 878 rar->cstate.last_write_ptr = 0; 879 rar->cstate.last_unstore_ptr = 0; 880 881 rar->file.redir_type = REDIR_TYPE_NONE; 882 rar->file.redir_flags = 0; 883 884 free_filters(rar); 885 } 886 887 static inline int get_archive_read(struct archive* a, 888 struct archive_read** ar) 889 { 890 *ar = (struct archive_read*) a; 891 archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 892 "archive_read_support_format_rar5"); 893 894 return ARCHIVE_OK; 895 } 896 897 static int read_ahead(struct archive_read* a, size_t how_many, 898 const uint8_t** ptr) 899 { 900 ssize_t avail = -1; 901 if(!ptr) 902 return 0; 903 904 *ptr = __archive_read_ahead(a, how_many, &avail); 905 if(*ptr == NULL) { 906 return 0; 907 } 908 909 return 1; 910 } 911 912 static int consume(struct archive_read* a, int64_t how_many) { 913 int ret; 914 915 ret = how_many == __archive_read_consume(a, how_many) 916 ? ARCHIVE_OK 917 : ARCHIVE_FATAL; 918 919 return ret; 920 } 921 922 /** 923 * Read a RAR5 variable sized numeric value. This value will be stored in 924 * `pvalue`. The `pvalue_len` argument points to a variable that will receive 925 * the byte count that was consumed in order to decode the `pvalue` value, plus 926 * one. 927 * 928 * pvalue_len is optional and can be NULL. 929 * 930 * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume 931 * the number of bytes that `pvalue_len` value contains. If the `pvalue_len` 932 * is NULL, this consuming operation is done automatically. 933 * 934 * Returns 1 if *pvalue was successfully read. 935 * Returns 0 if there was an error. In this case, *pvalue contains an 936 * invalid value. 937 */ 938 939 static int read_var(struct archive_read* a, uint64_t* pvalue, 940 uint64_t* pvalue_len) 941 { 942 uint64_t result = 0; 943 size_t shift, i; 944 const uint8_t* p; 945 uint8_t b; 946 947 /* We will read maximum of 8 bytes. We don't have to handle the 948 * situation to read the RAR5 variable-sized value stored at the end of 949 * the file, because such situation will never happen. */ 950 if(!read_ahead(a, 8, &p)) 951 return 0; 952 953 for(shift = 0, i = 0; i < 8; i++, shift += 7) { 954 b = p[i]; 955 956 /* Strip the MSB from the input byte and add the resulting 957 * number to the `result`. */ 958 result += (b & (uint64_t)0x7F) << shift; 959 960 /* MSB set to 1 means we need to continue decoding process. 961 * MSB set to 0 means we're done. 962 * 963 * This conditional checks for the second case. */ 964 if((b & 0x80) == 0) { 965 if(pvalue) { 966 *pvalue = result; 967 } 968 969 /* If the caller has passed the `pvalue_len` pointer, 970 * store the number of consumed bytes in it and do NOT 971 * consume those bytes, since the caller has all the 972 * information it needs to perform */ 973 if(pvalue_len) { 974 *pvalue_len = 1 + i; 975 } else { 976 /* If the caller did not provide the 977 * `pvalue_len` pointer, it will not have the 978 * possibility to advance the file pointer, 979 * because it will not know how many bytes it 980 * needs to consume. This is why we handle 981 * such situation here automatically. */ 982 if(ARCHIVE_OK != consume(a, 1 + i)) { 983 return 0; 984 } 985 } 986 987 /* End of decoding process, return success. */ 988 return 1; 989 } 990 } 991 992 /* The decoded value takes the maximum number of 8 bytes. 993 * It's a maximum number of bytes, so end decoding process here 994 * even if the first bit of last byte is 1. */ 995 if(pvalue) { 996 *pvalue = result; 997 } 998 999 if(pvalue_len) { 1000 *pvalue_len = 9; 1001 } else { 1002 if(ARCHIVE_OK != consume(a, 9)) { 1003 return 0; 1004 } 1005 } 1006 1007 return 1; 1008 } 1009 1010 static int read_var_sized(struct archive_read* a, size_t* pvalue, 1011 size_t* pvalue_len) 1012 { 1013 uint64_t v; 1014 uint64_t v_size = 0; 1015 1016 const int ret = pvalue_len ? read_var(a, &v, &v_size) 1017 : read_var(a, &v, NULL); 1018 1019 if(ret == 1 && pvalue) { 1020 *pvalue = (size_t) v; 1021 } 1022 1023 if(pvalue_len) { 1024 /* Possible data truncation should be safe. */ 1025 *pvalue_len = (size_t) v_size; 1026 } 1027 1028 return ret; 1029 } 1030 1031 static int read_bits_32(struct archive_read* a, struct rar5* rar, 1032 const uint8_t* p, uint32_t* value) 1033 { 1034 if(rar->bits.in_addr >= rar->cstate.cur_block_size) { 1035 archive_set_error(&a->archive, 1036 ARCHIVE_ERRNO_PROGRAMMER, 1037 "Premature end of stream during extraction of data (#1)"); 1038 return ARCHIVE_FATAL; 1039 } 1040 1041 uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24; 1042 bits |= p[rar->bits.in_addr + 1] << 16; 1043 bits |= p[rar->bits.in_addr + 2] << 8; 1044 bits |= p[rar->bits.in_addr + 3]; 1045 bits <<= rar->bits.bit_addr; 1046 bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr); 1047 *value = bits; 1048 return ARCHIVE_OK; 1049 } 1050 1051 static int read_bits_16(struct archive_read* a, struct rar5* rar, 1052 const uint8_t* p, uint16_t* value) 1053 { 1054 if(rar->bits.in_addr >= rar->cstate.cur_block_size) { 1055 archive_set_error(&a->archive, 1056 ARCHIVE_ERRNO_PROGRAMMER, 1057 "Premature end of stream during extraction of data (#2)"); 1058 return ARCHIVE_FATAL; 1059 } 1060 1061 int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16; 1062 bits |= (int) p[rar->bits.in_addr + 1] << 8; 1063 bits |= (int) p[rar->bits.in_addr + 2]; 1064 bits >>= (8 - rar->bits.bit_addr); 1065 *value = bits & 0xffff; 1066 return ARCHIVE_OK; 1067 } 1068 1069 static void skip_bits(struct rar5* rar, int bits) { 1070 const int new_bits = rar->bits.bit_addr + bits; 1071 rar->bits.in_addr += new_bits >> 3; 1072 rar->bits.bit_addr = new_bits & 7; 1073 } 1074 1075 /* n = up to 16 */ 1076 static int read_consume_bits(struct archive_read* a, struct rar5* rar, 1077 const uint8_t* p, int n, int* value) 1078 { 1079 uint16_t v; 1080 int ret, num; 1081 1082 if(n == 0 || n > 16) { 1083 /* This is a programmer error and should never happen 1084 * in runtime. */ 1085 return ARCHIVE_FATAL; 1086 } 1087 1088 ret = read_bits_16(a, rar, p, &v); 1089 if(ret != ARCHIVE_OK) 1090 return ret; 1091 1092 num = (int) v; 1093 num >>= 16 - n; 1094 1095 skip_bits(rar, n); 1096 1097 if(value) 1098 *value = num; 1099 1100 return ARCHIVE_OK; 1101 } 1102 1103 static int read_u32(struct archive_read* a, uint32_t* pvalue) { 1104 const uint8_t* p; 1105 if(!read_ahead(a, 4, &p)) 1106 return 0; 1107 1108 *pvalue = archive_le32dec(p); 1109 return ARCHIVE_OK == consume(a, 4) ? 1 : 0; 1110 } 1111 1112 static int read_u64(struct archive_read* a, uint64_t* pvalue) { 1113 const uint8_t* p; 1114 if(!read_ahead(a, 8, &p)) 1115 return 0; 1116 1117 *pvalue = archive_le64dec(p); 1118 return ARCHIVE_OK == consume(a, 8) ? 1 : 0; 1119 } 1120 1121 static int bid_standard(struct archive_read* a) { 1122 const uint8_t* p; 1123 char signature[sizeof(rar5_signature_xor)]; 1124 1125 rar5_signature(signature); 1126 1127 if(!read_ahead(a, sizeof(rar5_signature_xor), &p)) 1128 return -1; 1129 1130 if(!memcmp(signature, p, sizeof(rar5_signature_xor))) 1131 return 30; 1132 1133 return -1; 1134 } 1135 1136 static int bid_sfx(struct archive_read *a) 1137 { 1138 const char *p; 1139 1140 if ((p = __archive_read_ahead(a, 7, NULL)) == NULL) 1141 return -1; 1142 1143 if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) { 1144 /* This is a PE file */ 1145 char signature[sizeof(rar5_signature_xor)]; 1146 ssize_t offset = 0x10000; 1147 ssize_t window = 4096; 1148 ssize_t bytes_avail; 1149 1150 rar5_signature(signature); 1151 1152 while (offset + window <= (1024 * 512)) { 1153 const char *buff = __archive_read_ahead(a, offset + window, &bytes_avail); 1154 if (buff == NULL) { 1155 /* Remaining bytes are less than window. */ 1156 window >>= 1; 1157 if (window < 0x40) 1158 return 0; 1159 continue; 1160 } 1161 p = buff + offset; 1162 while (p + 8 < buff + bytes_avail) { 1163 if (memcmp(p, signature, sizeof(signature)) == 0) 1164 return 30; 1165 p += 0x10; 1166 } 1167 offset = p - buff; 1168 } 1169 } 1170 1171 return 0; 1172 } 1173 1174 static int rar5_bid(struct archive_read* a, int best_bid) { 1175 int my_bid; 1176 1177 if(best_bid > 30) 1178 return -1; 1179 1180 my_bid = bid_standard(a); 1181 if(my_bid > -1) { 1182 return my_bid; 1183 } 1184 my_bid = bid_sfx(a); 1185 if (my_bid > -1) { 1186 return my_bid; 1187 } 1188 1189 return -1; 1190 } 1191 1192 static int rar5_options(struct archive_read *a, const char *key, 1193 const char *val) { 1194 (void) a; 1195 (void) key; 1196 (void) val; 1197 1198 /* No options supported in this version. Return the ARCHIVE_WARN code 1199 * to signal the options supervisor that the unpacker didn't handle 1200 * setting this option. */ 1201 1202 return ARCHIVE_WARN; 1203 } 1204 1205 static void init_header(struct archive_read* a) { 1206 a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5; 1207 a->archive.archive_format_name = "RAR5"; 1208 } 1209 1210 static void init_window_mask(struct rar5* rar) { 1211 if (rar->cstate.window_size) 1212 rar->cstate.window_mask = rar->cstate.window_size - 1; 1213 else 1214 rar->cstate.window_mask = 0; 1215 } 1216 1217 enum HEADER_FLAGS { 1218 HFL_EXTRA_DATA = 0x0001, 1219 HFL_DATA = 0x0002, 1220 HFL_SKIP_IF_UNKNOWN = 0x0004, 1221 HFL_SPLIT_BEFORE = 0x0008, 1222 HFL_SPLIT_AFTER = 0x0010, 1223 HFL_CHILD = 0x0020, 1224 HFL_INHERITED = 0x0040 1225 }; 1226 1227 static int process_main_locator_extra_block(struct archive_read* a, 1228 struct rar5* rar) 1229 { 1230 uint64_t locator_flags; 1231 1232 enum LOCATOR_FLAGS { 1233 QLIST = 0x01, RECOVERY = 0x02, 1234 }; 1235 1236 if(!read_var(a, &locator_flags, NULL)) { 1237 return ARCHIVE_EOF; 1238 } 1239 1240 if(locator_flags & QLIST) { 1241 if(!read_var(a, &rar->qlist_offset, NULL)) { 1242 return ARCHIVE_EOF; 1243 } 1244 1245 /* qlist is not used */ 1246 } 1247 1248 if(locator_flags & RECOVERY) { 1249 if(!read_var(a, &rar->rr_offset, NULL)) { 1250 return ARCHIVE_EOF; 1251 } 1252 1253 /* rr is not used */ 1254 } 1255 1256 return ARCHIVE_OK; 1257 } 1258 1259 static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar, 1260 int64_t* extra_data_size) 1261 { 1262 size_t hash_type = 0; 1263 size_t value_len; 1264 1265 enum HASH_TYPE { 1266 BLAKE2sp = 0x00 1267 }; 1268 1269 if(!read_var_sized(a, &hash_type, &value_len)) 1270 return ARCHIVE_EOF; 1271 1272 *extra_data_size -= value_len; 1273 if(ARCHIVE_OK != consume(a, value_len)) { 1274 return ARCHIVE_EOF; 1275 } 1276 1277 /* The file uses BLAKE2sp checksum algorithm instead of plain old 1278 * CRC32. */ 1279 if(hash_type == BLAKE2sp) { 1280 const uint8_t* p; 1281 const int hash_size = sizeof(rar->file.blake2sp); 1282 1283 if(!read_ahead(a, hash_size, &p)) 1284 return ARCHIVE_EOF; 1285 1286 rar->file.has_blake2 = 1; 1287 memcpy(&rar->file.blake2sp, p, hash_size); 1288 1289 if(ARCHIVE_OK != consume(a, hash_size)) { 1290 return ARCHIVE_EOF; 1291 } 1292 1293 *extra_data_size -= hash_size; 1294 } else { 1295 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1296 "Unsupported hash type (0x%x)", (int) hash_type); 1297 return ARCHIVE_FATAL; 1298 } 1299 1300 return ARCHIVE_OK; 1301 } 1302 1303 static uint64_t time_win_to_unix(uint64_t win_time) { 1304 const size_t ns_in_sec = 10000000; 1305 const uint64_t sec_to_unix = 11644473600LL; 1306 return win_time / ns_in_sec - sec_to_unix; 1307 } 1308 1309 static int parse_htime_item(struct archive_read* a, char unix_time, 1310 uint64_t* where, int64_t* extra_data_size) 1311 { 1312 if(unix_time) { 1313 uint32_t time_val; 1314 if(!read_u32(a, &time_val)) 1315 return ARCHIVE_EOF; 1316 1317 *extra_data_size -= 4; 1318 *where = (uint64_t) time_val; 1319 } else { 1320 uint64_t windows_time; 1321 if(!read_u64(a, &windows_time)) 1322 return ARCHIVE_EOF; 1323 1324 *where = time_win_to_unix(windows_time); 1325 *extra_data_size -= 8; 1326 } 1327 1328 return ARCHIVE_OK; 1329 } 1330 1331 static int parse_file_extra_version(struct archive_read* a, 1332 struct archive_entry* e, int64_t* extra_data_size) 1333 { 1334 size_t flags = 0; 1335 size_t version = 0; 1336 size_t value_len = 0; 1337 struct archive_string version_string; 1338 struct archive_string name_utf8_string; 1339 const char* cur_filename; 1340 1341 /* Flags are ignored. */ 1342 if(!read_var_sized(a, &flags, &value_len)) 1343 return ARCHIVE_EOF; 1344 1345 *extra_data_size -= value_len; 1346 if(ARCHIVE_OK != consume(a, value_len)) 1347 return ARCHIVE_EOF; 1348 1349 if(!read_var_sized(a, &version, &value_len)) 1350 return ARCHIVE_EOF; 1351 1352 *extra_data_size -= value_len; 1353 if(ARCHIVE_OK != consume(a, value_len)) 1354 return ARCHIVE_EOF; 1355 1356 /* extra_data_size should be zero here. */ 1357 1358 cur_filename = archive_entry_pathname_utf8(e); 1359 if(cur_filename == NULL) { 1360 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1361 "Version entry without file name"); 1362 return ARCHIVE_FATAL; 1363 } 1364 1365 archive_string_init(&version_string); 1366 archive_string_init(&name_utf8_string); 1367 1368 /* Prepare a ;123 suffix for the filename, where '123' is the version 1369 * value of this file. */ 1370 archive_string_sprintf(&version_string, ";%zu", version); 1371 1372 /* Build the new filename. */ 1373 archive_strcat(&name_utf8_string, cur_filename); 1374 archive_strcat(&name_utf8_string, version_string.s); 1375 1376 /* Apply the new filename into this file's context. */ 1377 archive_entry_update_pathname_utf8(e, name_utf8_string.s); 1378 1379 /* Free buffers. */ 1380 archive_string_free(&version_string); 1381 archive_string_free(&name_utf8_string); 1382 return ARCHIVE_OK; 1383 } 1384 1385 static int parse_file_extra_htime(struct archive_read* a, 1386 struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size) 1387 { 1388 char unix_time = 0; 1389 size_t flags = 0; 1390 size_t value_len; 1391 1392 enum HTIME_FLAGS { 1393 IS_UNIX = 0x01, 1394 HAS_MTIME = 0x02, 1395 HAS_CTIME = 0x04, 1396 HAS_ATIME = 0x08, 1397 HAS_UNIX_NS = 0x10, 1398 }; 1399 1400 if(!read_var_sized(a, &flags, &value_len)) 1401 return ARCHIVE_EOF; 1402 1403 *extra_data_size -= value_len; 1404 if(ARCHIVE_OK != consume(a, value_len)) { 1405 return ARCHIVE_EOF; 1406 } 1407 1408 unix_time = flags & IS_UNIX; 1409 1410 if(flags & HAS_MTIME) { 1411 parse_htime_item(a, unix_time, &rar->file.e_mtime, 1412 extra_data_size); 1413 archive_entry_set_mtime(e, rar->file.e_mtime, 0); 1414 } 1415 1416 if(flags & HAS_CTIME) { 1417 parse_htime_item(a, unix_time, &rar->file.e_ctime, 1418 extra_data_size); 1419 archive_entry_set_ctime(e, rar->file.e_ctime, 0); 1420 } 1421 1422 if(flags & HAS_ATIME) { 1423 parse_htime_item(a, unix_time, &rar->file.e_atime, 1424 extra_data_size); 1425 archive_entry_set_atime(e, rar->file.e_atime, 0); 1426 } 1427 1428 if(flags & HAS_UNIX_NS) { 1429 if(!read_u32(a, &rar->file.e_unix_ns)) 1430 return ARCHIVE_EOF; 1431 1432 *extra_data_size -= 4; 1433 } 1434 1435 return ARCHIVE_OK; 1436 } 1437 1438 static int parse_file_extra_redir(struct archive_read* a, 1439 struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size) 1440 { 1441 uint64_t value_size = 0; 1442 size_t target_size = 0; 1443 char target_utf8_buf[MAX_NAME_IN_BYTES]; 1444 const uint8_t* p; 1445 1446 if(!read_var(a, &rar->file.redir_type, &value_size)) 1447 return ARCHIVE_EOF; 1448 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1449 return ARCHIVE_EOF; 1450 *extra_data_size -= value_size; 1451 1452 if(!read_var(a, &rar->file.redir_flags, &value_size)) 1453 return ARCHIVE_EOF; 1454 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1455 return ARCHIVE_EOF; 1456 *extra_data_size -= value_size; 1457 1458 if(!read_var_sized(a, &target_size, NULL)) 1459 return ARCHIVE_EOF; 1460 *extra_data_size -= target_size + 1; 1461 1462 if(target_size > (MAX_NAME_IN_CHARS - 1)) { 1463 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1464 "Link target is too long"); 1465 return ARCHIVE_FATAL; 1466 } 1467 1468 if(target_size == 0) { 1469 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1470 "No link target specified"); 1471 return ARCHIVE_FATAL; 1472 } 1473 1474 if(!read_ahead(a, target_size, &p)) 1475 return ARCHIVE_EOF; 1476 1477 memcpy(target_utf8_buf, p, target_size); 1478 target_utf8_buf[target_size] = 0; 1479 1480 if(ARCHIVE_OK != consume(a, (int64_t)target_size)) 1481 return ARCHIVE_EOF; 1482 1483 switch(rar->file.redir_type) { 1484 case REDIR_TYPE_UNIXSYMLINK: 1485 case REDIR_TYPE_WINSYMLINK: 1486 archive_entry_set_filetype(e, AE_IFLNK); 1487 archive_entry_update_symlink_utf8(e, target_utf8_buf); 1488 if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) { 1489 archive_entry_set_symlink_type(e, 1490 AE_SYMLINK_TYPE_DIRECTORY); 1491 } else { 1492 archive_entry_set_symlink_type(e, 1493 AE_SYMLINK_TYPE_FILE); 1494 } 1495 break; 1496 1497 case REDIR_TYPE_HARDLINK: 1498 archive_entry_set_filetype(e, AE_IFREG); 1499 archive_entry_update_hardlink_utf8(e, target_utf8_buf); 1500 break; 1501 1502 default: 1503 /* Unknown redir type, skip it. */ 1504 break; 1505 } 1506 return ARCHIVE_OK; 1507 } 1508 1509 static int parse_file_extra_owner(struct archive_read* a, 1510 struct archive_entry* e, int64_t* extra_data_size) 1511 { 1512 uint64_t flags = 0; 1513 uint64_t value_size = 0; 1514 uint64_t id = 0; 1515 size_t name_len = 0; 1516 size_t name_size = 0; 1517 char namebuf[OWNER_MAXNAMELEN]; 1518 const uint8_t* p; 1519 1520 if(!read_var(a, &flags, &value_size)) 1521 return ARCHIVE_EOF; 1522 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1523 return ARCHIVE_EOF; 1524 *extra_data_size -= value_size; 1525 1526 if ((flags & OWNER_USER_NAME) != 0) { 1527 if(!read_var_sized(a, &name_size, NULL)) 1528 return ARCHIVE_EOF; 1529 *extra_data_size -= name_size + 1; 1530 1531 if(!read_ahead(a, name_size, &p)) 1532 return ARCHIVE_EOF; 1533 1534 if (name_size >= OWNER_MAXNAMELEN) { 1535 name_len = OWNER_MAXNAMELEN - 1; 1536 } else { 1537 name_len = name_size; 1538 } 1539 1540 memcpy(namebuf, p, name_len); 1541 namebuf[name_len] = 0; 1542 if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1543 return ARCHIVE_EOF; 1544 1545 archive_entry_set_uname(e, namebuf); 1546 } 1547 if ((flags & OWNER_GROUP_NAME) != 0) { 1548 if(!read_var_sized(a, &name_size, NULL)) 1549 return ARCHIVE_EOF; 1550 *extra_data_size -= name_size + 1; 1551 1552 if(!read_ahead(a, name_size, &p)) 1553 return ARCHIVE_EOF; 1554 1555 if (name_size >= OWNER_MAXNAMELEN) { 1556 name_len = OWNER_MAXNAMELEN - 1; 1557 } else { 1558 name_len = name_size; 1559 } 1560 1561 memcpy(namebuf, p, name_len); 1562 namebuf[name_len] = 0; 1563 if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1564 return ARCHIVE_EOF; 1565 1566 archive_entry_set_gname(e, namebuf); 1567 } 1568 if ((flags & OWNER_USER_UID) != 0) { 1569 if(!read_var(a, &id, &value_size)) 1570 return ARCHIVE_EOF; 1571 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1572 return ARCHIVE_EOF; 1573 *extra_data_size -= value_size; 1574 1575 archive_entry_set_uid(e, (la_int64_t)id); 1576 } 1577 if ((flags & OWNER_GROUP_GID) != 0) { 1578 if(!read_var(a, &id, &value_size)) 1579 return ARCHIVE_EOF; 1580 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1581 return ARCHIVE_EOF; 1582 *extra_data_size -= value_size; 1583 1584 archive_entry_set_gid(e, (la_int64_t)id); 1585 } 1586 return ARCHIVE_OK; 1587 } 1588 1589 static int process_head_file_extra(struct archive_read* a, 1590 struct archive_entry* e, struct rar5* rar, int64_t extra_data_size) 1591 { 1592 uint64_t extra_field_size; 1593 uint64_t extra_field_id = 0; 1594 int ret = ARCHIVE_FATAL; 1595 uint64_t var_size; 1596 1597 while(extra_data_size > 0) { 1598 if(!read_var(a, &extra_field_size, &var_size)) 1599 return ARCHIVE_EOF; 1600 1601 extra_data_size -= var_size; 1602 if(ARCHIVE_OK != consume(a, var_size)) { 1603 return ARCHIVE_EOF; 1604 } 1605 1606 if(!read_var(a, &extra_field_id, &var_size)) 1607 return ARCHIVE_EOF; 1608 1609 extra_field_size -= var_size; 1610 extra_data_size -= var_size; 1611 if(ARCHIVE_OK != consume(a, var_size)) { 1612 return ARCHIVE_EOF; 1613 } 1614 1615 switch(extra_field_id) { 1616 case EX_HASH: 1617 ret = parse_file_extra_hash(a, rar, 1618 &extra_data_size); 1619 break; 1620 case EX_HTIME: 1621 ret = parse_file_extra_htime(a, e, rar, 1622 &extra_data_size); 1623 break; 1624 case EX_REDIR: 1625 ret = parse_file_extra_redir(a, e, rar, 1626 &extra_data_size); 1627 break; 1628 case EX_UOWNER: 1629 ret = parse_file_extra_owner(a, e, 1630 &extra_data_size); 1631 break; 1632 case EX_VERSION: 1633 ret = parse_file_extra_version(a, e, 1634 &extra_data_size); 1635 break; 1636 case EX_CRYPT: 1637 /* Mark the entry as encrypted */ 1638 archive_entry_set_is_data_encrypted(e, 1); 1639 rar->has_encrypted_entries = 1; 1640 rar->cstate.data_encrypted = 1; 1641 /* fallthrough */ 1642 case EX_SUBDATA: 1643 /* fallthrough */ 1644 default: 1645 /* Skip unsupported entry. */ 1646 extra_data_size -= extra_field_size; 1647 if (ARCHIVE_OK != consume(a, extra_field_size)) { 1648 return ARCHIVE_EOF; 1649 } 1650 } 1651 } 1652 1653 if(ret != ARCHIVE_OK) { 1654 /* Attribute not implemented. */ 1655 return ret; 1656 } 1657 1658 return ARCHIVE_OK; 1659 } 1660 1661 static int process_head_file(struct archive_read* a, struct rar5* rar, 1662 struct archive_entry* entry, size_t block_flags) 1663 { 1664 int64_t extra_data_size = 0; 1665 size_t data_size = 0; 1666 size_t file_flags = 0; 1667 size_t file_attr = 0; 1668 size_t compression_info = 0; 1669 size_t host_os = 0; 1670 size_t name_size = 0; 1671 uint64_t unpacked_size, window_size; 1672 uint32_t mtime = 0, crc = 0; 1673 int c_method = 0, c_version = 0; 1674 char name_utf8_buf[MAX_NAME_IN_BYTES]; 1675 const uint8_t* p; 1676 1677 enum FILE_FLAGS { 1678 DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004, 1679 UNKNOWN_UNPACKED_SIZE = 0x0008, 1680 }; 1681 1682 enum FILE_ATTRS { 1683 ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4, 1684 ATTR_DIRECTORY = 0x10, 1685 }; 1686 1687 enum COMP_INFO_FLAGS { 1688 SOLID = 0x0040, 1689 }; 1690 1691 enum HOST_OS { 1692 HOST_WINDOWS = 0, 1693 HOST_UNIX = 1, 1694 }; 1695 1696 archive_entry_clear(entry); 1697 1698 /* Do not reset file context if we're switching archives. */ 1699 if(!rar->cstate.switch_multivolume) { 1700 reset_file_context(rar); 1701 } 1702 1703 if(block_flags & HFL_EXTRA_DATA) { 1704 uint64_t edata_size = 0; 1705 if(!read_var(a, &edata_size, NULL)) 1706 return ARCHIVE_EOF; 1707 1708 /* Intentional type cast from unsigned to signed. */ 1709 extra_data_size = (int64_t) edata_size; 1710 } 1711 1712 if(block_flags & HFL_DATA) { 1713 if(!read_var_sized(a, &data_size, NULL)) 1714 return ARCHIVE_EOF; 1715 1716 rar->file.bytes_remaining = data_size; 1717 } else { 1718 rar->file.bytes_remaining = 0; 1719 1720 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1721 "no data found in file/service block"); 1722 return ARCHIVE_FATAL; 1723 } 1724 1725 if(!read_var_sized(a, &file_flags, NULL)) 1726 return ARCHIVE_EOF; 1727 1728 if(!read_var(a, &unpacked_size, NULL)) 1729 return ARCHIVE_EOF; 1730 1731 if(file_flags & UNKNOWN_UNPACKED_SIZE) { 1732 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1733 "Files with unknown unpacked size are not supported"); 1734 return ARCHIVE_FATAL; 1735 } 1736 1737 rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0); 1738 1739 if(!read_var_sized(a, &file_attr, NULL)) 1740 return ARCHIVE_EOF; 1741 1742 if(file_flags & UTIME) { 1743 if(!read_u32(a, &mtime)) 1744 return ARCHIVE_EOF; 1745 } 1746 1747 if(file_flags & CRC32) { 1748 if(!read_u32(a, &crc)) 1749 return ARCHIVE_EOF; 1750 } 1751 1752 if(!read_var_sized(a, &compression_info, NULL)) 1753 return ARCHIVE_EOF; 1754 1755 c_method = (int) (compression_info >> 7) & 0x7; 1756 c_version = (int) (compression_info & 0x3f); 1757 1758 /* RAR5 seems to limit the dictionary size to 64MB. */ 1759 window_size = (rar->file.dir > 0) ? 1760 0 : 1761 g_unpack_window_size << ((compression_info >> 10) & 15); 1762 rar->cstate.method = c_method; 1763 rar->cstate.version = c_version + 50; 1764 rar->file.solid = (compression_info & SOLID) > 0; 1765 1766 /* Archives which declare solid files without initializing the window 1767 * buffer first are invalid, unless previous data was encrypted, in 1768 * which case we may never have had the chance */ 1769 1770 if(rar->file.solid > 0 && rar->cstate.data_encrypted == 0 && 1771 rar->cstate.window_buf == NULL) { 1772 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1773 "Declared solid file, but no window buffer " 1774 "initialized yet."); 1775 return ARCHIVE_FATAL; 1776 } 1777 1778 /* Check if window_size is a sane value. Also, if the file is not 1779 * declared as a directory, disallow window_size == 0. */ 1780 if(window_size > (64 * 1024 * 1024) || 1781 (rar->file.dir == 0 && window_size == 0)) 1782 { 1783 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1784 "Declared dictionary size is not supported."); 1785 return ARCHIVE_FATAL; 1786 } 1787 1788 if(rar->file.solid > 0) { 1789 /* Re-check if current window size is the same as previous 1790 * window size (for solid files only). */ 1791 if(rar->file.solid_window_size > 0 && 1792 rar->file.solid_window_size != (ssize_t) window_size) 1793 { 1794 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1795 "Window size for this solid file doesn't match " 1796 "the window size used in previous solid file. "); 1797 return ARCHIVE_FATAL; 1798 } 1799 } 1800 else 1801 rar->cstate.data_encrypted = 0; /* Reset for new buffer */ 1802 1803 if(rar->cstate.window_size < (ssize_t) window_size && 1804 rar->cstate.window_buf) 1805 { 1806 /* The `data_ready` stack contains pointers to the `window_buf` or 1807 * `filtered_buf` buffers. Since we're about to reallocate the first 1808 * buffer, some of those pointers could become invalid. Therefore, we 1809 * need to dispose of all entries from the stack before attempting the 1810 * realloc. */ 1811 clear_data_ready_stack(rar); 1812 1813 /* If window_buf has been allocated before, reallocate it, so 1814 * that its size will match new window_size. */ 1815 1816 uint8_t* new_window_buf = 1817 realloc(rar->cstate.window_buf, (size_t) window_size); 1818 1819 if(!new_window_buf) { 1820 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1821 "Not enough memory when trying to realloc the window " 1822 "buffer."); 1823 return ARCHIVE_FATAL; 1824 } 1825 1826 rar->cstate.window_buf = new_window_buf; 1827 } 1828 1829 /* Values up to 64M should fit into ssize_t on every 1830 * architecture. */ 1831 rar->cstate.window_size = (ssize_t) window_size; 1832 1833 if(rar->file.solid > 0 && rar->file.solid_window_size == 0) { 1834 /* Solid files have to have the same window_size across 1835 whole archive. Remember the window_size parameter 1836 for first solid file found. */ 1837 rar->file.solid_window_size = rar->cstate.window_size; 1838 } 1839 1840 init_window_mask(rar); 1841 1842 rar->file.service = 0; 1843 1844 if(!read_var_sized(a, &host_os, NULL)) 1845 return ARCHIVE_EOF; 1846 1847 if(host_os == HOST_WINDOWS) { 1848 /* Host OS is Windows */ 1849 1850 __LA_MODE_T mode; 1851 1852 if(file_attr & ATTR_DIRECTORY) { 1853 if (file_attr & ATTR_READONLY) { 1854 mode = 0555 | AE_IFDIR; 1855 } else { 1856 mode = 0755 | AE_IFDIR; 1857 } 1858 } else { 1859 if (file_attr & ATTR_READONLY) { 1860 mode = 0444 | AE_IFREG; 1861 } else { 1862 mode = 0644 | AE_IFREG; 1863 } 1864 } 1865 1866 archive_entry_set_mode(entry, mode); 1867 1868 if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) { 1869 char *fflags_text, *ptr; 1870 /* allocate for ",rdonly,hidden,system" */ 1871 fflags_text = malloc(22 * sizeof(*fflags_text)); 1872 if (fflags_text != NULL) { 1873 ptr = fflags_text; 1874 if (file_attr & ATTR_READONLY) { 1875 strcpy(ptr, ",rdonly"); 1876 ptr = ptr + 7; 1877 } 1878 if (file_attr & ATTR_HIDDEN) { 1879 strcpy(ptr, ",hidden"); 1880 ptr = ptr + 7; 1881 } 1882 if (file_attr & ATTR_SYSTEM) { 1883 strcpy(ptr, ",system"); 1884 ptr = ptr + 7; 1885 } 1886 if (ptr > fflags_text) { 1887 archive_entry_copy_fflags_text(entry, 1888 fflags_text + 1); 1889 } 1890 free(fflags_text); 1891 } 1892 } 1893 } else if(host_os == HOST_UNIX) { 1894 /* Host OS is Unix */ 1895 archive_entry_set_mode(entry, (__LA_MODE_T) file_attr); 1896 } else { 1897 /* Unknown host OS */ 1898 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1899 "Unsupported Host OS: 0x%x", (int) host_os); 1900 1901 return ARCHIVE_FATAL; 1902 } 1903 1904 if(!read_var_sized(a, &name_size, NULL)) 1905 return ARCHIVE_EOF; 1906 1907 if(name_size > (MAX_NAME_IN_CHARS - 1)) { 1908 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1909 "Filename is too long"); 1910 1911 return ARCHIVE_FATAL; 1912 } 1913 1914 if(name_size == 0) { 1915 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1916 "No filename specified"); 1917 1918 return ARCHIVE_FATAL; 1919 } 1920 1921 if(!read_ahead(a, name_size, &p)) 1922 return ARCHIVE_EOF; 1923 1924 memcpy(name_utf8_buf, p, name_size); 1925 name_utf8_buf[name_size] = 0; 1926 if(ARCHIVE_OK != consume(a, name_size)) { 1927 return ARCHIVE_EOF; 1928 } 1929 1930 archive_entry_update_pathname_utf8(entry, name_utf8_buf); 1931 1932 if(extra_data_size > 0) { 1933 int ret = process_head_file_extra(a, entry, rar, 1934 extra_data_size); 1935 1936 /* 1937 * TODO: rewrite or remove useless sanity check 1938 * as extra_data_size is not passed as a pointer 1939 * 1940 if(extra_data_size < 0) { 1941 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1942 "File extra data size is not zero"); 1943 return ARCHIVE_FATAL; 1944 } 1945 */ 1946 1947 if(ret != ARCHIVE_OK) 1948 return ret; 1949 } 1950 1951 if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) { 1952 rar->file.unpacked_size = (ssize_t) unpacked_size; 1953 if(rar->file.redir_type == REDIR_TYPE_NONE) 1954 archive_entry_set_size(entry, unpacked_size); 1955 } 1956 1957 if(file_flags & UTIME) { 1958 archive_entry_set_mtime(entry, (time_t) mtime, 0); 1959 } 1960 1961 if(file_flags & CRC32) { 1962 rar->file.stored_crc32 = crc; 1963 } 1964 1965 if(!rar->cstate.switch_multivolume) { 1966 /* Do not reinitialize unpacking state if we're switching 1967 * archives. */ 1968 rar->cstate.block_parsing_finished = 1; 1969 rar->cstate.all_filters_applied = 1; 1970 rar->cstate.initialized = 0; 1971 } 1972 1973 if(rar->generic.split_before > 0) { 1974 /* If now we're standing on a header that has a 'split before' 1975 * mark, it means we're standing on a 'continuation' file 1976 * header. Signal the caller that if it wants to move to 1977 * another file, it must call rar5_read_header() function 1978 * again. */ 1979 1980 return ARCHIVE_RETRY; 1981 } else { 1982 return ARCHIVE_OK; 1983 } 1984 } 1985 1986 static int process_head_service(struct archive_read* a, struct rar5* rar, 1987 struct archive_entry* entry, size_t block_flags) 1988 { 1989 /* Process this SERVICE block the same way as FILE blocks. */ 1990 int ret = process_head_file(a, rar, entry, block_flags); 1991 if(ret != ARCHIVE_OK) 1992 return ret; 1993 1994 rar->file.service = 1; 1995 1996 /* But skip the data part automatically. It's no use for the user 1997 * anyway. It contains only service data, not even needed to 1998 * properly unpack the file. */ 1999 ret = rar5_read_data_skip(a); 2000 if(ret != ARCHIVE_OK) 2001 return ret; 2002 2003 /* After skipping, try parsing another block automatically. */ 2004 return ARCHIVE_RETRY; 2005 } 2006 2007 static int process_head_main(struct archive_read* a, struct rar5* rar, 2008 struct archive_entry* entry, size_t block_flags) 2009 { 2010 int ret; 2011 uint64_t extra_data_size = 0; 2012 size_t extra_field_size = 0; 2013 size_t extra_field_id = 0; 2014 size_t archive_flags = 0; 2015 2016 enum MAIN_FLAGS { 2017 VOLUME = 0x0001, /* multi-volume archive */ 2018 VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't 2019 * have it */ 2020 SOLID = 0x0004, /* solid archive */ 2021 PROTECT = 0x0008, /* contains Recovery info */ 2022 LOCK = 0x0010, /* readonly flag, not used */ 2023 }; 2024 2025 enum MAIN_EXTRA { 2026 // Just one attribute here. 2027 LOCATOR = 0x01, 2028 }; 2029 2030 (void) entry; 2031 2032 if(block_flags & HFL_EXTRA_DATA) { 2033 if(!read_var(a, &extra_data_size, NULL)) 2034 return ARCHIVE_EOF; 2035 } else { 2036 extra_data_size = 0; 2037 } 2038 2039 if(!read_var_sized(a, &archive_flags, NULL)) { 2040 return ARCHIVE_EOF; 2041 } 2042 2043 rar->main.volume = (archive_flags & VOLUME) > 0; 2044 rar->main.solid = (archive_flags & SOLID) > 0; 2045 2046 if(archive_flags & VOLUME_NUMBER) { 2047 size_t v = 0; 2048 if(!read_var_sized(a, &v, NULL)) { 2049 return ARCHIVE_EOF; 2050 } 2051 2052 if (v > UINT_MAX) { 2053 archive_set_error(&a->archive, 2054 ARCHIVE_ERRNO_FILE_FORMAT, 2055 "Invalid volume number"); 2056 return ARCHIVE_FATAL; 2057 } 2058 2059 rar->main.vol_no = (unsigned int) v; 2060 } else { 2061 rar->main.vol_no = 0; 2062 } 2063 2064 if(rar->vol.expected_vol_no > 0 && 2065 rar->main.vol_no != rar->vol.expected_vol_no) 2066 { 2067 /* Returning EOF instead of FATAL because of strange 2068 * libarchive behavior. When opening multiple files via 2069 * archive_read_open_filenames(), after reading up the whole 2070 * last file, the __archive_read_ahead function wraps up to 2071 * the first archive instead of returning EOF. */ 2072 return ARCHIVE_EOF; 2073 } 2074 2075 if(extra_data_size == 0) { 2076 /* Early return. */ 2077 return ARCHIVE_OK; 2078 } 2079 2080 if(!read_var_sized(a, &extra_field_size, NULL)) { 2081 return ARCHIVE_EOF; 2082 } 2083 2084 if(!read_var_sized(a, &extra_field_id, NULL)) { 2085 return ARCHIVE_EOF; 2086 } 2087 2088 if(extra_field_size == 0) { 2089 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2090 "Invalid extra field size"); 2091 return ARCHIVE_FATAL; 2092 } 2093 2094 switch(extra_field_id) { 2095 case LOCATOR: 2096 ret = process_main_locator_extra_block(a, rar); 2097 if(ret != ARCHIVE_OK) { 2098 /* Error while parsing main locator extra 2099 * block. */ 2100 return ret; 2101 } 2102 2103 break; 2104 default: 2105 archive_set_error(&a->archive, 2106 ARCHIVE_ERRNO_FILE_FORMAT, 2107 "Unsupported extra type (0x%x)", 2108 (int) extra_field_id); 2109 return ARCHIVE_FATAL; 2110 } 2111 2112 return ARCHIVE_OK; 2113 } 2114 2115 static int skip_unprocessed_bytes(struct archive_read* a) { 2116 struct rar5* rar = get_context(a); 2117 int ret; 2118 2119 if(rar->file.bytes_remaining) { 2120 /* Use different skipping method in block merging mode than in 2121 * normal mode. If merge mode is active, rar5_read_data_skip 2122 * can't be used, because it could allow recursive use of 2123 * merge_block() * function, and this function doesn't support 2124 * recursive use. */ 2125 if(rar->merge_mode) { 2126 /* Discard whole merged block. This is valid in solid 2127 * mode as well, because the code will discard blocks 2128 * only if those blocks are safe to discard (i.e. 2129 * they're not FILE blocks). */ 2130 ret = consume(a, rar->file.bytes_remaining); 2131 if(ret != ARCHIVE_OK) { 2132 return ret; 2133 } 2134 rar->file.bytes_remaining = 0; 2135 } else { 2136 /* If we're not in merge mode, use safe skipping code. 2137 * This will ensure we'll handle solid archives 2138 * properly. */ 2139 ret = rar5_read_data_skip(a); 2140 if(ret != ARCHIVE_OK) { 2141 return ret; 2142 } 2143 } 2144 } 2145 2146 return ARCHIVE_OK; 2147 } 2148 2149 static int scan_for_signature(struct archive_read* a); 2150 2151 /* Base block processing function. A 'base block' is a RARv5 header block 2152 * that tells the reader what kind of data is stored inside the block. 2153 * 2154 * From the birds-eye view a RAR file looks file this: 2155 * 2156 * <magic><base_block_1><base_block_2>...<base_block_n> 2157 * 2158 * There are a few types of base blocks. Those types are specified inside 2159 * the 'switch' statement in this function. For example purposes, I'll write 2160 * how a standard RARv5 file could look like here: 2161 * 2162 * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC> 2163 * 2164 * The structure above could describe an archive file with 3 files in it, 2165 * one service "QuickOpen" block (that is ignored by this parser), and an 2166 * end of file base block marker. 2167 * 2168 * If the file is stored in multiple archive files ("multiarchive"), it might 2169 * look like this: 2170 * 2171 * .part01.rar: <magic><MAIN><FILE><ENDARC> 2172 * .part02.rar: <magic><MAIN><FILE><ENDARC> 2173 * .part03.rar: <magic><MAIN><FILE><ENDARC> 2174 * 2175 * This example could describe 3 RAR files that contain ONE archived file. 2176 * Or it could describe 3 RAR files that contain 3 different files. Or 3 2177 * RAR files than contain 2 files. It all depends what metadata is stored in 2178 * the headers of <FILE> blocks. 2179 * 2180 * Each <FILE> block contains info about its size, the name of the file it's 2181 * storing inside, and whether this FILE block is a continuation block of 2182 * previous archive ('split before'), and is this FILE block should be 2183 * continued in another archive ('split after'). By parsing the 'split before' 2184 * and 'split after' flags, we're able to tell if multiple <FILE> base blocks 2185 * are describing one file, or multiple files (with the same filename, for 2186 * example). 2187 * 2188 * One thing to note is that if we're parsing the first <FILE> block, and 2189 * we see 'split after' flag, then we need to jump over to another <FILE> 2190 * block to be able to decompress rest of the data. To do this, we need 2191 * to skip the <ENDARC> block, then switch to another file, then skip the 2192 * <magic> block, <MAIN> block, and then we're standing on the proper 2193 * <FILE> block. 2194 */ 2195 2196 static int process_base_block(struct archive_read* a, 2197 struct archive_entry* entry) 2198 { 2199 const size_t SMALLEST_RAR5_BLOCK_SIZE = 3; 2200 2201 struct rar5* rar = get_context(a); 2202 uint32_t hdr_crc, computed_crc; 2203 size_t raw_hdr_size = 0, hdr_size_len, hdr_size; 2204 size_t header_id = 0; 2205 size_t header_flags = 0; 2206 const uint8_t* p; 2207 int ret; 2208 2209 enum HEADER_TYPE { 2210 HEAD_MARK = 0x00, HEAD_MAIN = 0x01, HEAD_FILE = 0x02, 2211 HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05, 2212 HEAD_UNKNOWN = 0xff, 2213 }; 2214 2215 /* Skip any unprocessed data for this file. */ 2216 ret = skip_unprocessed_bytes(a); 2217 if(ret != ARCHIVE_OK) 2218 return ret; 2219 2220 /* Read the expected CRC32 checksum. */ 2221 if(!read_u32(a, &hdr_crc)) { 2222 return ARCHIVE_EOF; 2223 } 2224 2225 /* Read header size. */ 2226 if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) { 2227 return ARCHIVE_EOF; 2228 } 2229 2230 hdr_size = raw_hdr_size + hdr_size_len; 2231 2232 /* Sanity check, maximum header size for RAR5 is 2MB. */ 2233 if(hdr_size > (2 * 1024 * 1024)) { 2234 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2235 "Base block header is too large"); 2236 2237 return ARCHIVE_FATAL; 2238 } 2239 2240 /* Additional sanity checks to weed out invalid files. */ 2241 if(raw_hdr_size == 0 || hdr_size_len == 0 || 2242 hdr_size < SMALLEST_RAR5_BLOCK_SIZE) 2243 { 2244 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2245 "Too small block encountered (%zu bytes)", 2246 raw_hdr_size); 2247 2248 return ARCHIVE_FATAL; 2249 } 2250 2251 /* Read the whole header data into memory, maximum memory use here is 2252 * 2MB. */ 2253 if(!read_ahead(a, hdr_size, &p)) { 2254 return ARCHIVE_EOF; 2255 } 2256 2257 /* Verify the CRC32 of the header data. */ 2258 computed_crc = (uint32_t) crc32(0, p, (int) hdr_size); 2259 if(computed_crc != hdr_crc) { 2260 #ifndef DONT_FAIL_ON_CRC_ERROR 2261 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2262 "Header CRC error"); 2263 2264 return ARCHIVE_FATAL; 2265 #endif 2266 } 2267 2268 /* If the checksum is OK, we proceed with parsing. */ 2269 if(ARCHIVE_OK != consume(a, hdr_size_len)) { 2270 return ARCHIVE_EOF; 2271 } 2272 2273 if(!read_var_sized(a, &header_id, NULL)) 2274 return ARCHIVE_EOF; 2275 2276 if(!read_var_sized(a, &header_flags, NULL)) 2277 return ARCHIVE_EOF; 2278 2279 rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0; 2280 rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0; 2281 rar->generic.size = (int)hdr_size; 2282 rar->generic.last_header_id = (int)header_id; 2283 rar->main.endarc = 0; 2284 2285 /* Those are possible header ids in RARv5. */ 2286 switch(header_id) { 2287 case HEAD_MAIN: 2288 ret = process_head_main(a, rar, entry, header_flags); 2289 2290 /* Main header doesn't have any files in it, so it's 2291 * pointless to return to the caller. Retry to next 2292 * header, which should be HEAD_FILE/HEAD_SERVICE. */ 2293 if(ret == ARCHIVE_OK) 2294 return ARCHIVE_RETRY; 2295 2296 return ret; 2297 case HEAD_SERVICE: 2298 ret = process_head_service(a, rar, entry, header_flags); 2299 return ret; 2300 case HEAD_FILE: 2301 ret = process_head_file(a, rar, entry, header_flags); 2302 return ret; 2303 case HEAD_CRYPT: 2304 archive_entry_set_is_metadata_encrypted(entry, 1); 2305 archive_entry_set_is_data_encrypted(entry, 1); 2306 rar->has_encrypted_entries = 1; 2307 rar->headers_are_encrypted = 1; 2308 archive_set_error(&a->archive, 2309 ARCHIVE_ERRNO_FILE_FORMAT, 2310 "Encryption is not supported"); 2311 return ARCHIVE_FATAL; 2312 case HEAD_ENDARC: 2313 rar->main.endarc = 1; 2314 2315 /* After encountering an end of file marker, we need 2316 * to take into consideration if this archive is 2317 * continued in another file (i.e. is it part01.rar: 2318 * is there a part02.rar?) */ 2319 if(rar->main.volume) { 2320 /* In case there is part02.rar, position the 2321 * read pointer in a proper place, so we can 2322 * resume parsing. */ 2323 ret = scan_for_signature(a); 2324 if(ret == ARCHIVE_FATAL) { 2325 return ARCHIVE_EOF; 2326 } else { 2327 if(rar->vol.expected_vol_no == 2328 UINT_MAX) { 2329 archive_set_error(&a->archive, 2330 ARCHIVE_ERRNO_FILE_FORMAT, 2331 "Header error"); 2332 return ARCHIVE_FATAL; 2333 } 2334 2335 rar->vol.expected_vol_no = 2336 rar->main.vol_no + 1; 2337 return ARCHIVE_OK; 2338 } 2339 } else { 2340 return ARCHIVE_EOF; 2341 } 2342 case HEAD_MARK: 2343 return ARCHIVE_EOF; 2344 default: 2345 if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) { 2346 archive_set_error(&a->archive, 2347 ARCHIVE_ERRNO_FILE_FORMAT, 2348 "Header type error"); 2349 return ARCHIVE_FATAL; 2350 } else { 2351 /* If the block is marked as 'skip if unknown', 2352 * do as the flag says: skip the block 2353 * instead on failing on it. */ 2354 return ARCHIVE_RETRY; 2355 } 2356 } 2357 2358 #if !defined WIN32 2359 // Not reached. 2360 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 2361 "Internal unpacker error"); 2362 return ARCHIVE_FATAL; 2363 #endif 2364 } 2365 2366 static int skip_base_block(struct archive_read* a) { 2367 int ret; 2368 struct rar5* rar = get_context(a); 2369 2370 /* Create a new local archive_entry structure that will be operated on 2371 * by header reader; operations on this archive_entry will be discarded. 2372 */ 2373 struct archive_entry* entry = archive_entry_new(); 2374 ret = process_base_block(a, entry); 2375 2376 /* Discard operations on this archive_entry structure. */ 2377 archive_entry_free(entry); 2378 if(ret == ARCHIVE_FATAL) 2379 return ret; 2380 2381 if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0) 2382 return ARCHIVE_OK; 2383 2384 if(ret == ARCHIVE_OK) 2385 return ARCHIVE_RETRY; 2386 else 2387 return ret; 2388 } 2389 2390 static int try_skip_sfx(struct archive_read *a) 2391 { 2392 const char *p; 2393 2394 if ((p = __archive_read_ahead(a, 7, NULL)) == NULL) 2395 return ARCHIVE_EOF; 2396 2397 if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) 2398 { 2399 char signature[sizeof(rar5_signature_xor)]; 2400 const void *h; 2401 const char *q; 2402 size_t skip, total = 0; 2403 ssize_t bytes, window = 4096; 2404 2405 rar5_signature(signature); 2406 2407 while (total + window <= (1024 * 512)) { 2408 h = __archive_read_ahead(a, window, &bytes); 2409 if (h == NULL) { 2410 /* Remaining bytes are less than window. */ 2411 window >>= 1; 2412 if (window < 0x40) 2413 goto fatal; 2414 continue; 2415 } 2416 if (bytes < 0x40) 2417 goto fatal; 2418 p = h; 2419 q = p + bytes; 2420 2421 /* 2422 * Scan ahead until we find something that looks 2423 * like the RAR header. 2424 */ 2425 while (p + 8 < q) { 2426 if (memcmp(p, signature, sizeof(signature)) == 0) { 2427 skip = p - (const char *)h; 2428 __archive_read_consume(a, skip); 2429 return (ARCHIVE_OK); 2430 } 2431 p += 0x10; 2432 } 2433 skip = p - (const char *)h; 2434 __archive_read_consume(a, skip); 2435 total += skip; 2436 } 2437 } 2438 2439 return ARCHIVE_OK; 2440 fatal: 2441 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2442 "Couldn't find out RAR header"); 2443 return (ARCHIVE_FATAL); 2444 } 2445 2446 static int rar5_read_header(struct archive_read *a, 2447 struct archive_entry *entry) 2448 { 2449 struct rar5* rar = get_context(a); 2450 int ret; 2451 2452 /* 2453 * It should be sufficient to call archive_read_next_header() for 2454 * a reader to determine if an entry is encrypted or not. 2455 */ 2456 if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) { 2457 rar->has_encrypted_entries = 0; 2458 } 2459 2460 if(rar->header_initialized == 0) { 2461 init_header(a); 2462 if ((ret = try_skip_sfx(a)) < ARCHIVE_WARN) 2463 return ret; 2464 rar->header_initialized = 1; 2465 } 2466 2467 if(rar->skipped_magic == 0) { 2468 if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) { 2469 return ARCHIVE_EOF; 2470 } 2471 2472 rar->skipped_magic = 1; 2473 } 2474 2475 do { 2476 ret = process_base_block(a, entry); 2477 } while(ret == ARCHIVE_RETRY || 2478 (rar->main.endarc > 0 && ret == ARCHIVE_OK)); 2479 2480 return ret; 2481 } 2482 2483 static void init_unpack(struct rar5* rar) { 2484 rar->file.calculated_crc32 = 0; 2485 init_window_mask(rar); 2486 2487 free(rar->cstate.window_buf); 2488 free(rar->cstate.filtered_buf); 2489 2490 if(rar->cstate.window_size > 0) { 2491 rar->cstate.window_buf = calloc(1, rar->cstate.window_size); 2492 rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size); 2493 } else { 2494 rar->cstate.window_buf = NULL; 2495 rar->cstate.filtered_buf = NULL; 2496 } 2497 2498 clear_data_ready_stack(rar); 2499 2500 rar->cstate.write_ptr = 0; 2501 rar->cstate.last_write_ptr = 0; 2502 2503 memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd)); 2504 memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld)); 2505 memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd)); 2506 memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd)); 2507 memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd)); 2508 } 2509 2510 static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) { 2511 int verify_crc; 2512 2513 if(rar->skip_mode) { 2514 #if defined CHECK_CRC_ON_SOLID_SKIP 2515 verify_crc = 1; 2516 #else 2517 verify_crc = 0; 2518 #endif 2519 } else 2520 verify_crc = 1; 2521 2522 if(verify_crc) { 2523 /* Don't update CRC32 if the file doesn't have the 2524 * `stored_crc32` info filled in. */ 2525 if(rar->file.stored_crc32 > 0) { 2526 rar->file.calculated_crc32 = 2527 crc32(rar->file.calculated_crc32, p, (unsigned int)to_read); 2528 } 2529 2530 /* Check if the file uses an optional BLAKE2sp checksum 2531 * algorithm. */ 2532 if(rar->file.has_blake2 > 0) { 2533 /* Return value of the `update` function is always 0, 2534 * so we can explicitly ignore it here. */ 2535 (void) blake2sp_update(&rar->file.b2state, p, to_read); 2536 } 2537 } 2538 } 2539 2540 static int create_decode_tables(uint8_t* bit_length, 2541 struct decode_table* table, int size) 2542 { 2543 int code, upper_limit = 0, i, lc[16]; 2544 uint32_t decode_pos_clone[rar5_countof(table->decode_pos)]; 2545 ssize_t cur_len, quick_data_size; 2546 2547 memset(&lc, 0, sizeof(lc)); 2548 memset(table->decode_num, 0, sizeof(table->decode_num)); 2549 table->size = size; 2550 table->quick_bits = size == HUFF_NC ? 10 : 7; 2551 2552 for(i = 0; i < size; i++) { 2553 lc[bit_length[i] & 15]++; 2554 } 2555 2556 lc[0] = 0; 2557 table->decode_pos[0] = 0; 2558 table->decode_len[0] = 0; 2559 2560 for(i = 1; i < 16; i++) { 2561 upper_limit += lc[i]; 2562 2563 table->decode_len[i] = upper_limit << (16 - i); 2564 table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1]; 2565 2566 upper_limit <<= 1; 2567 } 2568 2569 memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone)); 2570 2571 for(i = 0; i < size; i++) { 2572 uint8_t clen = bit_length[i] & 15; 2573 if(clen > 0) { 2574 int last_pos = decode_pos_clone[clen]; 2575 table->decode_num[last_pos] = i; 2576 decode_pos_clone[clen]++; 2577 } 2578 } 2579 2580 quick_data_size = (int64_t)1 << table->quick_bits; 2581 cur_len = 1; 2582 for(code = 0; code < quick_data_size; code++) { 2583 int bit_field = code << (16 - table->quick_bits); 2584 int dist, pos; 2585 2586 while(cur_len < rar5_countof(table->decode_len) && 2587 bit_field >= table->decode_len[cur_len]) { 2588 cur_len++; 2589 } 2590 2591 table->quick_len[code] = (uint8_t) cur_len; 2592 2593 dist = bit_field - table->decode_len[cur_len - 1]; 2594 dist >>= (16 - cur_len); 2595 2596 pos = table->decode_pos[cur_len & 15] + dist; 2597 if(cur_len < rar5_countof(table->decode_pos) && pos < size) { 2598 table->quick_num[code] = table->decode_num[pos]; 2599 } else { 2600 table->quick_num[code] = 0; 2601 } 2602 } 2603 2604 return ARCHIVE_OK; 2605 } 2606 2607 static int decode_number(struct archive_read* a, struct decode_table* table, 2608 const uint8_t* p, uint16_t* num) 2609 { 2610 int i, bits, dist, ret; 2611 uint16_t bitfield; 2612 uint32_t pos; 2613 struct rar5* rar = get_context(a); 2614 2615 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &bitfield))) { 2616 return ret; 2617 } 2618 2619 bitfield &= 0xfffe; 2620 2621 if(bitfield < table->decode_len[table->quick_bits]) { 2622 int code = bitfield >> (16 - table->quick_bits); 2623 skip_bits(rar, table->quick_len[code]); 2624 *num = table->quick_num[code]; 2625 return ARCHIVE_OK; 2626 } 2627 2628 bits = 15; 2629 2630 for(i = table->quick_bits + 1; i < 15; i++) { 2631 if(bitfield < table->decode_len[i]) { 2632 bits = i; 2633 break; 2634 } 2635 } 2636 2637 skip_bits(rar, bits); 2638 2639 dist = bitfield - table->decode_len[bits - 1]; 2640 dist >>= (16 - bits); 2641 pos = table->decode_pos[bits] + dist; 2642 2643 if(pos >= table->size) 2644 pos = 0; 2645 2646 *num = table->decode_num[pos]; 2647 return ARCHIVE_OK; 2648 } 2649 2650 /* Reads and parses Huffman tables from the beginning of the block. */ 2651 static int parse_tables(struct archive_read* a, struct rar5* rar, 2652 const uint8_t* p) 2653 { 2654 int ret, value, i, w, idx = 0; 2655 uint8_t bit_length[HUFF_BC], 2656 table[HUFF_TABLE_SIZE], 2657 nibble_mask = 0xF0, 2658 nibble_shift = 4; 2659 2660 enum { ESCAPE = 15 }; 2661 2662 /* The data for table generation is compressed using a simple RLE-like 2663 * algorithm when storing zeroes, so we need to unpack it first. */ 2664 for(w = 0, i = 0; w < HUFF_BC;) { 2665 if(i >= rar->cstate.cur_block_size) { 2666 /* Truncated data, can't continue. */ 2667 archive_set_error(&a->archive, 2668 ARCHIVE_ERRNO_FILE_FORMAT, 2669 "Truncated data in huffman tables"); 2670 return ARCHIVE_FATAL; 2671 } 2672 2673 value = (p[i] & nibble_mask) >> nibble_shift; 2674 2675 if(nibble_mask == 0x0F) 2676 ++i; 2677 2678 nibble_mask ^= 0xFF; 2679 nibble_shift ^= 4; 2680 2681 /* Values smaller than 15 is data, so we write it directly. 2682 * Value 15 is a flag telling us that we need to unpack more 2683 * bytes. */ 2684 if(value == ESCAPE) { 2685 value = (p[i] & nibble_mask) >> nibble_shift; 2686 if(nibble_mask == 0x0F) 2687 ++i; 2688 nibble_mask ^= 0xFF; 2689 nibble_shift ^= 4; 2690 2691 if(value == 0) { 2692 /* We sometimes need to write the actual value 2693 * of 15, so this case handles that. */ 2694 bit_length[w++] = ESCAPE; 2695 } else { 2696 int k; 2697 2698 /* Fill zeroes. */ 2699 for(k = 0; (k < value + 2) && (w < HUFF_BC); 2700 k++) { 2701 bit_length[w++] = 0; 2702 } 2703 } 2704 } else { 2705 bit_length[w++] = value; 2706 } 2707 } 2708 2709 rar->bits.in_addr = i; 2710 rar->bits.bit_addr = nibble_shift ^ 4; 2711 2712 ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC); 2713 if(ret != ARCHIVE_OK) { 2714 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2715 "Decoding huffman tables failed"); 2716 return ARCHIVE_FATAL; 2717 } 2718 2719 for(i = 0; i < HUFF_TABLE_SIZE;) { 2720 uint16_t num; 2721 2722 ret = decode_number(a, &rar->cstate.bd, p, &num); 2723 if(ret != ARCHIVE_OK) { 2724 archive_set_error(&a->archive, 2725 ARCHIVE_ERRNO_FILE_FORMAT, 2726 "Decoding huffman tables failed"); 2727 return ARCHIVE_FATAL; 2728 } 2729 2730 if(num < 16) { 2731 /* 0..15: store directly */ 2732 table[i] = (uint8_t) num; 2733 i++; 2734 } else if(num < 18) { 2735 /* 16..17: repeat previous code */ 2736 uint16_t n; 2737 2738 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n))) 2739 return ret; 2740 2741 if(num == 16) { 2742 n >>= 13; 2743 n += 3; 2744 skip_bits(rar, 3); 2745 } else { 2746 n >>= 9; 2747 n += 11; 2748 skip_bits(rar, 7); 2749 } 2750 2751 if(i > 0) { 2752 while(n-- > 0 && i < HUFF_TABLE_SIZE) { 2753 table[i] = table[i - 1]; 2754 i++; 2755 } 2756 } else { 2757 archive_set_error(&a->archive, 2758 ARCHIVE_ERRNO_FILE_FORMAT, 2759 "Unexpected error when decoding " 2760 "huffman tables"); 2761 return ARCHIVE_FATAL; 2762 } 2763 } else { 2764 /* other codes: fill with zeroes `n` times */ 2765 uint16_t n; 2766 2767 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n))) 2768 return ret; 2769 2770 if(num == 18) { 2771 n >>= 13; 2772 n += 3; 2773 skip_bits(rar, 3); 2774 } else { 2775 n >>= 9; 2776 n += 11; 2777 skip_bits(rar, 7); 2778 } 2779 2780 while(n-- > 0 && i < HUFF_TABLE_SIZE) 2781 table[i++] = 0; 2782 } 2783 } 2784 2785 ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC); 2786 if(ret != ARCHIVE_OK) { 2787 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2788 "Failed to create literal table"); 2789 return ARCHIVE_FATAL; 2790 } 2791 2792 idx += HUFF_NC; 2793 2794 ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC); 2795 if(ret != ARCHIVE_OK) { 2796 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2797 "Failed to create distance table"); 2798 return ARCHIVE_FATAL; 2799 } 2800 2801 idx += HUFF_DC; 2802 2803 ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC); 2804 if(ret != ARCHIVE_OK) { 2805 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2806 "Failed to create lower bits of distances table"); 2807 return ARCHIVE_FATAL; 2808 } 2809 2810 idx += HUFF_LDC; 2811 2812 ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC); 2813 if(ret != ARCHIVE_OK) { 2814 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2815 "Failed to create repeating distances table"); 2816 return ARCHIVE_FATAL; 2817 } 2818 2819 return ARCHIVE_OK; 2820 } 2821 2822 /* Parses the block header, verifies its CRC byte, and saves the header 2823 * fields inside the `hdr` pointer. */ 2824 static int parse_block_header(struct archive_read* a, const uint8_t* p, 2825 ssize_t* block_size, struct compressed_block_header* hdr) 2826 { 2827 uint8_t calculated_cksum; 2828 memcpy(hdr, p, sizeof(struct compressed_block_header)); 2829 2830 if(bf_byte_count(hdr) > 2) { 2831 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2832 "Unsupported block header size (was %d, max is 2)", 2833 bf_byte_count(hdr)); 2834 return ARCHIVE_FATAL; 2835 } 2836 2837 /* This should probably use bit reader interface in order to be more 2838 * future-proof. */ 2839 *block_size = 0; 2840 switch(bf_byte_count(hdr)) { 2841 /* 1-byte block size */ 2842 case 0: 2843 *block_size = *(const uint8_t*) &p[2]; 2844 break; 2845 2846 /* 2-byte block size */ 2847 case 1: 2848 *block_size = archive_le16dec(&p[2]); 2849 break; 2850 2851 /* 3-byte block size */ 2852 case 2: 2853 *block_size = archive_le32dec(&p[2]); 2854 *block_size &= 0x00FFFFFF; 2855 break; 2856 2857 /* Other block sizes are not supported. This case is not 2858 * reached, because we have an 'if' guard before the switch 2859 * that makes sure of it. */ 2860 default: 2861 return ARCHIVE_FATAL; 2862 } 2863 2864 /* Verify the block header checksum. 0x5A is a magic value and is 2865 * always * constant. */ 2866 calculated_cksum = 0x5A 2867 ^ (uint8_t) hdr->block_flags_u8 2868 ^ (uint8_t) *block_size 2869 ^ (uint8_t) (*block_size >> 8) 2870 ^ (uint8_t) (*block_size >> 16); 2871 2872 if(calculated_cksum != hdr->block_cksum) { 2873 #ifndef DONT_FAIL_ON_CRC_ERROR 2874 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2875 "Block checksum error: got 0x%x, expected 0x%x", 2876 hdr->block_cksum, calculated_cksum); 2877 2878 return ARCHIVE_FATAL; 2879 #endif 2880 } 2881 2882 return ARCHIVE_OK; 2883 } 2884 2885 /* Convenience function used during filter processing. */ 2886 static int parse_filter_data(struct archive_read* a, struct rar5* rar, 2887 const uint8_t* p, uint32_t* filter_data) 2888 { 2889 int i, bytes, ret; 2890 uint32_t data = 0; 2891 2892 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, p, 2, &bytes))) 2893 return ret; 2894 2895 bytes++; 2896 2897 for(i = 0; i < bytes; i++) { 2898 uint16_t byte; 2899 2900 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &byte))) { 2901 return ret; 2902 } 2903 2904 /* Cast to uint32_t will ensure the shift operation will not 2905 * produce undefined result. */ 2906 data += ((uint32_t) byte >> 8) << (i * 8); 2907 skip_bits(rar, 8); 2908 } 2909 2910 *filter_data = data; 2911 return ARCHIVE_OK; 2912 } 2913 2914 /* Function is used during sanity checking. */ 2915 static int is_valid_filter_block_start(struct rar5* rar, 2916 uint32_t start) 2917 { 2918 const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr; 2919 const int64_t last_bs = rar->cstate.last_block_start; 2920 const ssize_t last_bl = rar->cstate.last_block_length; 2921 2922 if(last_bs == 0 || last_bl == 0) { 2923 /* We didn't have any filters yet, so accept this offset. */ 2924 return 1; 2925 } 2926 2927 if(block_start >= last_bs + last_bl) { 2928 /* Current offset is bigger than last block's end offset, so 2929 * accept current offset. */ 2930 return 1; 2931 } 2932 2933 /* Any other case is not a normal situation and we should fail. */ 2934 return 0; 2935 } 2936 2937 /* The function will create a new filter, read its parameters from the input 2938 * stream and add it to the filter collection. */ 2939 static int parse_filter(struct archive_read* ar, const uint8_t* p) { 2940 uint32_t block_start, block_length; 2941 uint16_t filter_type; 2942 struct filter_info* filt = NULL; 2943 struct rar5* rar = get_context(ar); 2944 int ret; 2945 2946 /* Read the parameters from the input stream. */ 2947 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_start))) 2948 return ret; 2949 2950 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_length))) 2951 return ret; 2952 2953 if(ARCHIVE_OK != (ret = read_bits_16(ar, rar, p, &filter_type))) 2954 return ret; 2955 2956 filter_type >>= 13; 2957 skip_bits(rar, 3); 2958 2959 /* Perform some sanity checks on this filter parameters. Note that we 2960 * allow only DELTA, E8/E9 and ARM filters here, because rest of 2961 * filters are not used in RARv5. */ 2962 2963 if(block_length < 4 || 2964 block_length > 0x400000 || 2965 filter_type > FILTER_ARM || 2966 !is_valid_filter_block_start(rar, block_start)) 2967 { 2968 archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2969 "Invalid filter encountered"); 2970 return ARCHIVE_FATAL; 2971 } 2972 2973 /* Allocate a new filter. */ 2974 filt = add_new_filter(rar); 2975 if(filt == NULL) { 2976 archive_set_error(&ar->archive, ENOMEM, 2977 "Can't allocate memory for a filter descriptor."); 2978 return ARCHIVE_FATAL; 2979 } 2980 2981 filt->type = filter_type; 2982 filt->block_start = rar->cstate.write_ptr + block_start; 2983 filt->block_length = block_length; 2984 2985 rar->cstate.last_block_start = filt->block_start; 2986 rar->cstate.last_block_length = filt->block_length; 2987 2988 /* Read some more data in case this is a DELTA filter. Other filter 2989 * types don't require any additional data over what was already 2990 * read. */ 2991 if(filter_type == FILTER_DELTA) { 2992 int channels; 2993 2994 if(ARCHIVE_OK != (ret = read_consume_bits(ar, rar, p, 5, &channels))) 2995 return ret; 2996 2997 filt->channels = channels + 1; 2998 } 2999 3000 return ARCHIVE_OK; 3001 } 3002 3003 static int decode_code_length(struct archive_read* a, struct rar5* rar, 3004 const uint8_t* p, uint16_t code) 3005 { 3006 int lbits, length = 2; 3007 3008 if(code < 8) { 3009 lbits = 0; 3010 length += code; 3011 } else { 3012 lbits = code / 4 - 1; 3013 length += (4 | (code & 3)) << lbits; 3014 } 3015 3016 if(lbits > 0) { 3017 int add; 3018 3019 if(ARCHIVE_OK != read_consume_bits(a, rar, p, lbits, &add)) 3020 return -1; 3021 3022 length += add; 3023 } 3024 3025 return length; 3026 } 3027 3028 static int copy_string(struct archive_read* a, int len, int dist) { 3029 struct rar5* rar = get_context(a); 3030 const ssize_t cmask = rar->cstate.window_mask; 3031 const uint64_t write_ptr = rar->cstate.write_ptr + 3032 rar->cstate.solid_offset; 3033 int i; 3034 3035 if (rar->cstate.window_buf == NULL) 3036 return ARCHIVE_FATAL; 3037 3038 /* The unpacker spends most of the time in this function. It would be 3039 * a good idea to introduce some optimizations here. 3040 * 3041 * Just remember that this loop treats buffers that overlap differently 3042 * than buffers that do not overlap. This is why a simple memcpy(3) 3043 * call will not be enough. */ 3044 3045 for(i = 0; i < len; i++) { 3046 const ssize_t write_idx = (write_ptr + i) & cmask; 3047 const ssize_t read_idx = (write_ptr + i - dist) & cmask; 3048 rar->cstate.window_buf[write_idx] = 3049 rar->cstate.window_buf[read_idx]; 3050 } 3051 3052 rar->cstate.write_ptr += len; 3053 return ARCHIVE_OK; 3054 } 3055 3056 static int do_uncompress_block(struct archive_read* a, const uint8_t* p) { 3057 struct rar5* rar = get_context(a); 3058 uint16_t num; 3059 int ret; 3060 3061 const uint64_t cmask = rar->cstate.window_mask; 3062 const struct compressed_block_header* hdr = &rar->last_block_hdr; 3063 const uint8_t bit_size = 1 + bf_bit_size(hdr); 3064 3065 while(1) { 3066 if(rar->cstate.write_ptr - rar->cstate.last_write_ptr > 3067 (rar->cstate.window_size >> 1)) { 3068 /* Don't allow growing data by more than half of the 3069 * window size at a time. In such case, break the loop; 3070 * next call to this function will continue processing 3071 * from this moment. */ 3072 break; 3073 } 3074 3075 if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 || 3076 (rar->bits.in_addr == rar->cstate.cur_block_size - 1 && 3077 rar->bits.bit_addr >= bit_size)) 3078 { 3079 /* If the program counter is here, it means the 3080 * function has finished processing the block. */ 3081 rar->cstate.block_parsing_finished = 1; 3082 break; 3083 } 3084 3085 /* Decode the next literal. */ 3086 if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) { 3087 return ARCHIVE_EOF; 3088 } 3089 3090 /* Num holds a decompression literal, or 'command code'. 3091 * 3092 * - Values lower than 256 are just bytes. Those codes 3093 * can be stored in the output buffer directly. 3094 * 3095 * - Code 256 defines a new filter, which is later used to 3096 * ransform the data block accordingly to the filter type. 3097 * The data block needs to be fully uncompressed first. 3098 * 3099 * - Code bigger than 257 and smaller than 262 define 3100 * a repetition pattern that should be copied from 3101 * an already uncompressed chunk of data. 3102 */ 3103 3104 if(num < 256) { 3105 /* Directly store the byte. */ 3106 int64_t write_idx = rar->cstate.solid_offset + 3107 rar->cstate.write_ptr++; 3108 3109 rar->cstate.window_buf[write_idx & cmask] = 3110 (uint8_t) num; 3111 continue; 3112 } else if(num >= 262) { 3113 uint16_t dist_slot; 3114 int len = decode_code_length(a, rar, p, num - 262), 3115 dbits, 3116 dist = 1; 3117 3118 if(len == -1) { 3119 archive_set_error(&a->archive, 3120 ARCHIVE_ERRNO_PROGRAMMER, 3121 "Failed to decode the code length"); 3122 3123 return ARCHIVE_FATAL; 3124 } 3125 3126 if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p, 3127 &dist_slot)) 3128 { 3129 archive_set_error(&a->archive, 3130 ARCHIVE_ERRNO_PROGRAMMER, 3131 "Failed to decode the distance slot"); 3132 3133 return ARCHIVE_FATAL; 3134 } 3135 3136 if(dist_slot < 4) { 3137 dbits = 0; 3138 dist += dist_slot; 3139 } else { 3140 dbits = dist_slot / 2 - 1; 3141 3142 /* Cast to uint32_t will make sure the shift 3143 * left operation won't produce undefined 3144 * result. Then, the uint32_t type will 3145 * be implicitly casted to int. */ 3146 dist += (uint32_t) (2 | 3147 (dist_slot & 1)) << dbits; 3148 } 3149 3150 if(dbits > 0) { 3151 if(dbits >= 4) { 3152 uint32_t add = 0; 3153 uint16_t low_dist; 3154 3155 if(dbits > 4) { 3156 if(ARCHIVE_OK != (ret = read_bits_32( 3157 a, rar, p, &add))) { 3158 /* Return EOF if we 3159 * can't read more 3160 * data. */ 3161 return ret; 3162 } 3163 3164 skip_bits(rar, dbits - 4); 3165 add = (add >> ( 3166 36 - dbits)) << 4; 3167 dist += add; 3168 } 3169 3170 if(ARCHIVE_OK != decode_number(a, 3171 &rar->cstate.ldd, p, &low_dist)) 3172 { 3173 archive_set_error(&a->archive, 3174 ARCHIVE_ERRNO_PROGRAMMER, 3175 "Failed to decode the " 3176 "distance slot"); 3177 3178 return ARCHIVE_FATAL; 3179 } 3180 3181 if(dist >= INT_MAX - low_dist - 1) { 3182 /* This only happens in 3183 * invalid archives. */ 3184 archive_set_error(&a->archive, 3185 ARCHIVE_ERRNO_FILE_FORMAT, 3186 "Distance pointer " 3187 "overflow"); 3188 return ARCHIVE_FATAL; 3189 } 3190 3191 dist += low_dist; 3192 } else { 3193 /* dbits is one of [0,1,2,3] */ 3194 int add; 3195 3196 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, 3197 p, dbits, &add))) { 3198 /* Return EOF if we can't read 3199 * more data. */ 3200 return ret; 3201 } 3202 3203 dist += add; 3204 } 3205 } 3206 3207 if(dist > 0x100) { 3208 len++; 3209 3210 if(dist > 0x2000) { 3211 len++; 3212 3213 if(dist > 0x40000) { 3214 len++; 3215 } 3216 } 3217 } 3218 3219 dist_cache_push(rar, dist); 3220 rar->cstate.last_len = len; 3221 3222 if(ARCHIVE_OK != copy_string(a, len, dist)) 3223 return ARCHIVE_FATAL; 3224 3225 continue; 3226 } else if(num == 256) { 3227 /* Create a filter. */ 3228 ret = parse_filter(a, p); 3229 if(ret != ARCHIVE_OK) 3230 return ret; 3231 3232 continue; 3233 } else if(num == 257) { 3234 if(rar->cstate.last_len != 0) { 3235 if(ARCHIVE_OK != copy_string(a, 3236 rar->cstate.last_len, 3237 rar->cstate.dist_cache[0])) 3238 { 3239 return ARCHIVE_FATAL; 3240 } 3241 } 3242 3243 continue; 3244 } else { 3245 /* num < 262 */ 3246 const int idx = num - 258; 3247 const int dist = dist_cache_touch(rar, idx); 3248 3249 uint16_t len_slot; 3250 int len; 3251 3252 if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p, 3253 &len_slot)) { 3254 return ARCHIVE_FATAL; 3255 } 3256 3257 len = decode_code_length(a, rar, p, len_slot); 3258 if (len == -1) { 3259 return ARCHIVE_FATAL; 3260 } 3261 3262 rar->cstate.last_len = len; 3263 3264 if(ARCHIVE_OK != copy_string(a, len, dist)) 3265 return ARCHIVE_FATAL; 3266 3267 continue; 3268 } 3269 } 3270 3271 return ARCHIVE_OK; 3272 } 3273 3274 /* Binary search for the RARv5 signature. */ 3275 static int scan_for_signature(struct archive_read* a) { 3276 const uint8_t* p; 3277 const int chunk_size = 512; 3278 ssize_t i; 3279 char signature[sizeof(rar5_signature_xor)]; 3280 3281 /* If we're here, it means we're on an 'unknown territory' data. 3282 * There's no indication what kind of data we're reading here. 3283 * It could be some text comment, any kind of binary data, 3284 * digital sign, dragons, etc. 3285 * 3286 * We want to find a valid RARv5 magic header inside this unknown 3287 * data. */ 3288 3289 /* Is it possible in libarchive to just skip everything until the 3290 * end of the file? If so, it would be a better approach than the 3291 * current implementation of this function. */ 3292 3293 rar5_signature(signature); 3294 3295 while(1) { 3296 if(!read_ahead(a, chunk_size, &p)) 3297 return ARCHIVE_EOF; 3298 3299 for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor); 3300 i++) { 3301 if(memcmp(&p[i], signature, 3302 sizeof(rar5_signature_xor)) == 0) { 3303 /* Consume the number of bytes we've used to 3304 * search for the signature, as well as the 3305 * number of bytes used by the signature 3306 * itself. After this we should be standing 3307 * on a valid base block header. */ 3308 (void) consume(a, 3309 i + sizeof(rar5_signature_xor)); 3310 return ARCHIVE_OK; 3311 } 3312 } 3313 3314 consume(a, chunk_size); 3315 } 3316 3317 return ARCHIVE_FATAL; 3318 } 3319 3320 /* This function will switch the multivolume archive file to another file, 3321 * i.e. from part03 to part 04. */ 3322 static int advance_multivolume(struct archive_read* a) { 3323 int lret; 3324 struct rar5* rar = get_context(a); 3325 3326 /* A small state machine that will skip unnecessary data, needed to 3327 * switch from one multivolume to another. Such skipping is needed if 3328 * we want to be an stream-oriented (instead of file-oriented) 3329 * unpacker. 3330 * 3331 * The state machine starts with `rar->main.endarc` == 0. It also 3332 * assumes that current stream pointer points to some base block 3333 * header. 3334 * 3335 * The `endarc` field is being set when the base block parsing 3336 * function encounters the 'end of archive' marker. 3337 */ 3338 3339 while(1) { 3340 if(rar->main.endarc == 1) { 3341 int looping = 1; 3342 3343 rar->main.endarc = 0; 3344 3345 while(looping) { 3346 lret = skip_base_block(a); 3347 switch(lret) { 3348 case ARCHIVE_RETRY: 3349 /* Continue looping. */ 3350 break; 3351 case ARCHIVE_OK: 3352 /* Break loop. */ 3353 looping = 0; 3354 break; 3355 default: 3356 /* Forward any errors to the 3357 * caller. */ 3358 return lret; 3359 } 3360 } 3361 3362 break; 3363 } else { 3364 /* Skip current base block. In order to properly skip 3365 * it, we really need to simply parse it and discard 3366 * the results. */ 3367 3368 lret = skip_base_block(a); 3369 if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED) 3370 return lret; 3371 3372 /* The `skip_base_block` function tells us if we 3373 * should continue with skipping, or we should stop 3374 * skipping. We're trying to skip everything up to 3375 * a base FILE block. */ 3376 3377 if(lret != ARCHIVE_RETRY) { 3378 /* If there was an error during skipping, or we 3379 * have just skipped a FILE base block... */ 3380 3381 if(rar->main.endarc == 0) { 3382 return lret; 3383 } else { 3384 continue; 3385 } 3386 } 3387 } 3388 } 3389 3390 return ARCHIVE_OK; 3391 } 3392 3393 /* Merges the partial block from the first multivolume archive file, and 3394 * partial block from the second multivolume archive file. The result is 3395 * a chunk of memory containing the whole block, and the stream pointer 3396 * is advanced to the next block in the second multivolume archive file. */ 3397 static int merge_block(struct archive_read* a, ssize_t block_size, 3398 const uint8_t** p) 3399 { 3400 struct rar5* rar = get_context(a); 3401 ssize_t cur_block_size, partial_offset = 0; 3402 const uint8_t* lp; 3403 int ret; 3404 3405 if(rar->merge_mode) { 3406 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3407 "Recursive merge is not allowed"); 3408 3409 return ARCHIVE_FATAL; 3410 } 3411 3412 /* Set a flag that we're in the switching mode. */ 3413 rar->cstate.switch_multivolume = 1; 3414 3415 /* Reallocate the memory which will hold the whole block. */ 3416 if(rar->vol.push_buf) 3417 free((void*) rar->vol.push_buf); 3418 3419 /* Increasing the allocation block by 8 is due to bit reading functions, 3420 * which are using additional 2 or 4 bytes. Allocating the block size 3421 * by exact value would make bit reader perform reads from invalid 3422 * memory block when reading the last byte from the buffer. */ 3423 rar->vol.push_buf = malloc(block_size + 8); 3424 if(!rar->vol.push_buf) { 3425 archive_set_error(&a->archive, ENOMEM, 3426 "Can't allocate memory for a merge block buffer."); 3427 return ARCHIVE_FATAL; 3428 } 3429 3430 /* Valgrind complains if the extension block for bit reader is not 3431 * initialized, so initialize it. */ 3432 memset(&rar->vol.push_buf[block_size], 0, 8); 3433 3434 /* A single block can span across multiple multivolume archive files, 3435 * so we use a loop here. This loop will consume enough multivolume 3436 * archive files until the whole block is read. */ 3437 3438 while(1) { 3439 /* Get the size of current block chunk in this multivolume 3440 * archive file and read it. */ 3441 cur_block_size = rar5_min(rar->file.bytes_remaining, 3442 block_size - partial_offset); 3443 3444 if(cur_block_size == 0) { 3445 archive_set_error(&a->archive, 3446 ARCHIVE_ERRNO_FILE_FORMAT, 3447 "Encountered block size == 0 during block merge"); 3448 return ARCHIVE_FATAL; 3449 } 3450 3451 if(!read_ahead(a, cur_block_size, &lp)) 3452 return ARCHIVE_EOF; 3453 3454 /* Sanity check; there should never be a situation where this 3455 * function reads more data than the block's size. */ 3456 if(partial_offset + cur_block_size > block_size) { 3457 archive_set_error(&a->archive, 3458 ARCHIVE_ERRNO_PROGRAMMER, 3459 "Consumed too much data when merging blocks."); 3460 return ARCHIVE_FATAL; 3461 } 3462 3463 /* Merge previous block chunk with current block chunk, 3464 * or create first block chunk if this is our first 3465 * iteration. */ 3466 memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size); 3467 3468 /* Advance the stream read pointer by this block chunk size. */ 3469 if(ARCHIVE_OK != consume(a, cur_block_size)) 3470 return ARCHIVE_EOF; 3471 3472 /* Update the pointers. `partial_offset` contains information 3473 * about the sum of merged block chunks. */ 3474 partial_offset += cur_block_size; 3475 rar->file.bytes_remaining -= cur_block_size; 3476 3477 /* If `partial_offset` is the same as `block_size`, this means 3478 * we've merged all block chunks and we have a valid full 3479 * block. */ 3480 if(partial_offset == block_size) { 3481 break; 3482 } 3483 3484 /* If we don't have any bytes to read, this means we should 3485 * switch to another multivolume archive file. */ 3486 if(rar->file.bytes_remaining == 0) { 3487 rar->merge_mode++; 3488 ret = advance_multivolume(a); 3489 rar->merge_mode--; 3490 if(ret != ARCHIVE_OK) { 3491 return ret; 3492 } 3493 } 3494 } 3495 3496 *p = rar->vol.push_buf; 3497 3498 /* If we're here, we can resume unpacking by processing the block 3499 * pointed to by the `*p` memory pointer. */ 3500 3501 return ARCHIVE_OK; 3502 } 3503 3504 static int process_block(struct archive_read* a) { 3505 const uint8_t* p; 3506 struct rar5* rar = get_context(a); 3507 int ret; 3508 3509 /* If we don't have any data to be processed, this most probably means 3510 * we need to switch to the next volume. */ 3511 if(rar->main.volume && rar->file.bytes_remaining == 0) { 3512 ret = advance_multivolume(a); 3513 if(ret != ARCHIVE_OK) 3514 return ret; 3515 } 3516 3517 if(rar->cstate.block_parsing_finished) { 3518 ssize_t block_size; 3519 ssize_t to_skip; 3520 ssize_t cur_block_size; 3521 3522 /* The header size won't be bigger than 6 bytes. */ 3523 if(!read_ahead(a, 6, &p)) { 3524 /* Failed to prefetch data block header. */ 3525 return ARCHIVE_EOF; 3526 } 3527 3528 /* 3529 * Read block_size by parsing block header. Validate the header 3530 * by calculating CRC byte stored inside the header. Size of 3531 * the header is not constant (block size can be stored either 3532 * in 1 or 2 bytes), that's why block size is left out from the 3533 * `compressed_block_header` structure and returned by 3534 * `parse_block_header` as the second argument. */ 3535 3536 ret = parse_block_header(a, p, &block_size, 3537 &rar->last_block_hdr); 3538 if(ret != ARCHIVE_OK) { 3539 return ret; 3540 } 3541 3542 /* Skip block header. Next data is huffman tables, 3543 * if present. */ 3544 to_skip = sizeof(struct compressed_block_header) + 3545 bf_byte_count(&rar->last_block_hdr) + 1; 3546 3547 if(ARCHIVE_OK != consume(a, to_skip)) 3548 return ARCHIVE_EOF; 3549 3550 rar->file.bytes_remaining -= to_skip; 3551 3552 /* The block size gives information about the whole block size, 3553 * but the block could be stored in split form when using 3554 * multi-volume archives. In this case, the block size will be 3555 * bigger than the actual data stored in this file. Remaining 3556 * part of the data will be in another file. */ 3557 3558 cur_block_size = 3559 rar5_min(rar->file.bytes_remaining, block_size); 3560 3561 if(block_size > rar->file.bytes_remaining) { 3562 /* If current blocks' size is bigger than our data 3563 * size, this means we have a multivolume archive. 3564 * In this case, skip all base headers until the end 3565 * of the file, proceed to next "partXXX.rar" volume, 3566 * find its signature, skip all headers up to the first 3567 * FILE base header, and continue from there. 3568 * 3569 * Note that `merge_block` will update the `rar` 3570 * context structure quite extensively. */ 3571 3572 ret = merge_block(a, block_size, &p); 3573 if(ret != ARCHIVE_OK) { 3574 return ret; 3575 } 3576 3577 cur_block_size = block_size; 3578 3579 /* Current stream pointer should be now directly 3580 * *after* the block that spanned through multiple 3581 * archive files. `p` pointer should have the data of 3582 * the *whole* block (merged from partial blocks 3583 * stored in multiple archives files). */ 3584 } else { 3585 rar->cstate.switch_multivolume = 0; 3586 3587 /* Read the whole block size into memory. This can take 3588 * up to 8 megabytes of memory in theoretical cases. 3589 * Might be worth to optimize this and use a standard 3590 * chunk of 4kb's. */ 3591 if(!read_ahead(a, 4 + cur_block_size, &p)) { 3592 /* Failed to prefetch block data. */ 3593 return ARCHIVE_EOF; 3594 } 3595 } 3596 3597 rar->cstate.block_buf = p; 3598 rar->cstate.cur_block_size = cur_block_size; 3599 rar->cstate.block_parsing_finished = 0; 3600 3601 rar->bits.in_addr = 0; 3602 rar->bits.bit_addr = 0; 3603 3604 if(bf_is_table_present(&rar->last_block_hdr)) { 3605 /* Load Huffman tables. */ 3606 ret = parse_tables(a, rar, p); 3607 if(ret != ARCHIVE_OK) { 3608 /* Error during decompression of Huffman 3609 * tables. */ 3610 return ret; 3611 } 3612 } 3613 } else { 3614 /* Block parsing not finished, reuse previous memory buffer. */ 3615 p = rar->cstate.block_buf; 3616 } 3617 3618 /* Uncompress the block, or a part of it, depending on how many bytes 3619 * will be generated by uncompressing the block. 3620 * 3621 * In case too many bytes will be generated, calling this function 3622 * again will resume the uncompression operation. */ 3623 ret = do_uncompress_block(a, p); 3624 if(ret != ARCHIVE_OK) { 3625 return ret; 3626 } 3627 3628 if(rar->cstate.block_parsing_finished && 3629 rar->cstate.switch_multivolume == 0 && 3630 rar->cstate.cur_block_size > 0) 3631 { 3632 /* If we're processing a normal block, consume the whole 3633 * block. We can do this because we've already read the whole 3634 * block to memory. */ 3635 if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size)) 3636 return ARCHIVE_FATAL; 3637 3638 rar->file.bytes_remaining -= rar->cstate.cur_block_size; 3639 } else if(rar->cstate.switch_multivolume) { 3640 /* Don't consume the block if we're doing multivolume 3641 * processing. The volume switching function will consume 3642 * the proper count of bytes instead. */ 3643 rar->cstate.switch_multivolume = 0; 3644 } 3645 3646 return ARCHIVE_OK; 3647 } 3648 3649 /* Pops the `buf`, `size` and `offset` from the "data ready" stack. 3650 * 3651 * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY 3652 * when there is no data on the stack. */ 3653 static int use_data(struct rar5* rar, const void** buf, size_t* size, 3654 int64_t* offset) 3655 { 3656 int i; 3657 3658 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3659 struct data_ready *d = &rar->cstate.dready[i]; 3660 3661 if(d->used) { 3662 if(buf) *buf = d->buf; 3663 if(size) *size = d->size; 3664 if(offset) *offset = d->offset; 3665 3666 d->used = 0; 3667 return ARCHIVE_OK; 3668 } 3669 } 3670 3671 return ARCHIVE_RETRY; 3672 } 3673 3674 static void clear_data_ready_stack(struct rar5* rar) { 3675 memset(&rar->cstate.dready, 0, sizeof(rar->cstate.dready)); 3676 } 3677 3678 /* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready 3679 * FIFO stack. Those values will be popped from this stack by the `use_data` 3680 * function. */ 3681 static int push_data_ready(struct archive_read* a, struct rar5* rar, 3682 const uint8_t* buf, size_t size, int64_t offset) 3683 { 3684 int i; 3685 3686 /* Don't push if we're in skip mode. This is needed because solid 3687 * streams need full processing even if we're skipping data. After 3688 * fully processing the stream, we need to discard the generated bytes, 3689 * because we're interested only in the side effect: building up the 3690 * internal window circular buffer. This window buffer will be used 3691 * later during unpacking of requested data. */ 3692 if(rar->skip_mode) 3693 return ARCHIVE_OK; 3694 3695 /* Sanity check. */ 3696 if(offset != rar->file.last_offset + rar->file.last_size) { 3697 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3698 "Sanity check error: output stream is not continuous"); 3699 return ARCHIVE_FATAL; 3700 } 3701 3702 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3703 struct data_ready* d = &rar->cstate.dready[i]; 3704 if(!d->used) { 3705 d->used = 1; 3706 d->buf = buf; 3707 d->size = size; 3708 d->offset = offset; 3709 3710 /* These fields are used only in sanity checking. */ 3711 rar->file.last_offset = offset; 3712 rar->file.last_size = size; 3713 3714 /* Calculate the checksum of this new block before 3715 * submitting data to libarchive's engine. */ 3716 update_crc(rar, d->buf, d->size); 3717 3718 return ARCHIVE_OK; 3719 } 3720 } 3721 3722 /* Program counter will reach this code if the `rar->cstate.data_ready` 3723 * stack will be filled up so that no new entries will be allowed. The 3724 * code shouldn't allow such situation to occur. So we treat this case 3725 * as an internal error. */ 3726 3727 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3728 "Error: premature end of data_ready stack"); 3729 return ARCHIVE_FATAL; 3730 } 3731 3732 /* This function uncompresses the data that is stored in the <FILE> base 3733 * block. 3734 * 3735 * The FILE base block looks like this: 3736 * 3737 * <header><huffman tables><block_1><block_2>...<block_n> 3738 * 3739 * The <header> is a block header, that is parsed in parse_block_header(). 3740 * It's a "compressed_block_header" structure, containing metadata needed 3741 * to know when we should stop looking for more <block_n> blocks. 3742 * 3743 * <huffman tables> contain data needed to set up the huffman tables, needed 3744 * for the actual decompression. 3745 * 3746 * Each <block_n> consists of series of literals: 3747 * 3748 * <literal><literal><literal>...<literal> 3749 * 3750 * Those literals generate the uncompression data. They operate on a circular 3751 * buffer, sometimes writing raw data into it, sometimes referencing 3752 * some previous data inside this buffer, and sometimes declaring a filter 3753 * that will need to be executed on the data stored in the circular buffer. 3754 * It all depends on the literal that is used. 3755 * 3756 * Sometimes blocks produce output data, sometimes they don't. For example, for 3757 * some huge files that use lots of filters, sometimes a block is filled with 3758 * only filter declaration literals. Such blocks won't produce any data in the 3759 * circular buffer. 3760 * 3761 * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte, 3762 * because a literal can reference previously decompressed data. For example, 3763 * there can be a literal that says: 'append a byte 0xFE here', and after 3764 * it another literal can say 'append 1 megabyte of data from circular buffer 3765 * offset 0x12345'. This is how RAR format handles compressing repeated 3766 * patterns. 3767 * 3768 * The RAR compressor creates those literals and the actual efficiency of 3769 * compression depends on what those literals are. The literals can also 3770 * be seen as a kind of a non-turing-complete virtual machine that simply 3771 * tells the decompressor what it should do. 3772 * */ 3773 3774 static int do_uncompress_file(struct archive_read* a) { 3775 struct rar5* rar = get_context(a); 3776 int ret; 3777 int64_t max_end_pos; 3778 3779 if(!rar->cstate.initialized) { 3780 /* Don't perform full context reinitialization if we're 3781 * processing a solid archive. */ 3782 if(!rar->main.solid || !rar->cstate.window_buf) { 3783 init_unpack(rar); 3784 } 3785 3786 rar->cstate.initialized = 1; 3787 } 3788 3789 /* Don't allow extraction if window_size is invalid. */ 3790 if(rar->cstate.window_size == 0) { 3791 archive_set_error(&a->archive, 3792 ARCHIVE_ERRNO_FILE_FORMAT, 3793 "Invalid window size declaration in this file"); 3794 3795 /* This should never happen in valid files. */ 3796 return ARCHIVE_FATAL; 3797 } 3798 3799 if(rar->cstate.all_filters_applied == 1) { 3800 /* We use while(1) here, but standard case allows for just 1 3801 * iteration. The loop will iterate if process_block() didn't 3802 * generate any data at all. This can happen if the block 3803 * contains only filter definitions (this is common in big 3804 * files). */ 3805 while(1) { 3806 ret = process_block(a); 3807 if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL) 3808 return ret; 3809 3810 if(rar->cstate.last_write_ptr == 3811 rar->cstate.write_ptr) { 3812 /* The block didn't generate any new data, 3813 * so just process a new block if this one 3814 * wasn't the last block in the file. */ 3815 if (bf_is_last_block(&rar->last_block_hdr)) { 3816 return ARCHIVE_EOF; 3817 } 3818 3819 continue; 3820 } 3821 3822 /* The block has generated some new data, so break 3823 * the loop. */ 3824 break; 3825 } 3826 } 3827 3828 /* Try to run filters. If filters won't be applied, it means that 3829 * insufficient data was generated. */ 3830 ret = apply_filters(a); 3831 if(ret == ARCHIVE_RETRY) { 3832 return ARCHIVE_OK; 3833 } else if(ret == ARCHIVE_FATAL) { 3834 return ARCHIVE_FATAL; 3835 } 3836 3837 /* If apply_filters() will return ARCHIVE_OK, we can continue here. */ 3838 3839 if(cdeque_size(&rar->cstate.filters) > 0) { 3840 /* Check if we can write something before hitting first 3841 * filter. */ 3842 struct filter_info* flt; 3843 3844 /* Get the block_start offset from the first filter. */ 3845 if(CDE_OK != cdeque_front(&rar->cstate.filters, 3846 cdeque_filter_p(&flt))) 3847 { 3848 archive_set_error(&a->archive, 3849 ARCHIVE_ERRNO_PROGRAMMER, 3850 "Can't read first filter"); 3851 return ARCHIVE_FATAL; 3852 } 3853 3854 max_end_pos = rar5_min(flt->block_start, 3855 rar->cstate.write_ptr); 3856 } else { 3857 /* There are no filters defined, or all filters were applied. 3858 * This means we can just store the data without any 3859 * postprocessing. */ 3860 max_end_pos = rar->cstate.write_ptr; 3861 } 3862 3863 if(max_end_pos == rar->cstate.last_write_ptr) { 3864 /* We can't write anything yet. The block uncompression 3865 * function did not generate enough data, and no filter can be 3866 * applied. At the same time we don't have any data that can be 3867 * stored without filter postprocessing. This means we need to 3868 * wait for more data to be generated, so we can apply the 3869 * filters. 3870 * 3871 * Signal the caller that we need more data to be able to do 3872 * anything. 3873 */ 3874 return ARCHIVE_RETRY; 3875 } else { 3876 /* We can write the data before hitting the first filter. 3877 * So let's do it. The push_window_data() function will 3878 * effectively return the selected data block to the user 3879 * application. */ 3880 push_window_data(a, rar, rar->cstate.last_write_ptr, 3881 max_end_pos); 3882 rar->cstate.last_write_ptr = max_end_pos; 3883 } 3884 3885 return ARCHIVE_OK; 3886 } 3887 3888 static int uncompress_file(struct archive_read* a) { 3889 int ret; 3890 3891 while(1) { 3892 /* Sometimes the uncompression function will return a 3893 * 'retry' signal. If this will happen, we have to retry 3894 * the function. */ 3895 ret = do_uncompress_file(a); 3896 if(ret != ARCHIVE_RETRY) 3897 return ret; 3898 } 3899 } 3900 3901 3902 static int do_unstore_file(struct archive_read* a, 3903 struct rar5* rar, const void** buf, size_t* size, int64_t* offset) 3904 { 3905 size_t to_read; 3906 const uint8_t* p; 3907 3908 if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 && 3909 rar->generic.split_after > 0) 3910 { 3911 int ret; 3912 3913 rar->cstate.switch_multivolume = 1; 3914 ret = advance_multivolume(a); 3915 rar->cstate.switch_multivolume = 0; 3916 3917 if(ret != ARCHIVE_OK) { 3918 /* Failed to advance to next multivolume archive 3919 * file. */ 3920 return ret; 3921 } 3922 } 3923 3924 to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024); 3925 if(to_read == 0) { 3926 return ARCHIVE_EOF; 3927 } 3928 3929 if(!read_ahead(a, to_read, &p)) { 3930 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3931 "I/O error when unstoring file"); 3932 return ARCHIVE_FATAL; 3933 } 3934 3935 if(ARCHIVE_OK != consume(a, to_read)) { 3936 return ARCHIVE_EOF; 3937 } 3938 3939 if(buf) *buf = p; 3940 if(size) *size = to_read; 3941 if(offset) *offset = rar->cstate.last_unstore_ptr; 3942 3943 rar->file.bytes_remaining -= to_read; 3944 rar->cstate.last_unstore_ptr += to_read; 3945 3946 update_crc(rar, p, to_read); 3947 return ARCHIVE_OK; 3948 } 3949 3950 static int do_unpack(struct archive_read* a, struct rar5* rar, 3951 const void** buf, size_t* size, int64_t* offset) 3952 { 3953 enum COMPRESSION_METHOD { 3954 STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4, 3955 BEST = 5 3956 }; 3957 3958 if(rar->file.service > 0) { 3959 return do_unstore_file(a, rar, buf, size, offset); 3960 } else { 3961 switch(rar->cstate.method) { 3962 case STORE: 3963 return do_unstore_file(a, rar, buf, size, 3964 offset); 3965 case FASTEST: 3966 /* fallthrough */ 3967 case FAST: 3968 /* fallthrough */ 3969 case NORMAL: 3970 /* fallthrough */ 3971 case GOOD: 3972 /* fallthrough */ 3973 case BEST: 3974 /* No data is returned here. But because a sparse-file aware 3975 * caller (like archive_read_data_into_fd) may treat zero-size 3976 * as a sparse file block, we need to update the offset 3977 * accordingly. At this point the decoder doesn't have any 3978 * pending uncompressed data blocks, so the current position in 3979 * the output file should be last_write_ptr. */ 3980 if (offset) *offset = rar->cstate.last_write_ptr; 3981 return uncompress_file(a); 3982 default: 3983 archive_set_error(&a->archive, 3984 ARCHIVE_ERRNO_FILE_FORMAT, 3985 "Compression method not supported: 0x%x", 3986 rar->cstate.method); 3987 3988 return ARCHIVE_FATAL; 3989 } 3990 } 3991 3992 #if !defined WIN32 3993 /* Not reached. */ 3994 return ARCHIVE_OK; 3995 #endif 3996 } 3997 3998 static int verify_checksums(struct archive_read* a) { 3999 int verify_crc; 4000 struct rar5* rar = get_context(a); 4001 4002 /* Check checksums only when actually unpacking the data. There's no 4003 * need to calculate checksum when we're skipping data in solid archives 4004 * (skipping in solid archives is the same thing as unpacking compressed 4005 * data and discarding the result). */ 4006 4007 if(!rar->skip_mode) { 4008 /* Always check checksums if we're not in skip mode */ 4009 verify_crc = 1; 4010 } else { 4011 /* We can override the logic above with a compile-time option 4012 * NO_CRC_ON_SOLID_SKIP. This option is used during debugging, 4013 * and it will check checksums of unpacked data even when 4014 * we're skipping it. */ 4015 4016 #if defined CHECK_CRC_ON_SOLID_SKIP 4017 /* Debug case */ 4018 verify_crc = 1; 4019 #else 4020 /* Normal case */ 4021 verify_crc = 0; 4022 #endif 4023 } 4024 4025 if(verify_crc) { 4026 /* During unpacking, on each unpacked block we're calling the 4027 * update_crc() function. Since we are here, the unpacking 4028 * process is already over and we can check if calculated 4029 * checksum (CRC32 or BLAKE2sp) is the same as what is stored 4030 * in the archive. */ 4031 if(rar->file.stored_crc32 > 0) { 4032 /* Check CRC32 only when the file contains a CRC32 4033 * value for this file. */ 4034 4035 if(rar->file.calculated_crc32 != 4036 rar->file.stored_crc32) { 4037 /* Checksums do not match; the unpacked file 4038 * is corrupted. */ 4039 4040 DEBUG_CODE { 4041 printf("Checksum error: CRC32 " 4042 "(was: %08" PRIx32 ", expected: %08" PRIx32 ")\n", 4043 rar->file.calculated_crc32, 4044 rar->file.stored_crc32); 4045 } 4046 4047 #ifndef DONT_FAIL_ON_CRC_ERROR 4048 archive_set_error(&a->archive, 4049 ARCHIVE_ERRNO_FILE_FORMAT, 4050 "Checksum error: CRC32"); 4051 return ARCHIVE_FATAL; 4052 #endif 4053 } else { 4054 DEBUG_CODE { 4055 printf("Checksum OK: CRC32 " 4056 "(%08" PRIx32 "/%08" PRIx32 ")\n", 4057 rar->file.stored_crc32, 4058 rar->file.calculated_crc32); 4059 } 4060 } 4061 } 4062 4063 if(rar->file.has_blake2 > 0) { 4064 /* BLAKE2sp is an optional checksum algorithm that is 4065 * added to RARv5 archives when using the `-htb` switch 4066 * during creation of archive. 4067 * 4068 * We now finalize the hash calculation by calling the 4069 * `final` function. This will generate the final hash 4070 * value we can use to compare it with the BLAKE2sp 4071 * checksum that is stored in the archive. 4072 * 4073 * The return value of this `final` function is not 4074 * very helpful, as it guards only against improper use. 4075 * This is why we're explicitly ignoring it. */ 4076 4077 uint8_t b2_buf[32]; 4078 (void) blake2sp_final(&rar->file.b2state, b2_buf, 32); 4079 4080 if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) { 4081 #ifndef DONT_FAIL_ON_CRC_ERROR 4082 archive_set_error(&a->archive, 4083 ARCHIVE_ERRNO_FILE_FORMAT, 4084 "Checksum error: BLAKE2"); 4085 4086 return ARCHIVE_FATAL; 4087 #endif 4088 } 4089 } 4090 } 4091 4092 /* Finalization for this file has been successfully completed. */ 4093 return ARCHIVE_OK; 4094 } 4095 4096 static int verify_global_checksums(struct archive_read* a) { 4097 return verify_checksums(a); 4098 } 4099 4100 /* 4101 * Decryption function for the magic signature pattern. Check the comment near 4102 * the `rar5_signature_xor` symbol to read the rationale behind this. 4103 */ 4104 static void rar5_signature(char *buf) { 4105 size_t i; 4106 4107 for(i = 0; i < sizeof(rar5_signature_xor); i++) { 4108 buf[i] = rar5_signature_xor[i] ^ 0xA1; 4109 } 4110 } 4111 4112 static int rar5_read_data(struct archive_read *a, const void **buff, 4113 size_t *size, int64_t *offset) { 4114 int ret; 4115 struct rar5* rar = get_context(a); 4116 4117 if (size) 4118 *size = 0; 4119 4120 if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) { 4121 rar->has_encrypted_entries = 0; 4122 } 4123 4124 if (rar->headers_are_encrypted || rar->cstate.data_encrypted) { 4125 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 4126 "Reading encrypted data is not currently supported"); 4127 return ARCHIVE_FATAL; 4128 } 4129 4130 if(rar->file.dir > 0) { 4131 /* Don't process any data if this file entry was declared 4132 * as a directory. This is needed, because entries marked as 4133 * directory doesn't have any dictionary buffer allocated, so 4134 * it's impossible to perform any decompression. */ 4135 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 4136 "Can't decompress an entry marked as a directory"); 4137 return ARCHIVE_FAILED; 4138 } 4139 4140 if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) { 4141 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 4142 "Unpacker has written too many bytes"); 4143 return ARCHIVE_FATAL; 4144 } 4145 4146 ret = use_data(rar, buff, size, offset); 4147 if(ret == ARCHIVE_OK) { 4148 return ret; 4149 } 4150 4151 if(rar->file.eof == 1) { 4152 return ARCHIVE_EOF; 4153 } 4154 4155 ret = do_unpack(a, rar, buff, size, offset); 4156 if(ret != ARCHIVE_OK) { 4157 return ret; 4158 } 4159 4160 if(rar->file.bytes_remaining == 0 && 4161 rar->cstate.last_write_ptr == rar->file.unpacked_size) 4162 { 4163 /* If all bytes of current file were processed, run 4164 * finalization. 4165 * 4166 * Finalization will check checksum against proper values. If 4167 * some of the checksums will not match, we'll return an error 4168 * value in the last `archive_read_data` call to signal an error 4169 * to the user. */ 4170 4171 rar->file.eof = 1; 4172 return verify_global_checksums(a); 4173 } 4174 4175 return ARCHIVE_OK; 4176 } 4177 4178 static int rar5_read_data_skip(struct archive_read *a) { 4179 struct rar5* rar = get_context(a); 4180 4181 if(rar->main.solid && (rar->cstate.data_encrypted == 0)) { 4182 /* In solid archives, instead of skipping the data, we need to 4183 * extract it, and dispose the result. The side effect of this 4184 * operation will be setting up the initial window buffer state 4185 * needed to be able to extract the selected file. Note that 4186 * this is only possible when data withing this solid block is 4187 * not encrypted, in which case we'll skip and fail if the user 4188 * tries to read data. */ 4189 4190 int ret; 4191 4192 /* Make sure to process all blocks in the compressed stream. */ 4193 while(rar->file.bytes_remaining > 0) { 4194 /* Setting the "skip mode" will allow us to skip 4195 * checksum checks during data skipping. Checking the 4196 * checksum of skipped data isn't really necessary and 4197 * it's only slowing things down. 4198 * 4199 * This is incremented instead of setting to 1 because 4200 * this data skipping function can be called 4201 * recursively. */ 4202 rar->skip_mode++; 4203 4204 /* We're disposing 1 block of data, so we use triple 4205 * NULLs in arguments. */ 4206 ret = rar5_read_data(a, NULL, NULL, NULL); 4207 4208 /* Turn off "skip mode". */ 4209 rar->skip_mode--; 4210 4211 if(ret < 0 || ret == ARCHIVE_EOF) { 4212 /* Propagate any potential error conditions 4213 * to the caller. */ 4214 return ret; 4215 } 4216 } 4217 } else { 4218 /* In standard archives, we can just jump over the compressed 4219 * stream. Each file in non-solid archives starts from an empty 4220 * window buffer. */ 4221 4222 if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) { 4223 return ARCHIVE_FATAL; 4224 } 4225 4226 rar->file.bytes_remaining = 0; 4227 } 4228 4229 return ARCHIVE_OK; 4230 } 4231 4232 static int64_t rar5_seek_data(struct archive_read *a, int64_t offset, 4233 int whence) 4234 { 4235 (void) a; 4236 (void) offset; 4237 (void) whence; 4238 4239 /* We're a streaming unpacker, and we don't support seeking. */ 4240 4241 return ARCHIVE_FATAL; 4242 } 4243 4244 static int rar5_cleanup(struct archive_read *a) { 4245 struct rar5* rar = get_context(a); 4246 4247 free(rar->cstate.window_buf); 4248 free(rar->cstate.filtered_buf); 4249 clear_data_ready_stack(rar); 4250 4251 free(rar->vol.push_buf); 4252 4253 free_filters(rar); 4254 cdeque_free(&rar->cstate.filters); 4255 4256 free(rar); 4257 a->format->data = NULL; 4258 4259 return ARCHIVE_OK; 4260 } 4261 4262 static int rar5_capabilities(struct archive_read * a) { 4263 (void) a; 4264 return (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA 4265 | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA); 4266 } 4267 4268 static int rar5_has_encrypted_entries(struct archive_read *_a) { 4269 if (_a && _a->format) { 4270 struct rar5 *rar = (struct rar5 *)_a->format->data; 4271 if (rar) { 4272 return rar->has_encrypted_entries; 4273 } 4274 } 4275 4276 return ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; 4277 } 4278 4279 static int rar5_init(struct rar5* rar) { 4280 memset(rar, 0, sizeof(struct rar5)); 4281 4282 if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192)) 4283 return ARCHIVE_FATAL; 4284 4285 /* 4286 * Until enough data has been read, we cannot tell about 4287 * any encrypted entries yet. 4288 */ 4289 rar->has_encrypted_entries = ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; 4290 4291 return ARCHIVE_OK; 4292 } 4293 4294 int archive_read_support_format_rar5(struct archive *_a) { 4295 struct archive_read* ar; 4296 int ret; 4297 struct rar5* rar; 4298 4299 if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar))) 4300 return ret; 4301 4302 rar = malloc(sizeof(*rar)); 4303 if(rar == NULL) { 4304 archive_set_error(&ar->archive, ENOMEM, 4305 "Can't allocate rar5 data"); 4306 return ARCHIVE_FATAL; 4307 } 4308 4309 if(ARCHIVE_OK != rar5_init(rar)) { 4310 archive_set_error(&ar->archive, ENOMEM, 4311 "Can't allocate rar5 filter buffer"); 4312 free(rar); 4313 return ARCHIVE_FATAL; 4314 } 4315 4316 ret = __archive_read_register_format(ar, 4317 rar, 4318 "rar5", 4319 rar5_bid, 4320 rar5_options, 4321 rar5_read_header, 4322 rar5_read_data, 4323 rar5_read_data_skip, 4324 rar5_seek_data, 4325 rar5_cleanup, 4326 rar5_capabilities, 4327 rar5_has_encrypted_entries); 4328 4329 if(ret != ARCHIVE_OK) { 4330 (void) rar5_cleanup(ar); 4331 } 4332 4333 return ret; 4334 } 4335