1 /*- 2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 28 #ifdef HAVE_ERRNO_H 29 #include <errno.h> 30 #endif 31 #ifdef HAVE_LIMITS_H 32 #include <limits.h> 33 #endif 34 #ifdef HAVE_STDLIB_H 35 #include <stdlib.h> 36 #endif 37 #ifdef HAVE_STRING_H 38 #include <string.h> 39 #endif 40 41 #include "archive.h" 42 #include "archive_entry.h" 43 #include "archive_entry_locale.h" 44 #include "archive_private.h" 45 #include "archive_read_private.h" 46 #include "archive_endian.h" 47 48 49 #define MAXMATCH 256 /* Maximum match length. */ 50 #define MINMATCH 3 /* Minimum match length. */ 51 /* 52 * Literal table format: 53 * +0 +256 +510 54 * +---------------+-------------------------+ 55 * | literal code | match length | 56 * | 0 ... 255 | MINMATCH ... MAXMATCH | 57 * +---------------+-------------------------+ 58 * <--- LT_BITLEN_SIZE ---> 59 */ 60 /* Literal table size. */ 61 #define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1) 62 /* Position table size. 63 * Note: this used for both position table and pre literal table.*/ 64 #define PT_BITLEN_SIZE (3 + 16) 65 66 struct lzh_dec { 67 /* Decoding status. */ 68 int state; 69 70 /* 71 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded 72 * data. 73 */ 74 int w_size; 75 int w_mask; 76 /* Window buffer, which is a loop buffer. */ 77 unsigned char *w_buff; 78 /* The insert position to the window. */ 79 int w_pos; 80 /* The position where we can copy decoded code from the window. */ 81 int copy_pos; 82 /* The length how many bytes we can copy decoded code from 83 * the window. */ 84 int copy_len; 85 86 /* 87 * Bit stream reader. 88 */ 89 struct lzh_br { 90 #define CACHE_TYPE uint64_t 91 #define CACHE_BITS (8 * sizeof(CACHE_TYPE)) 92 /* Cache buffer. */ 93 CACHE_TYPE cache_buffer; 94 /* Indicates how many bits avail in cache_buffer. */ 95 int cache_avail; 96 } br; 97 98 /* 99 * Huffman coding. 100 */ 101 struct huffman { 102 int len_size; 103 int len_avail; 104 int len_bits; 105 int freq[17]; 106 unsigned char *bitlen; 107 108 /* 109 * Use a index table. It's faster than searching a huffman 110 * coding tree, which is a binary tree. But a use of a large 111 * index table causes L1 cache read miss many times. 112 */ 113 #define HTBL_BITS 10 114 int max_bits; 115 int shift_bits; 116 int tbl_bits; 117 int tree_used; 118 int tree_avail; 119 /* Direct access table. */ 120 uint16_t *tbl; 121 /* Binary tree table for extra bits over the direct access. */ 122 struct htree_t { 123 uint16_t left; 124 uint16_t right; 125 } *tree; 126 } lt, pt; 127 128 int blocks_avail; 129 int pos_pt_len_size; 130 int pos_pt_len_bits; 131 int literal_pt_len_size; 132 int literal_pt_len_bits; 133 int reading_position; 134 int loop; 135 int error; 136 }; 137 138 struct lzh_stream { 139 const unsigned char *next_in; 140 int avail_in; 141 int64_t total_in; 142 const unsigned char *ref_ptr; 143 int avail_out; 144 int64_t total_out; 145 struct lzh_dec *ds; 146 }; 147 148 struct lha { 149 /* entry_bytes_remaining is the number of bytes we expect. */ 150 int64_t entry_offset; 151 int64_t entry_bytes_remaining; 152 int64_t entry_unconsumed; 153 uint16_t entry_crc_calculated; 154 155 size_t header_size; /* header size */ 156 unsigned char level; /* header level */ 157 char method[3]; /* compress type */ 158 int64_t compsize; /* compressed data size */ 159 int64_t origsize; /* original file size */ 160 int setflag; 161 #define BIRTHTIME_IS_SET 1 162 #define ATIME_IS_SET 2 163 #define UNIX_MODE_IS_SET 4 164 #define CRC_IS_SET 8 165 time_t birthtime; 166 long birthtime_tv_nsec; 167 time_t mtime; 168 long mtime_tv_nsec; 169 time_t atime; 170 long atime_tv_nsec; 171 mode_t mode; 172 int64_t uid; 173 int64_t gid; 174 struct archive_string uname; 175 struct archive_string gname; 176 uint16_t header_crc; 177 uint16_t crc; 178 /* dirname and filename could be in different codepages */ 179 struct archive_string_conv *sconv_dir; 180 struct archive_string_conv *sconv_fname; 181 struct archive_string_conv *opt_sconv; 182 183 struct archive_string dirname; 184 struct archive_string filename; 185 struct archive_wstring ws; 186 187 unsigned char dos_attr; 188 189 /* Flag to mark progress that an archive was read their first header.*/ 190 char found_first_header; 191 /* Flag to mark that indicates an empty directory. */ 192 char directory; 193 194 /* Flags to mark progress of decompression. */ 195 char decompress_init; 196 char end_of_entry; 197 char end_of_entry_cleanup; 198 char entry_is_compressed; 199 200 char format_name[64]; 201 202 struct lzh_stream strm; 203 }; 204 205 /* 206 * LHA header common member offset. 207 */ 208 #define H_METHOD_OFFSET 2 /* Compress type. */ 209 #define H_ATTR_OFFSET 19 /* DOS attribute. */ 210 #define H_LEVEL_OFFSET 20 /* Header Level. */ 211 #define H_SIZE 22 /* Minimum header size. */ 212 213 static int archive_read_format_lha_bid(struct archive_read *, int); 214 static int archive_read_format_lha_options(struct archive_read *, 215 const char *, const char *); 216 static int archive_read_format_lha_read_header(struct archive_read *, 217 struct archive_entry *); 218 static int archive_read_format_lha_read_data(struct archive_read *, 219 const void **, size_t *, int64_t *); 220 static int archive_read_format_lha_read_data_skip(struct archive_read *); 221 static int archive_read_format_lha_cleanup(struct archive_read *); 222 223 static void lha_replace_path_separator(struct lha *, 224 struct archive_entry *); 225 static int lha_read_file_header_0(struct archive_read *, struct lha *); 226 static int lha_read_file_header_1(struct archive_read *, struct lha *); 227 static int lha_read_file_header_2(struct archive_read *, struct lha *); 228 static int lha_read_file_header_3(struct archive_read *, struct lha *); 229 static int lha_read_file_extended_header(struct archive_read *, 230 struct lha *, uint16_t *, int, size_t, size_t *); 231 static size_t lha_check_header_format(const void *); 232 static int lha_skip_sfx(struct archive_read *); 233 static time_t lha_dos_time(const unsigned char *); 234 static time_t lha_win_time(uint64_t, long *); 235 static unsigned char lha_calcsum(unsigned char, const void *, 236 int, size_t); 237 static int lha_parse_linkname(struct archive_wstring *, 238 struct archive_wstring *); 239 static int lha_read_data_none(struct archive_read *, const void **, 240 size_t *, int64_t *); 241 static int lha_read_data_lzh(struct archive_read *, const void **, 242 size_t *, int64_t *); 243 static void lha_crc16_init(void); 244 static uint16_t lha_crc16(uint16_t, const void *, size_t); 245 static int lzh_decode_init(struct lzh_stream *, const char *); 246 static void lzh_decode_free(struct lzh_stream *); 247 static int lzh_decode(struct lzh_stream *, int); 248 static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *); 249 static int lzh_huffman_init(struct huffman *, size_t, int); 250 static void lzh_huffman_free(struct huffman *); 251 static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end); 252 static int lzh_make_fake_table(struct huffman *, uint16_t); 253 static int lzh_make_huffman_table(struct huffman *); 254 static inline int lzh_decode_huffman(struct huffman *, unsigned); 255 static int lzh_decode_huffman_tree(struct huffman *, unsigned, int); 256 257 258 int 259 archive_read_support_format_lha(struct archive *_a) 260 { 261 struct archive_read *a = (struct archive_read *)_a; 262 struct lha *lha; 263 int r; 264 265 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 266 ARCHIVE_STATE_NEW, "archive_read_support_format_lha"); 267 268 lha = (struct lha *)calloc(1, sizeof(*lha)); 269 if (lha == NULL) { 270 archive_set_error(&a->archive, ENOMEM, 271 "Can't allocate lha data"); 272 return (ARCHIVE_FATAL); 273 } 274 archive_string_init(&lha->ws); 275 276 r = __archive_read_register_format(a, 277 lha, 278 "lha", 279 archive_read_format_lha_bid, 280 archive_read_format_lha_options, 281 archive_read_format_lha_read_header, 282 archive_read_format_lha_read_data, 283 archive_read_format_lha_read_data_skip, 284 NULL, 285 archive_read_format_lha_cleanup, 286 NULL, 287 NULL); 288 289 if (r != ARCHIVE_OK) 290 free(lha); 291 return (ARCHIVE_OK); 292 } 293 294 static size_t 295 lha_check_header_format(const void *h) 296 { 297 const unsigned char *p = h; 298 size_t next_skip_bytes; 299 300 switch (p[H_METHOD_OFFSET+3]) { 301 /* 302 * "-lh0-" ... "-lh7-" "-lhd-" 303 * "-lzs-" "-lz5-" 304 */ 305 case '0': case '1': case '2': case '3': 306 case '4': case '5': case '6': case '7': 307 case 'd': 308 case 's': 309 next_skip_bytes = 4; 310 311 /* b0 == 0 means the end of an LHa archive file. */ 312 if (p[0] == 0) 313 break; 314 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l' 315 || p[H_METHOD_OFFSET+4] != '-') 316 break; 317 318 if (p[H_METHOD_OFFSET+2] == 'h') { 319 /* "-lh?-" */ 320 if (p[H_METHOD_OFFSET+3] == 's') 321 break; 322 if (p[H_LEVEL_OFFSET] == 0) 323 return (0); 324 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20) 325 return (0); 326 } 327 if (p[H_METHOD_OFFSET+2] == 'z') { 328 /* LArc extensions: -lzs-,-lz4- and -lz5- */ 329 if (p[H_LEVEL_OFFSET] != 0) 330 break; 331 if (p[H_METHOD_OFFSET+3] == 's' 332 || p[H_METHOD_OFFSET+3] == '4' 333 || p[H_METHOD_OFFSET+3] == '5') 334 return (0); 335 } 336 break; 337 case 'h': next_skip_bytes = 1; break; 338 case 'z': next_skip_bytes = 1; break; 339 case 'l': next_skip_bytes = 2; break; 340 case '-': next_skip_bytes = 3; break; 341 default : next_skip_bytes = 4; break; 342 } 343 344 return (next_skip_bytes); 345 } 346 347 static int 348 archive_read_format_lha_bid(struct archive_read *a, int best_bid) 349 { 350 const char *p; 351 const void *buff; 352 ssize_t bytes_avail, offset, window; 353 size_t next; 354 355 /* If there's already a better bid than we can ever 356 make, don't bother testing. */ 357 if (best_bid > 30) 358 return (-1); 359 360 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) 361 return (-1); 362 363 if (lha_check_header_format(p) == 0) 364 return (30); 365 366 if (p[0] == 'M' && p[1] == 'Z') { 367 /* PE file */ 368 offset = 0; 369 window = 4096; 370 while (offset < (1024 * 20)) { 371 buff = __archive_read_ahead(a, offset + window, 372 &bytes_avail); 373 if (buff == NULL) { 374 /* Remaining bytes are less than window. */ 375 window >>= 1; 376 if (window < (H_SIZE + 3)) 377 return (0); 378 continue; 379 } 380 p = (const char *)buff + offset; 381 while (p + H_SIZE < (const char *)buff + bytes_avail) { 382 if ((next = lha_check_header_format(p)) == 0) 383 return (30); 384 p += next; 385 } 386 offset = p - (const char *)buff; 387 } 388 } 389 return (0); 390 } 391 392 static int 393 archive_read_format_lha_options(struct archive_read *a, 394 const char *key, const char *val) 395 { 396 struct lha *lha; 397 int ret = ARCHIVE_FAILED; 398 399 lha = (struct lha *)(a->format->data); 400 if (strcmp(key, "hdrcharset") == 0) { 401 if (val == NULL || val[0] == 0) 402 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 403 "lha: hdrcharset option needs a character-set name"); 404 else { 405 lha->opt_sconv = 406 archive_string_conversion_from_charset( 407 &a->archive, val, 0); 408 if (lha->opt_sconv != NULL) 409 ret = ARCHIVE_OK; 410 else 411 ret = ARCHIVE_FATAL; 412 } 413 return (ret); 414 } 415 416 /* Note: The "warn" return is just to inform the options 417 * supervisor that we didn't handle it. It will generate 418 * a suitable error if no one used this option. */ 419 return (ARCHIVE_WARN); 420 } 421 422 static int 423 lha_skip_sfx(struct archive_read *a) 424 { 425 const void *h; 426 const char *p, *q; 427 size_t next, skip; 428 ssize_t bytes, window; 429 430 window = 4096; 431 for (;;) { 432 h = __archive_read_ahead(a, window, &bytes); 433 if (h == NULL) { 434 /* Remaining bytes are less than window. */ 435 window >>= 1; 436 if (window < (H_SIZE + 3)) 437 goto fatal; 438 continue; 439 } 440 if (bytes < H_SIZE) 441 goto fatal; 442 p = h; 443 q = p + bytes; 444 445 /* 446 * Scan ahead until we find something that looks 447 * like the lha header. 448 */ 449 while (p + H_SIZE < q) { 450 if ((next = lha_check_header_format(p)) == 0) { 451 skip = p - (const char *)h; 452 __archive_read_consume(a, skip); 453 return (ARCHIVE_OK); 454 } 455 p += next; 456 } 457 skip = p - (const char *)h; 458 __archive_read_consume(a, skip); 459 } 460 fatal: 461 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 462 "Couldn't find out LHa header"); 463 return (ARCHIVE_FATAL); 464 } 465 466 static int 467 truncated_error(struct archive_read *a) 468 { 469 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 470 "Truncated LHa header"); 471 return (ARCHIVE_FATAL); 472 } 473 474 static int 475 archive_read_format_lha_read_header(struct archive_read *a, 476 struct archive_entry *entry) 477 { 478 struct archive_wstring linkname; 479 struct archive_wstring pathname; 480 struct lha *lha; 481 const unsigned char *p; 482 const char *signature; 483 int err; 484 struct archive_mstring conv_buffer; 485 const wchar_t *conv_buffer_p; 486 487 lha_crc16_init(); 488 489 a->archive.archive_format = ARCHIVE_FORMAT_LHA; 490 if (a->archive.archive_format_name == NULL) 491 a->archive.archive_format_name = "lha"; 492 493 lha = (struct lha *)(a->format->data); 494 lha->decompress_init = 0; 495 lha->end_of_entry = 0; 496 lha->end_of_entry_cleanup = 0; 497 lha->entry_unconsumed = 0; 498 499 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) { 500 /* 501 * LHa archiver added 0 to the tail of its archive file as 502 * the mark of the end of the archive. 503 */ 504 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL); 505 if (signature == NULL || signature[0] == 0) 506 return (ARCHIVE_EOF); 507 return (truncated_error(a)); 508 } 509 510 signature = (const char *)p; 511 if (lha->found_first_header == 0 && 512 signature[0] == 'M' && signature[1] == 'Z') { 513 /* This is an executable? Must be self-extracting... */ 514 err = lha_skip_sfx(a); 515 if (err < ARCHIVE_WARN) 516 return (err); 517 518 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL) 519 return (truncated_error(a)); 520 signature = (const char *)p; 521 } 522 /* signature[0] == 0 means the end of an LHa archive file. */ 523 if (signature[0] == 0) 524 return (ARCHIVE_EOF); 525 526 /* 527 * Check the header format and method type. 528 */ 529 if (lha_check_header_format(p) != 0) { 530 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 531 "Bad LHa file"); 532 return (ARCHIVE_FATAL); 533 } 534 535 /* We've found the first header. */ 536 lha->found_first_header = 1; 537 /* Set a default value and common data */ 538 lha->header_size = 0; 539 lha->level = p[H_LEVEL_OFFSET]; 540 lha->method[0] = p[H_METHOD_OFFSET+1]; 541 lha->method[1] = p[H_METHOD_OFFSET+2]; 542 lha->method[2] = p[H_METHOD_OFFSET+3]; 543 if (memcmp(lha->method, "lhd", 3) == 0) 544 lha->directory = 1; 545 else 546 lha->directory = 0; 547 if (memcmp(lha->method, "lh0", 3) == 0 || 548 memcmp(lha->method, "lz4", 3) == 0) 549 lha->entry_is_compressed = 0; 550 else 551 lha->entry_is_compressed = 1; 552 553 lha->compsize = 0; 554 lha->origsize = 0; 555 lha->setflag = 0; 556 lha->birthtime = 0; 557 lha->birthtime_tv_nsec = 0; 558 lha->mtime = 0; 559 lha->mtime_tv_nsec = 0; 560 lha->atime = 0; 561 lha->atime_tv_nsec = 0; 562 lha->mode = (lha->directory)? 0777 : 0666; 563 lha->uid = 0; 564 lha->gid = 0; 565 archive_string_empty(&lha->dirname); 566 archive_string_empty(&lha->filename); 567 lha->dos_attr = 0; 568 if (lha->opt_sconv != NULL) { 569 lha->sconv_dir = lha->opt_sconv; 570 lha->sconv_fname = lha->opt_sconv; 571 } else { 572 lha->sconv_dir = NULL; 573 lha->sconv_fname = NULL; 574 } 575 576 switch (p[H_LEVEL_OFFSET]) { 577 case 0: 578 err = lha_read_file_header_0(a, lha); 579 break; 580 case 1: 581 err = lha_read_file_header_1(a, lha); 582 break; 583 case 2: 584 err = lha_read_file_header_2(a, lha); 585 break; 586 case 3: 587 err = lha_read_file_header_3(a, lha); 588 break; 589 default: 590 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 591 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]); 592 err = ARCHIVE_FATAL; 593 break; 594 } 595 if (err < ARCHIVE_WARN) 596 return (err); 597 598 599 if (!lha->directory && archive_strlen(&lha->filename) == 0) 600 /* The filename has not been set */ 601 return (truncated_error(a)); 602 603 /* 604 * Make a pathname from a dirname and a filename, after converting to Unicode. 605 * This is because codepages might differ between dirname and filename. 606 */ 607 archive_string_init(&pathname); 608 archive_string_init(&linkname); 609 archive_string_init(&conv_buffer.aes_mbs); 610 archive_string_init(&conv_buffer.aes_mbs_in_locale); 611 archive_string_init(&conv_buffer.aes_utf8); 612 archive_string_init(&conv_buffer.aes_wcs); 613 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) { 614 archive_set_error(&a->archive, 615 ARCHIVE_ERRNO_FILE_FORMAT, 616 "Pathname cannot be converted " 617 "from %s to Unicode.", 618 archive_string_conversion_charset_name(lha->sconv_dir)); 619 err = ARCHIVE_FATAL; 620 } else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p)) 621 err = ARCHIVE_FATAL; 622 if (err == ARCHIVE_FATAL) { 623 archive_mstring_clean(&conv_buffer); 624 archive_wstring_free(&pathname); 625 archive_wstring_free(&linkname); 626 return (err); 627 } 628 archive_wstring_copy(&pathname, &conv_buffer.aes_wcs); 629 630 archive_string_empty(&conv_buffer.aes_mbs); 631 archive_string_empty(&conv_buffer.aes_mbs_in_locale); 632 archive_string_empty(&conv_buffer.aes_utf8); 633 archive_wstring_empty(&conv_buffer.aes_wcs); 634 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) { 635 archive_set_error(&a->archive, 636 ARCHIVE_ERRNO_FILE_FORMAT, 637 "Pathname cannot be converted " 638 "from %s to Unicode.", 639 archive_string_conversion_charset_name(lha->sconv_fname)); 640 err = ARCHIVE_FATAL; 641 } 642 else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p)) 643 err = ARCHIVE_FATAL; 644 if (err == ARCHIVE_FATAL) { 645 archive_mstring_clean(&conv_buffer); 646 archive_wstring_free(&pathname); 647 archive_wstring_free(&linkname); 648 return (err); 649 } 650 archive_wstring_concat(&pathname, &conv_buffer.aes_wcs); 651 archive_mstring_clean(&conv_buffer); 652 653 if ((lha->mode & AE_IFMT) == AE_IFLNK) { 654 /* 655 * Extract the symlink-name if it's included in the pathname. 656 */ 657 if (!lha_parse_linkname(&linkname, &pathname)) { 658 /* We couldn't get the symlink-name. */ 659 archive_set_error(&a->archive, 660 ARCHIVE_ERRNO_FILE_FORMAT, 661 "Unknown symlink-name"); 662 archive_wstring_free(&pathname); 663 archive_wstring_free(&linkname); 664 return (ARCHIVE_FAILED); 665 } 666 } else { 667 /* 668 * Make sure a file-type is set. 669 * The mode has been overridden if it is in the extended data. 670 */ 671 lha->mode = (lha->mode & ~AE_IFMT) | 672 ((lha->directory)? AE_IFDIR: AE_IFREG); 673 } 674 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 && 675 (lha->dos_attr & 1) != 0) 676 lha->mode &= ~(0222);/* read only. */ 677 678 /* 679 * Set basic file parameters. 680 */ 681 archive_entry_copy_pathname_w(entry, pathname.s); 682 archive_wstring_free(&pathname); 683 if (archive_strlen(&linkname) > 0) { 684 archive_entry_copy_symlink_w(entry, linkname.s); 685 } else 686 archive_entry_set_symlink(entry, NULL); 687 archive_wstring_free(&linkname); 688 /* 689 * When a header level is 0, there is a possibility that 690 * a pathname and a symlink has '\' character, a directory 691 * separator in DOS/Windows. So we should convert it to '/'. 692 */ 693 if (p[H_LEVEL_OFFSET] == 0) 694 lha_replace_path_separator(lha, entry); 695 696 archive_entry_set_mode(entry, lha->mode); 697 archive_entry_set_uid(entry, lha->uid); 698 archive_entry_set_gid(entry, lha->gid); 699 if (archive_strlen(&lha->uname) > 0) 700 archive_entry_set_uname(entry, lha->uname.s); 701 if (archive_strlen(&lha->gname) > 0) 702 archive_entry_set_gname(entry, lha->gname.s); 703 if (lha->setflag & BIRTHTIME_IS_SET) { 704 archive_entry_set_birthtime(entry, lha->birthtime, 705 lha->birthtime_tv_nsec); 706 archive_entry_set_ctime(entry, lha->birthtime, 707 lha->birthtime_tv_nsec); 708 } else { 709 archive_entry_unset_birthtime(entry); 710 archive_entry_unset_ctime(entry); 711 } 712 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec); 713 if (lha->setflag & ATIME_IS_SET) 714 archive_entry_set_atime(entry, lha->atime, 715 lha->atime_tv_nsec); 716 else 717 archive_entry_unset_atime(entry); 718 if (lha->directory || archive_entry_symlink(entry) != NULL) 719 archive_entry_unset_size(entry); 720 else 721 archive_entry_set_size(entry, lha->origsize); 722 723 /* 724 * Prepare variables used to read a file content. 725 */ 726 lha->entry_bytes_remaining = lha->compsize; 727 if (lha->entry_bytes_remaining < 0) { 728 archive_set_error(&a->archive, 729 ARCHIVE_ERRNO_FILE_FORMAT, 730 "Invalid LHa entry size"); 731 return (ARCHIVE_FATAL); 732 } 733 lha->entry_offset = 0; 734 lha->entry_crc_calculated = 0; 735 736 /* 737 * This file does not have a content. 738 */ 739 if (lha->directory || lha->compsize == 0) 740 lha->end_of_entry = 1; 741 742 sprintf(lha->format_name, "lha -%c%c%c-", 743 lha->method[0], lha->method[1], lha->method[2]); 744 a->archive.archive_format_name = lha->format_name; 745 746 return (err); 747 } 748 749 /* 750 * Replace a DOS path separator '\' by a character '/'. 751 * Some multi-byte character set have a character '\' in its second byte. 752 */ 753 static void 754 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry) 755 { 756 const wchar_t *wp; 757 size_t i; 758 759 if ((wp = archive_entry_pathname_w(entry)) != NULL) { 760 archive_wstrcpy(&(lha->ws), wp); 761 for (i = 0; i < archive_strlen(&(lha->ws)); i++) { 762 if (lha->ws.s[i] == L'\\') 763 lha->ws.s[i] = L'/'; 764 } 765 archive_entry_copy_pathname_w(entry, lha->ws.s); 766 } 767 768 if ((wp = archive_entry_symlink_w(entry)) != NULL) { 769 archive_wstrcpy(&(lha->ws), wp); 770 for (i = 0; i < archive_strlen(&(lha->ws)); i++) { 771 if (lha->ws.s[i] == L'\\') 772 lha->ws.s[i] = L'/'; 773 } 774 archive_entry_copy_symlink_w(entry, lha->ws.s); 775 } 776 } 777 778 /* 779 * Header 0 format 780 * 781 * +0 +1 +2 +7 +11 782 * +---------------+----------+----------------+-------------------+ 783 * |header size(*1)|header sum|compression type|compressed size(*2)| 784 * +---------------+----------+----------------+-------------------+ 785 * <---------------------(*1)----------* 786 * 787 * +11 +15 +17 +19 +20 +21 788 * +-----------------+---------+---------+--------------+----------------+ 789 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)| 790 * +-----------------+---------+---------+--------------+----------------+ 791 * *--------------------------------(*1)---------------------------------* 792 * 793 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4) 794 * +---------------+---------+----------+----------------+------------------+ 795 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data | 796 * +---------------+---------+----------+----------------+------------------+ 797 * <--(*3)-> <------(*2)------> 798 * *----------------------(*1)--------------------------> 799 * 800 */ 801 #define H0_HEADER_SIZE_OFFSET 0 802 #define H0_HEADER_SUM_OFFSET 1 803 #define H0_COMP_SIZE_OFFSET 7 804 #define H0_ORIG_SIZE_OFFSET 11 805 #define H0_DOS_TIME_OFFSET 15 806 #define H0_NAME_LEN_OFFSET 21 807 #define H0_FILE_NAME_OFFSET 22 808 #define H0_FIXED_SIZE 24 809 static int 810 lha_read_file_header_0(struct archive_read *a, struct lha *lha) 811 { 812 const unsigned char *p; 813 int extdsize, namelen; 814 unsigned char headersum, sum_calculated; 815 816 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL) 817 return (truncated_error(a)); 818 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2; 819 headersum = p[H0_HEADER_SUM_OFFSET]; 820 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET); 821 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET); 822 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET); 823 namelen = p[H0_NAME_LEN_OFFSET]; 824 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen; 825 if ((namelen > 221 || extdsize < 0) && extdsize != -2) { 826 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 827 "Invalid LHa header"); 828 return (ARCHIVE_FATAL); 829 } 830 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) 831 return (truncated_error(a)); 832 833 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen); 834 /* When extdsize == -2, A CRC16 value is not present in the header. */ 835 if (extdsize >= 0) { 836 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen); 837 lha->setflag |= CRC_IS_SET; 838 } 839 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); 840 841 /* Read an extended header */ 842 if (extdsize > 0) { 843 /* This extended data is set by 'LHa for UNIX' only. 844 * Maybe fixed size. 845 */ 846 p += H0_FILE_NAME_OFFSET + namelen + 2; 847 if (p[0] == 'U' && extdsize == 12) { 848 /* p[1] is a minor version. */ 849 lha->mtime = archive_le32dec(&p[2]); 850 lha->mode = archive_le16dec(&p[6]); 851 lha->uid = archive_le16dec(&p[8]); 852 lha->gid = archive_le16dec(&p[10]); 853 lha->setflag |= UNIX_MODE_IS_SET; 854 } 855 } 856 __archive_read_consume(a, lha->header_size); 857 858 if (sum_calculated != headersum) { 859 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 860 "LHa header sum error"); 861 return (ARCHIVE_FATAL); 862 } 863 864 return (ARCHIVE_OK); 865 } 866 867 /* 868 * Header 1 format 869 * 870 * +0 +1 +2 +7 +11 871 * +---------------+----------+----------------+-------------+ 872 * |header size(*1)|header sum|compression type|skip size(*2)| 873 * +---------------+----------+----------------+-------------+ 874 * <---------------(*1)----------* 875 * 876 * +11 +15 +17 +19 +20 +21 877 * +-----------------+---------+---------+--------------+----------------+ 878 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)| 879 * +-----------------+---------+---------+--------------+----------------+ 880 * *-------------------------------(*1)----------------------------------* 881 * 882 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4) 883 * +---------------+---------+----------+-----------+-----------+ 884 * |name length(*3)|file name|file CRC16| creator |padding(*4)| 885 * +---------------+---------+----------+-----------+-----------+ 886 * <--(*3)-> 887 * *----------------------------(*1)----------------------------* 888 * 889 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5) 890 * +----------------+---------------------+------------------------+ 891 * |next header size| extended header(*5) | compressed data | 892 * +----------------+---------------------+------------------------+ 893 * *------(*1)-----> <--------------------(*2)--------------------> 894 */ 895 #define H1_HEADER_SIZE_OFFSET 0 896 #define H1_HEADER_SUM_OFFSET 1 897 #define H1_COMP_SIZE_OFFSET 7 898 #define H1_ORIG_SIZE_OFFSET 11 899 #define H1_DOS_TIME_OFFSET 15 900 #define H1_NAME_LEN_OFFSET 21 901 #define H1_FILE_NAME_OFFSET 22 902 #define H1_FIXED_SIZE 27 903 static int 904 lha_read_file_header_1(struct archive_read *a, struct lha *lha) 905 { 906 const unsigned char *p; 907 size_t extdsize; 908 int i, err, err2; 909 int namelen, padding; 910 unsigned char headersum, sum_calculated; 911 912 err = ARCHIVE_OK; 913 914 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL) 915 return (truncated_error(a)); 916 917 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2; 918 headersum = p[H1_HEADER_SUM_OFFSET]; 919 /* Note: An extended header size is included in a compsize. */ 920 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET); 921 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET); 922 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET); 923 namelen = p[H1_NAME_LEN_OFFSET]; 924 /* Calculate a padding size. The result will be normally 0 only(?) */ 925 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen; 926 927 if (namelen > 230 || padding < 0) 928 goto invalid; 929 930 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) 931 return (truncated_error(a)); 932 933 for (i = 0; i < namelen; i++) { 934 if (p[i + H1_FILE_NAME_OFFSET] == 0xff) 935 goto invalid;/* Invalid filename. */ 936 } 937 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen); 938 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen); 939 lha->setflag |= CRC_IS_SET; 940 941 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); 942 /* Consume used bytes but not include `next header size' data 943 * since it will be consumed in lha_read_file_extended_header(). */ 944 __archive_read_consume(a, lha->header_size - 2); 945 946 /* Read extended headers */ 947 err2 = lha_read_file_extended_header(a, lha, NULL, 2, 948 (size_t)(lha->compsize + 2), &extdsize); 949 if (err2 < ARCHIVE_WARN) 950 return (err2); 951 if (err2 < err) 952 err = err2; 953 /* Get a real compressed file size. */ 954 lha->compsize -= extdsize - 2; 955 956 if (lha->compsize < 0) 957 goto invalid; /* Invalid compressed file size */ 958 959 if (sum_calculated != headersum) { 960 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 961 "LHa header sum error"); 962 return (ARCHIVE_FATAL); 963 } 964 return (err); 965 invalid: 966 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 967 "Invalid LHa header"); 968 return (ARCHIVE_FATAL); 969 } 970 971 /* 972 * Header 2 format 973 * 974 * +0 +2 +7 +11 +15 975 * +---------------+----------------+-------------------+-----------------+ 976 * |header size(*1)|compression type|compressed size(*2)|uncompressed size| 977 * +---------------+----------------+-------------------+-----------------+ 978 * <--------------------------------(*1)---------------------------------* 979 * 980 * +15 +19 +20 +21 +23 +24 981 * +-----------------+------------+----------------+----------+-----------+ 982 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator | 983 * +-----------------+------------+----------------+----------+-----------+ 984 * *---------------------------------(*1)---------------------------------* 985 * 986 * +24 +26 +26+(*3) +26+(*3)+(*4) 987 * +----------------+-------------------+-------------+-------------------+ 988 * |next header size|extended header(*3)| padding(*4) | compressed data | 989 * +----------------+-------------------+-------------+-------------------+ 990 * *--------------------------(*1)-------------------> <------(*2)-------> 991 * 992 */ 993 #define H2_HEADER_SIZE_OFFSET 0 994 #define H2_COMP_SIZE_OFFSET 7 995 #define H2_ORIG_SIZE_OFFSET 11 996 #define H2_TIME_OFFSET 15 997 #define H2_CRC_OFFSET 21 998 #define H2_FIXED_SIZE 24 999 static int 1000 lha_read_file_header_2(struct archive_read *a, struct lha *lha) 1001 { 1002 const unsigned char *p; 1003 size_t extdsize; 1004 int err, padding; 1005 uint16_t header_crc; 1006 1007 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL) 1008 return (truncated_error(a)); 1009 1010 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET); 1011 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET); 1012 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET); 1013 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET); 1014 lha->crc = archive_le16dec(p + H2_CRC_OFFSET); 1015 lha->setflag |= CRC_IS_SET; 1016 1017 if (lha->header_size < H2_FIXED_SIZE) { 1018 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1019 "Invalid LHa header size"); 1020 return (ARCHIVE_FATAL); 1021 } 1022 1023 header_crc = lha_crc16(0, p, H2_FIXED_SIZE); 1024 __archive_read_consume(a, H2_FIXED_SIZE); 1025 1026 /* Read extended headers */ 1027 err = lha_read_file_extended_header(a, lha, &header_crc, 2, 1028 lha->header_size - H2_FIXED_SIZE, &extdsize); 1029 if (err < ARCHIVE_WARN) 1030 return (err); 1031 1032 /* Calculate a padding size. The result will be normally 0 or 1. */ 1033 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize); 1034 if (padding > 0) { 1035 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL) 1036 return (truncated_error(a)); 1037 header_crc = lha_crc16(header_crc, p, padding); 1038 __archive_read_consume(a, padding); 1039 } 1040 1041 if (header_crc != lha->header_crc) { 1042 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1043 "LHa header CRC error"); 1044 return (ARCHIVE_FATAL); 1045 } 1046 return (err); 1047 } 1048 1049 /* 1050 * Header 3 format 1051 * 1052 * +0 +2 +7 +11 +15 1053 * +------------+----------------+-------------------+-----------------+ 1054 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size| 1055 * +------------+----------------+-------------------+-----------------+ 1056 * <-------------------------------(*1)-------------------------------* 1057 * 1058 * +15 +19 +20 +21 +23 +24 1059 * +-----------------+------------+----------------+----------+-----------+ 1060 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator | 1061 * +-----------------+------------+----------------+----------+-----------+ 1062 * *--------------------------------(*1)----------------------------------* 1063 * 1064 * +24 +28 +32 +32+(*3) 1065 * +---------------+----------------+-------------------+-----------------+ 1066 * |header size(*1)|next header size|extended header(*3)| compressed data | 1067 * +---------------+----------------+-------------------+-----------------+ 1068 * *------------------------(*1)-----------------------> <------(*2)-----> 1069 * 1070 */ 1071 #define H3_FIELD_LEN_OFFSET 0 1072 #define H3_COMP_SIZE_OFFSET 7 1073 #define H3_ORIG_SIZE_OFFSET 11 1074 #define H3_TIME_OFFSET 15 1075 #define H3_CRC_OFFSET 21 1076 #define H3_HEADER_SIZE_OFFSET 24 1077 #define H3_FIXED_SIZE 28 1078 static int 1079 lha_read_file_header_3(struct archive_read *a, struct lha *lha) 1080 { 1081 const unsigned char *p; 1082 size_t extdsize; 1083 int err; 1084 uint16_t header_crc; 1085 1086 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL) 1087 return (truncated_error(a)); 1088 1089 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4) 1090 goto invalid; 1091 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET); 1092 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET); 1093 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET); 1094 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET); 1095 lha->crc = archive_le16dec(p + H3_CRC_OFFSET); 1096 lha->setflag |= CRC_IS_SET; 1097 1098 if (lha->header_size < H3_FIXED_SIZE + 4) 1099 goto invalid; 1100 header_crc = lha_crc16(0, p, H3_FIXED_SIZE); 1101 __archive_read_consume(a, H3_FIXED_SIZE); 1102 1103 /* Read extended headers */ 1104 err = lha_read_file_extended_header(a, lha, &header_crc, 4, 1105 lha->header_size - H3_FIXED_SIZE, &extdsize); 1106 if (err < ARCHIVE_WARN) 1107 return (err); 1108 1109 if (header_crc != lha->header_crc) { 1110 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1111 "LHa header CRC error"); 1112 return (ARCHIVE_FATAL); 1113 } 1114 return (err); 1115 invalid: 1116 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1117 "Invalid LHa header"); 1118 return (ARCHIVE_FATAL); 1119 } 1120 1121 /* 1122 * Extended header format 1123 * 1124 * +0 +2 +3 -- used in header 1 and 2 1125 * +0 +4 +5 -- used in header 3 1126 * +--------------+---------+-------------------+--------------+-- 1127 * |ex-header size|header id| data |ex-header size| ....... 1128 * +--------------+---------+-------------------+--------------+-- 1129 * <-------------( ex-header size)------------> <-- next extended header --* 1130 * 1131 * If the ex-header size is zero, it is the make of the end of extended 1132 * headers. 1133 * 1134 */ 1135 static int 1136 lha_read_file_extended_header(struct archive_read *a, struct lha *lha, 1137 uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size) 1138 { 1139 const void *h; 1140 const unsigned char *extdheader; 1141 size_t extdsize; 1142 size_t datasize; 1143 unsigned int i; 1144 unsigned char extdtype; 1145 1146 #define EXT_HEADER_CRC 0x00 /* Header CRC and information*/ 1147 #define EXT_FILENAME 0x01 /* Filename */ 1148 #define EXT_DIRECTORY 0x02 /* Directory name */ 1149 #define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */ 1150 #define EXT_TIMESTAMP 0x41 /* Windows time stamp */ 1151 #define EXT_FILESIZE 0x42 /* Large file size */ 1152 #define EXT_TIMEZONE 0x43 /* Time zone */ 1153 #define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */ 1154 #define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */ 1155 #define EXT_CODEPAGE 0x46 /* Codepage */ 1156 #define EXT_UNIX_MODE 0x50 /* File permission */ 1157 #define EXT_UNIX_GID_UID 0x51 /* gid,uid */ 1158 #define EXT_UNIX_GNAME 0x52 /* Group name */ 1159 #define EXT_UNIX_UNAME 0x53 /* User name */ 1160 #define EXT_UNIX_MTIME 0x54 /* Modified time */ 1161 #define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */ 1162 #define EXT_NEW_ATTR 0xff /* new attribute */ 1163 1164 *total_size = sizefield_length; 1165 1166 for (;;) { 1167 /* Read an extended header size. */ 1168 if ((h = 1169 __archive_read_ahead(a, sizefield_length, NULL)) == NULL) 1170 return (truncated_error(a)); 1171 /* Check if the size is the zero indicates the end of the 1172 * extended header. */ 1173 if (sizefield_length == sizeof(uint16_t)) 1174 extdsize = archive_le16dec(h); 1175 else 1176 extdsize = archive_le32dec(h); 1177 if (extdsize == 0) { 1178 /* End of extended header */ 1179 if (crc != NULL) 1180 *crc = lha_crc16(*crc, h, sizefield_length); 1181 __archive_read_consume(a, sizefield_length); 1182 return (ARCHIVE_OK); 1183 } 1184 1185 /* Sanity check to the extended header size. */ 1186 if (((uint64_t)*total_size + extdsize) > 1187 (uint64_t)limitsize || 1188 extdsize <= (size_t)sizefield_length) 1189 goto invalid; 1190 1191 /* Read the extended header. */ 1192 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL) 1193 return (truncated_error(a)); 1194 *total_size += extdsize; 1195 1196 extdheader = (const unsigned char *)h; 1197 /* Get the extended header type. */ 1198 extdtype = extdheader[sizefield_length]; 1199 /* Calculate an extended data size. */ 1200 datasize = extdsize - (1 + sizefield_length); 1201 /* Skip an extended header size field and type field. */ 1202 extdheader += sizefield_length + 1; 1203 1204 if (crc != NULL && extdtype != EXT_HEADER_CRC) 1205 *crc = lha_crc16(*crc, h, extdsize); 1206 switch (extdtype) { 1207 case EXT_HEADER_CRC: 1208 /* We only use a header CRC. Following data will not 1209 * be used. */ 1210 if (datasize >= 2) { 1211 lha->header_crc = archive_le16dec(extdheader); 1212 if (crc != NULL) { 1213 static const char zeros[2] = {0, 0}; 1214 *crc = lha_crc16(*crc, h, 1215 extdsize - datasize); 1216 /* CRC value itself as zero */ 1217 *crc = lha_crc16(*crc, zeros, 2); 1218 *crc = lha_crc16(*crc, 1219 extdheader+2, datasize - 2); 1220 } 1221 } 1222 break; 1223 case EXT_FILENAME: 1224 if (datasize == 0) { 1225 /* maybe directory header */ 1226 archive_string_empty(&lha->filename); 1227 break; 1228 } 1229 if (extdheader[0] == '\0') 1230 goto invalid; 1231 archive_strncpy(&lha->filename, 1232 (const char *)extdheader, datasize); 1233 break; 1234 case EXT_UTF16_FILENAME: 1235 if (datasize == 0) { 1236 /* maybe directory header */ 1237 archive_string_empty(&lha->filename); 1238 break; 1239 } else if (datasize & 1) { 1240 /* UTF-16 characters take always 2 or 4 bytes */ 1241 goto invalid; 1242 } 1243 if (extdheader[0] == '\0') 1244 goto invalid; 1245 archive_string_empty(&lha->filename); 1246 archive_array_append(&lha->filename, 1247 (const char *)extdheader, datasize); 1248 /* Setup a string conversion for a filename. */ 1249 lha->sconv_fname = archive_string_conversion_from_charset( 1250 &a->archive, "UTF-16LE", 1); 1251 if (lha->sconv_fname == NULL) 1252 return (ARCHIVE_FATAL); 1253 break; 1254 case EXT_DIRECTORY: 1255 if (datasize == 0 || extdheader[0] == '\0') 1256 /* no directory name data. exit this case. */ 1257 goto invalid; 1258 1259 archive_strncpy(&lha->dirname, 1260 (const char *)extdheader, datasize); 1261 /* 1262 * Convert directory delimiter from 0xFF 1263 * to '/' for local system. 1264 */ 1265 for (i = 0; i < lha->dirname.length; i++) { 1266 if ((unsigned char)lha->dirname.s[i] == 0xFF) 1267 lha->dirname.s[i] = '/'; 1268 } 1269 /* Is last character directory separator? */ 1270 if (lha->dirname.s[lha->dirname.length-1] != '/') 1271 /* invalid directory data */ 1272 goto invalid; 1273 break; 1274 case EXT_UTF16_DIRECTORY: 1275 /* UTF-16 characters take always 2 or 4 bytes */ 1276 if (datasize == 0 || (datasize & 1) || extdheader[0] == '\0') 1277 /* no directory name data. exit this case. */ 1278 goto invalid; 1279 1280 archive_string_empty(&lha->dirname); 1281 archive_array_append(&lha->dirname, 1282 (const char *)extdheader, datasize); 1283 lha->sconv_dir = archive_string_conversion_from_charset( 1284 &a->archive, "UTF-16LE", 1); 1285 if (lha->sconv_dir == NULL) 1286 return (ARCHIVE_FATAL); 1287 else { 1288 /* 1289 * Convert directory delimiter from 0xFF 1290 * to '/' for local system. 1291 */ 1292 /* UTF-16LE character */ 1293 uint16_t *utf16name = (uint16_t *)lha->dirname.s; 1294 for (i = 0; i < lha->dirname.length / 2; i++) { 1295 if (utf16name[i] == 0xFFFF) 1296 utf16name[i] = L'/'; 1297 } 1298 /* Is last character directory separator? */ 1299 if (utf16name[lha->dirname.length / 2 - 1] != L'/') 1300 /* invalid directory data */ 1301 goto invalid; 1302 } 1303 break; 1304 case EXT_DOS_ATTR: 1305 if (datasize == 2) 1306 lha->dos_attr = (unsigned char) 1307 (archive_le16dec(extdheader) & 0xff); 1308 break; 1309 case EXT_TIMESTAMP: 1310 if (datasize == (sizeof(uint64_t) * 3)) { 1311 lha->birthtime = lha_win_time( 1312 archive_le64dec(extdheader), 1313 &lha->birthtime_tv_nsec); 1314 extdheader += sizeof(uint64_t); 1315 lha->mtime = lha_win_time( 1316 archive_le64dec(extdheader), 1317 &lha->mtime_tv_nsec); 1318 extdheader += sizeof(uint64_t); 1319 lha->atime = lha_win_time( 1320 archive_le64dec(extdheader), 1321 &lha->atime_tv_nsec); 1322 lha->setflag |= BIRTHTIME_IS_SET | 1323 ATIME_IS_SET; 1324 } 1325 break; 1326 case EXT_FILESIZE: 1327 if (datasize == sizeof(uint64_t) * 2) { 1328 lha->compsize = archive_le64dec(extdheader); 1329 extdheader += sizeof(uint64_t); 1330 lha->origsize = archive_le64dec(extdheader); 1331 } 1332 break; 1333 case EXT_CODEPAGE: 1334 /* Get an archived filename charset from codepage. 1335 * This overwrites the charset specified by 1336 * hdrcharset option. */ 1337 if (datasize == sizeof(uint32_t)) { 1338 struct archive_string cp; 1339 const char *charset; 1340 1341 archive_string_init(&cp); 1342 switch (archive_le32dec(extdheader)) { 1343 case 65001: /* UTF-8 */ 1344 charset = "UTF-8"; 1345 break; 1346 default: 1347 archive_string_sprintf(&cp, "CP%d", 1348 (int)archive_le32dec(extdheader)); 1349 charset = cp.s; 1350 break; 1351 } 1352 lha->sconv_dir = 1353 archive_string_conversion_from_charset( 1354 &(a->archive), charset, 1); 1355 lha->sconv_fname = 1356 archive_string_conversion_from_charset( 1357 &(a->archive), charset, 1); 1358 archive_string_free(&cp); 1359 if (lha->sconv_dir == NULL) 1360 return (ARCHIVE_FATAL); 1361 if (lha->sconv_fname == NULL) 1362 return (ARCHIVE_FATAL); 1363 } 1364 break; 1365 case EXT_UNIX_MODE: 1366 if (datasize == sizeof(uint16_t)) { 1367 lha->mode = archive_le16dec(extdheader); 1368 lha->setflag |= UNIX_MODE_IS_SET; 1369 } 1370 break; 1371 case EXT_UNIX_GID_UID: 1372 if (datasize == (sizeof(uint16_t) * 2)) { 1373 lha->gid = archive_le16dec(extdheader); 1374 lha->uid = archive_le16dec(extdheader+2); 1375 } 1376 break; 1377 case EXT_UNIX_GNAME: 1378 if (datasize > 0) 1379 archive_strncpy(&lha->gname, 1380 (const char *)extdheader, datasize); 1381 break; 1382 case EXT_UNIX_UNAME: 1383 if (datasize > 0) 1384 archive_strncpy(&lha->uname, 1385 (const char *)extdheader, datasize); 1386 break; 1387 case EXT_UNIX_MTIME: 1388 if (datasize == sizeof(uint32_t)) 1389 lha->mtime = archive_le32dec(extdheader); 1390 break; 1391 case EXT_OS2_NEW_ATTR: 1392 /* This extended header is OS/2 depend. */ 1393 if (datasize == 16) { 1394 lha->dos_attr = (unsigned char) 1395 (archive_le16dec(extdheader) & 0xff); 1396 lha->mode = archive_le16dec(extdheader+2); 1397 lha->gid = archive_le16dec(extdheader+4); 1398 lha->uid = archive_le16dec(extdheader+6); 1399 lha->birthtime = archive_le32dec(extdheader+8); 1400 lha->atime = archive_le32dec(extdheader+12); 1401 lha->setflag |= UNIX_MODE_IS_SET 1402 | BIRTHTIME_IS_SET | ATIME_IS_SET; 1403 } 1404 break; 1405 case EXT_NEW_ATTR: 1406 if (datasize == 20) { 1407 lha->mode = (mode_t)archive_le32dec(extdheader); 1408 lha->gid = archive_le32dec(extdheader+4); 1409 lha->uid = archive_le32dec(extdheader+8); 1410 lha->birthtime = archive_le32dec(extdheader+12); 1411 lha->atime = archive_le32dec(extdheader+16); 1412 lha->setflag |= UNIX_MODE_IS_SET 1413 | BIRTHTIME_IS_SET | ATIME_IS_SET; 1414 } 1415 break; 1416 case EXT_TIMEZONE: /* Not supported */ 1417 break; 1418 default: 1419 break; 1420 } 1421 1422 __archive_read_consume(a, extdsize); 1423 } 1424 invalid: 1425 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1426 "Invalid extended LHa header"); 1427 return (ARCHIVE_FATAL); 1428 } 1429 1430 static int 1431 lha_end_of_entry(struct archive_read *a) 1432 { 1433 struct lha *lha = (struct lha *)(a->format->data); 1434 int r = ARCHIVE_EOF; 1435 1436 if (!lha->end_of_entry_cleanup) { 1437 if ((lha->setflag & CRC_IS_SET) && 1438 lha->crc != lha->entry_crc_calculated) { 1439 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1440 "LHa data CRC error"); 1441 r = ARCHIVE_WARN; 1442 } 1443 1444 /* End-of-entry cleanup done. */ 1445 lha->end_of_entry_cleanup = 1; 1446 } 1447 return (r); 1448 } 1449 1450 static int 1451 archive_read_format_lha_read_data(struct archive_read *a, 1452 const void **buff, size_t *size, int64_t *offset) 1453 { 1454 struct lha *lha = (struct lha *)(a->format->data); 1455 int r; 1456 1457 if (lha->entry_unconsumed) { 1458 /* Consume as much as the decompressor actually used. */ 1459 __archive_read_consume(a, lha->entry_unconsumed); 1460 lha->entry_unconsumed = 0; 1461 } 1462 if (lha->end_of_entry) { 1463 *offset = lha->entry_offset; 1464 *size = 0; 1465 *buff = NULL; 1466 return (lha_end_of_entry(a)); 1467 } 1468 1469 if (lha->entry_is_compressed) 1470 r = lha_read_data_lzh(a, buff, size, offset); 1471 else 1472 /* No compression. */ 1473 r = lha_read_data_none(a, buff, size, offset); 1474 return (r); 1475 } 1476 1477 /* 1478 * Read a file content in no compression. 1479 * 1480 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 1481 * lha->end_of_entry if it consumes all of the data. 1482 */ 1483 static int 1484 lha_read_data_none(struct archive_read *a, const void **buff, 1485 size_t *size, int64_t *offset) 1486 { 1487 struct lha *lha = (struct lha *)(a->format->data); 1488 ssize_t bytes_avail; 1489 1490 if (lha->entry_bytes_remaining == 0) { 1491 *buff = NULL; 1492 *size = 0; 1493 *offset = lha->entry_offset; 1494 lha->end_of_entry = 1; 1495 return (ARCHIVE_OK); 1496 } 1497 /* 1498 * Note: '1' here is a performance optimization. 1499 * Recall that the decompression layer returns a count of 1500 * available bytes; asking for more than that forces the 1501 * decompressor to combine reads by copying data. 1502 */ 1503 *buff = __archive_read_ahead(a, 1, &bytes_avail); 1504 if (bytes_avail <= 0) { 1505 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1506 "Truncated LHa file data"); 1507 return (ARCHIVE_FATAL); 1508 } 1509 if (bytes_avail > lha->entry_bytes_remaining) 1510 bytes_avail = (ssize_t)lha->entry_bytes_remaining; 1511 lha->entry_crc_calculated = 1512 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail); 1513 *size = bytes_avail; 1514 *offset = lha->entry_offset; 1515 lha->entry_offset += bytes_avail; 1516 lha->entry_bytes_remaining -= bytes_avail; 1517 if (lha->entry_bytes_remaining == 0) 1518 lha->end_of_entry = 1; 1519 lha->entry_unconsumed = bytes_avail; 1520 return (ARCHIVE_OK); 1521 } 1522 1523 /* 1524 * Read a file content in LZHUFF encoding. 1525 * 1526 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is 1527 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes 1528 * all of the data. 1529 */ 1530 static int 1531 lha_read_data_lzh(struct archive_read *a, const void **buff, 1532 size_t *size, int64_t *offset) 1533 { 1534 struct lha *lha = (struct lha *)(a->format->data); 1535 ssize_t bytes_avail; 1536 int r; 1537 1538 /* If we haven't yet read any data, initialize the decompressor. */ 1539 if (!lha->decompress_init) { 1540 r = lzh_decode_init(&(lha->strm), lha->method); 1541 switch (r) { 1542 case ARCHIVE_OK: 1543 break; 1544 case ARCHIVE_FAILED: 1545 /* Unsupported compression. */ 1546 *buff = NULL; 1547 *size = 0; 1548 *offset = 0; 1549 archive_set_error(&a->archive, 1550 ARCHIVE_ERRNO_FILE_FORMAT, 1551 "Unsupported lzh compression method -%c%c%c-", 1552 lha->method[0], lha->method[1], lha->method[2]); 1553 /* We know compressed size; just skip it. */ 1554 archive_read_format_lha_read_data_skip(a); 1555 return (ARCHIVE_WARN); 1556 default: 1557 archive_set_error(&a->archive, ENOMEM, 1558 "Couldn't allocate memory " 1559 "for lzh decompression"); 1560 return (ARCHIVE_FATAL); 1561 } 1562 /* We've initialized decompression for this stream. */ 1563 lha->decompress_init = 1; 1564 lha->strm.avail_out = 0; 1565 lha->strm.total_out = 0; 1566 } 1567 1568 /* 1569 * Note: '1' here is a performance optimization. 1570 * Recall that the decompression layer returns a count of 1571 * available bytes; asking for more than that forces the 1572 * decompressor to combine reads by copying data. 1573 */ 1574 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail); 1575 if (bytes_avail <= 0) { 1576 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1577 "Truncated LHa file body"); 1578 return (ARCHIVE_FATAL); 1579 } 1580 if (bytes_avail > lha->entry_bytes_remaining) 1581 bytes_avail = (ssize_t)lha->entry_bytes_remaining; 1582 1583 lha->strm.avail_in = (int)bytes_avail; 1584 lha->strm.total_in = 0; 1585 lha->strm.avail_out = 0; 1586 1587 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining); 1588 switch (r) { 1589 case ARCHIVE_OK: 1590 break; 1591 case ARCHIVE_EOF: 1592 lha->end_of_entry = 1; 1593 break; 1594 default: 1595 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1596 "Bad lzh data"); 1597 return (ARCHIVE_FAILED); 1598 } 1599 lha->entry_unconsumed = lha->strm.total_in; 1600 lha->entry_bytes_remaining -= lha->strm.total_in; 1601 1602 if (lha->strm.avail_out) { 1603 *offset = lha->entry_offset; 1604 *size = lha->strm.avail_out; 1605 *buff = lha->strm.ref_ptr; 1606 lha->entry_crc_calculated = 1607 lha_crc16(lha->entry_crc_calculated, *buff, *size); 1608 lha->entry_offset += *size; 1609 } else { 1610 *offset = lha->entry_offset; 1611 *size = 0; 1612 *buff = NULL; 1613 if (lha->end_of_entry) 1614 return (lha_end_of_entry(a)); 1615 } 1616 return (ARCHIVE_OK); 1617 } 1618 1619 /* 1620 * Skip a file content. 1621 */ 1622 static int 1623 archive_read_format_lha_read_data_skip(struct archive_read *a) 1624 { 1625 struct lha *lha; 1626 int64_t bytes_skipped; 1627 1628 lha = (struct lha *)(a->format->data); 1629 1630 if (lha->entry_unconsumed) { 1631 /* Consume as much as the decompressor actually used. */ 1632 __archive_read_consume(a, lha->entry_unconsumed); 1633 lha->entry_unconsumed = 0; 1634 } 1635 1636 /* if we've already read to end of data, we're done. */ 1637 if (lha->end_of_entry_cleanup) 1638 return (ARCHIVE_OK); 1639 1640 /* 1641 * If the length is at the beginning, we can skip the 1642 * compressed data much more quickly. 1643 */ 1644 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining); 1645 if (bytes_skipped < 0) 1646 return (ARCHIVE_FATAL); 1647 1648 /* This entry is finished and done. */ 1649 lha->end_of_entry_cleanup = lha->end_of_entry = 1; 1650 return (ARCHIVE_OK); 1651 } 1652 1653 static int 1654 archive_read_format_lha_cleanup(struct archive_read *a) 1655 { 1656 struct lha *lha = (struct lha *)(a->format->data); 1657 1658 lzh_decode_free(&(lha->strm)); 1659 archive_string_free(&(lha->dirname)); 1660 archive_string_free(&(lha->filename)); 1661 archive_string_free(&(lha->uname)); 1662 archive_string_free(&(lha->gname)); 1663 archive_wstring_free(&(lha->ws)); 1664 free(lha); 1665 (a->format->data) = NULL; 1666 return (ARCHIVE_OK); 1667 } 1668 1669 /* 1670 * 'LHa for UNIX' utility has archived a symbolic-link name after 1671 * a pathname with '|' character. 1672 * This function extracts the symbolic-link name from the pathname. 1673 * 1674 * example. 1675 * 1. a symbolic-name is 'aaa/bb/cc' 1676 * 2. a filename is 'xxx/bbb' 1677 * then a archived pathname is 'xxx/bbb|aaa/bb/cc' 1678 */ 1679 static int 1680 lha_parse_linkname(struct archive_wstring *linkname, 1681 struct archive_wstring *pathname) 1682 { 1683 wchar_t * linkptr; 1684 size_t symlen; 1685 1686 linkptr = wcschr(pathname->s, L'|'); 1687 if (linkptr != NULL) { 1688 symlen = wcslen(linkptr + 1); 1689 archive_wstrncpy(linkname, linkptr+1, symlen); 1690 1691 *linkptr = 0; 1692 pathname->length = wcslen(pathname->s); 1693 1694 return (1); 1695 } 1696 return (0); 1697 } 1698 1699 /* Convert an MSDOS-style date/time into Unix-style time. */ 1700 static time_t 1701 lha_dos_time(const unsigned char *p) 1702 { 1703 int msTime, msDate; 1704 struct tm ts; 1705 1706 msTime = archive_le16dec(p); 1707 msDate = archive_le16dec(p+2); 1708 1709 memset(&ts, 0, sizeof(ts)); 1710 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 1711 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 1712 ts.tm_mday = msDate & 0x1f; /* Day of month. */ 1713 ts.tm_hour = (msTime >> 11) & 0x1f; 1714 ts.tm_min = (msTime >> 5) & 0x3f; 1715 ts.tm_sec = (msTime << 1) & 0x3e; 1716 ts.tm_isdst = -1; 1717 return (mktime(&ts)); 1718 } 1719 1720 /* Convert an MS-Windows-style date/time into Unix-style time. */ 1721 static time_t 1722 lha_win_time(uint64_t wintime, long *ns) 1723 { 1724 #define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000) 1725 1726 if (wintime >= EPOC_TIME) { 1727 wintime -= EPOC_TIME; /* 1970-01-01 00:00:00 (UTC) */ 1728 if (ns != NULL) 1729 *ns = (long)(wintime % 10000000) * 100; 1730 return (wintime / 10000000); 1731 } else { 1732 if (ns != NULL) 1733 *ns = 0; 1734 return (0); 1735 } 1736 } 1737 1738 static unsigned char 1739 lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size) 1740 { 1741 unsigned char const *p = (unsigned char const *)pp; 1742 1743 p += offset; 1744 for (;size > 0; --size) 1745 sum += *p++; 1746 return (sum); 1747 } 1748 1749 static uint16_t crc16tbl[2][256]; 1750 static void 1751 lha_crc16_init(void) 1752 { 1753 unsigned int i; 1754 static int crc16init = 0; 1755 1756 if (crc16init) 1757 return; 1758 crc16init = 1; 1759 1760 for (i = 0; i < 256; i++) { 1761 unsigned int j; 1762 uint16_t crc = (uint16_t)i; 1763 for (j = 8; j; j--) 1764 crc = (crc >> 1) ^ ((crc & 1) * 0xA001); 1765 crc16tbl[0][i] = crc; 1766 } 1767 1768 for (i = 0; i < 256; i++) { 1769 crc16tbl[1][i] = (crc16tbl[0][i] >> 8) 1770 ^ crc16tbl[0][crc16tbl[0][i] & 0xff]; 1771 } 1772 } 1773 1774 static uint16_t 1775 lha_crc16(uint16_t crc, const void *pp, size_t len) 1776 { 1777 const unsigned char *p = (const unsigned char *)pp; 1778 const uint16_t *buff; 1779 const union { 1780 uint32_t i; 1781 char c[4]; 1782 } u = { 0x01020304 }; 1783 1784 if (len == 0) 1785 return crc; 1786 1787 /* Process unaligned address. */ 1788 if (((uintptr_t)p) & (uintptr_t)0x1) { 1789 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; 1790 len--; 1791 } 1792 buff = (const uint16_t *)p; 1793 /* 1794 * Modern C compiler such as GCC does not unroll automatically yet 1795 * without unrolling pragma, and Clang is so. So we should 1796 * unroll this loop for its performance. 1797 */ 1798 for (;len >= 8; len -= 8) { 1799 /* This if statement expects compiler optimization will 1800 * remove the statement which will not be executed. */ 1801 #undef bswap16 1802 #if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */ 1803 # define bswap16(x) _byteswap_ushort(x) 1804 #elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) 1805 /* GCC 4.8 and later has __builtin_bswap16() */ 1806 # define bswap16(x) __builtin_bswap16(x) 1807 #elif defined(__clang__) 1808 /* All clang versions have __builtin_bswap16() */ 1809 # define bswap16(x) __builtin_bswap16(x) 1810 #else 1811 # define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8)) 1812 #endif 1813 #define CRC16W do { \ 1814 if(u.c[0] == 1) { /* Big endian */ \ 1815 crc ^= bswap16(*buff); buff++; \ 1816 } else \ 1817 crc ^= *buff++; \ 1818 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\ 1819 } while (0) 1820 CRC16W; 1821 CRC16W; 1822 CRC16W; 1823 CRC16W; 1824 #undef CRC16W 1825 #undef bswap16 1826 } 1827 1828 p = (const unsigned char *)buff; 1829 for (;len; len--) { 1830 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; 1831 } 1832 return crc; 1833 } 1834 1835 /* 1836 * Initialize LZHUF decoder. 1837 * 1838 * Returns ARCHIVE_OK if initialization was successful. 1839 * Returns ARCHIVE_FAILED if method is unsupported. 1840 * Returns ARCHIVE_FATAL if initialization failed; memory allocation 1841 * error occurred. 1842 */ 1843 static int 1844 lzh_decode_init(struct lzh_stream *strm, const char *method) 1845 { 1846 struct lzh_dec *ds; 1847 int w_bits, w_size; 1848 1849 if (strm->ds == NULL) { 1850 strm->ds = calloc(1, sizeof(*strm->ds)); 1851 if (strm->ds == NULL) 1852 return (ARCHIVE_FATAL); 1853 } 1854 ds = strm->ds; 1855 ds->error = ARCHIVE_FAILED; 1856 if (method == NULL || method[0] != 'l' || method[1] != 'h') 1857 return (ARCHIVE_FAILED); 1858 switch (method[2]) { 1859 case '5': 1860 w_bits = 13;/* 8KiB for window */ 1861 break; 1862 case '6': 1863 w_bits = 15;/* 32KiB for window */ 1864 break; 1865 case '7': 1866 w_bits = 16;/* 64KiB for window */ 1867 break; 1868 default: 1869 return (ARCHIVE_FAILED);/* Not supported. */ 1870 } 1871 ds->error = ARCHIVE_FATAL; 1872 /* Expand a window size up to 128 KiB for decompressing process 1873 * performance whatever its original window size is. */ 1874 ds->w_size = 1U << 17; 1875 ds->w_mask = ds->w_size -1; 1876 if (ds->w_buff == NULL) { 1877 ds->w_buff = malloc(ds->w_size); 1878 if (ds->w_buff == NULL) 1879 return (ARCHIVE_FATAL); 1880 } 1881 w_size = 1U << w_bits; 1882 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size); 1883 ds->w_pos = 0; 1884 ds->state = 0; 1885 ds->pos_pt_len_size = w_bits + 1; 1886 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4; 1887 ds->literal_pt_len_size = PT_BITLEN_SIZE; 1888 ds->literal_pt_len_bits = 5; 1889 ds->br.cache_buffer = 0; 1890 ds->br.cache_avail = 0; 1891 1892 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16) 1893 != ARCHIVE_OK) 1894 return (ARCHIVE_FATAL); 1895 ds->lt.len_bits = 9; 1896 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16) 1897 != ARCHIVE_OK) 1898 return (ARCHIVE_FATAL); 1899 ds->error = 0; 1900 1901 return (ARCHIVE_OK); 1902 } 1903 1904 /* 1905 * Release LZHUF decoder. 1906 */ 1907 static void 1908 lzh_decode_free(struct lzh_stream *strm) 1909 { 1910 1911 if (strm->ds == NULL) 1912 return; 1913 free(strm->ds->w_buff); 1914 lzh_huffman_free(&(strm->ds->lt)); 1915 lzh_huffman_free(&(strm->ds->pt)); 1916 free(strm->ds); 1917 strm->ds = NULL; 1918 } 1919 1920 /* 1921 * Bit stream reader. 1922 */ 1923 /* Check that the cache buffer has enough bits. */ 1924 #define lzh_br_has(br, n) ((br)->cache_avail >= n) 1925 /* Get compressed data by bit. */ 1926 #define lzh_br_bits(br, n) \ 1927 (((uint16_t)((br)->cache_buffer >> \ 1928 ((br)->cache_avail - (n)))) & cache_masks[n]) 1929 #define lzh_br_bits_forced(br, n) \ 1930 (((uint16_t)((br)->cache_buffer << \ 1931 ((n) - (br)->cache_avail))) & cache_masks[n]) 1932 /* Read ahead to make sure the cache buffer has enough compressed data we 1933 * will use. 1934 * True : completed, there is enough data in the cache buffer. 1935 * False : we met that strm->next_in is empty, we have to get following 1936 * bytes. */ 1937 #define lzh_br_read_ahead_0(strm, br, n) \ 1938 (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br)) 1939 /* True : the cache buffer has some bits as much as we need. 1940 * False : there are no enough bits in the cache buffer to be used, 1941 * we have to get following bytes if we could. */ 1942 #define lzh_br_read_ahead(strm, br, n) \ 1943 (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n))) 1944 1945 /* Notify how many bits we consumed. */ 1946 #define lzh_br_consume(br, n) ((br)->cache_avail -= (n)) 1947 #define lzh_br_unconsume(br, n) ((br)->cache_avail += (n)) 1948 1949 static const uint16_t cache_masks[] = { 1950 0x0000, 0x0001, 0x0003, 0x0007, 1951 0x000F, 0x001F, 0x003F, 0x007F, 1952 0x00FF, 0x01FF, 0x03FF, 0x07FF, 1953 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 1954 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF 1955 }; 1956 1957 /* 1958 * Shift away used bits in the cache data and fill it up with following bits. 1959 * Call this when cache buffer does not have enough bits you need. 1960 * 1961 * Returns 1 if the cache buffer is full. 1962 * Returns 0 if the cache buffer is not full; input buffer is empty. 1963 */ 1964 static int 1965 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br) 1966 { 1967 int n = CACHE_BITS - br->cache_avail; 1968 1969 for (;;) { 1970 const int x = n >> 3; 1971 if (strm->avail_in >= x) { 1972 switch (x) { 1973 case 8: 1974 br->cache_buffer = 1975 ((uint64_t)strm->next_in[0]) << 56 | 1976 ((uint64_t)strm->next_in[1]) << 48 | 1977 ((uint64_t)strm->next_in[2]) << 40 | 1978 ((uint64_t)strm->next_in[3]) << 32 | 1979 ((uint32_t)strm->next_in[4]) << 24 | 1980 ((uint32_t)strm->next_in[5]) << 16 | 1981 ((uint32_t)strm->next_in[6]) << 8 | 1982 (uint32_t)strm->next_in[7]; 1983 strm->next_in += 8; 1984 strm->avail_in -= 8; 1985 br->cache_avail += 8 * 8; 1986 return (1); 1987 case 7: 1988 br->cache_buffer = 1989 (br->cache_buffer << 56) | 1990 ((uint64_t)strm->next_in[0]) << 48 | 1991 ((uint64_t)strm->next_in[1]) << 40 | 1992 ((uint64_t)strm->next_in[2]) << 32 | 1993 ((uint32_t)strm->next_in[3]) << 24 | 1994 ((uint32_t)strm->next_in[4]) << 16 | 1995 ((uint32_t)strm->next_in[5]) << 8 | 1996 (uint32_t)strm->next_in[6]; 1997 strm->next_in += 7; 1998 strm->avail_in -= 7; 1999 br->cache_avail += 7 * 8; 2000 return (1); 2001 case 6: 2002 br->cache_buffer = 2003 (br->cache_buffer << 48) | 2004 ((uint64_t)strm->next_in[0]) << 40 | 2005 ((uint64_t)strm->next_in[1]) << 32 | 2006 ((uint32_t)strm->next_in[2]) << 24 | 2007 ((uint32_t)strm->next_in[3]) << 16 | 2008 ((uint32_t)strm->next_in[4]) << 8 | 2009 (uint32_t)strm->next_in[5]; 2010 strm->next_in += 6; 2011 strm->avail_in -= 6; 2012 br->cache_avail += 6 * 8; 2013 return (1); 2014 case 0: 2015 /* We have enough compressed data in 2016 * the cache buffer.*/ 2017 return (1); 2018 default: 2019 break; 2020 } 2021 } 2022 if (strm->avail_in == 0) { 2023 /* There is not enough compressed data to fill up the 2024 * cache buffer. */ 2025 return (0); 2026 } 2027 br->cache_buffer = 2028 (br->cache_buffer << 8) | *strm->next_in++; 2029 strm->avail_in--; 2030 br->cache_avail += 8; 2031 n -= 8; 2032 } 2033 } 2034 2035 /* 2036 * Decode LZHUF. 2037 * 2038 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty. 2039 * Please set available buffer and call this function again. 2040 * 2. Returns ARCHIVE_EOF if decompression has been completed. 2041 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data 2042 * is broken or you do not set 'last' flag properly. 2043 * 4. 'last' flag is very important, you must set 1 to the flag if there 2044 * is no input data. The lha compressed data format does not provide how 2045 * to know the compressed data is really finished. 2046 * Note: lha command utility check if the total size of output bytes is 2047 * reached the uncompressed size recorded in its header. it does not mind 2048 * that the decoding process is properly finished. 2049 * GNU ZIP can decompress another compressed file made by SCO LZH compress. 2050 * it handles EOF as null to fill read buffer with zero until the decoding 2051 * process meet 2 bytes of zeros at reading a size of a next chunk, so the 2052 * zeros are treated as the mark of the end of the data although the zeros 2053 * is dummy, not the file data. 2054 */ 2055 static int lzh_read_blocks(struct lzh_stream *, int); 2056 static int lzh_decode_blocks(struct lzh_stream *, int); 2057 #define ST_RD_BLOCK 0 2058 #define ST_RD_PT_1 1 2059 #define ST_RD_PT_2 2 2060 #define ST_RD_PT_3 3 2061 #define ST_RD_PT_4 4 2062 #define ST_RD_LITERAL_1 5 2063 #define ST_RD_LITERAL_2 6 2064 #define ST_RD_LITERAL_3 7 2065 #define ST_RD_POS_DATA_1 8 2066 #define ST_GET_LITERAL 9 2067 #define ST_GET_POS_1 10 2068 #define ST_GET_POS_2 11 2069 #define ST_COPY_DATA 12 2070 2071 static int 2072 lzh_decode(struct lzh_stream *strm, int last) 2073 { 2074 struct lzh_dec *ds = strm->ds; 2075 int avail_in; 2076 int r; 2077 2078 if (ds->error) 2079 return (ds->error); 2080 2081 avail_in = strm->avail_in; 2082 do { 2083 if (ds->state < ST_GET_LITERAL) 2084 r = lzh_read_blocks(strm, last); 2085 else 2086 r = lzh_decode_blocks(strm, last); 2087 } while (r == 100); 2088 strm->total_in += avail_in - strm->avail_in; 2089 return (r); 2090 } 2091 2092 static void 2093 lzh_emit_window(struct lzh_stream *strm, size_t s) 2094 { 2095 strm->ref_ptr = strm->ds->w_buff; 2096 strm->avail_out = (int)s; 2097 strm->total_out += s; 2098 } 2099 2100 static int 2101 lzh_read_blocks(struct lzh_stream *strm, int last) 2102 { 2103 struct lzh_dec *ds = strm->ds; 2104 struct lzh_br *br = &(ds->br); 2105 int c = 0, i; 2106 unsigned rbits; 2107 2108 for (;;) { 2109 switch (ds->state) { 2110 case ST_RD_BLOCK: 2111 /* 2112 * Read a block number indicates how many blocks 2113 * we will handle. The block is composed of a 2114 * literal and a match, sometimes a literal only 2115 * in particular, there are no reference data at 2116 * the beginning of the decompression. 2117 */ 2118 if (!lzh_br_read_ahead_0(strm, br, 16)) { 2119 if (!last) 2120 /* We need following data. */ 2121 return (ARCHIVE_OK); 2122 if (lzh_br_has(br, 8)) { 2123 /* 2124 * It seems there are extra bits. 2125 * 1. Compressed data is broken. 2126 * 2. `last' flag does not properly 2127 * set. 2128 */ 2129 goto failed; 2130 } 2131 if (ds->w_pos > 0) { 2132 lzh_emit_window(strm, ds->w_pos); 2133 ds->w_pos = 0; 2134 return (ARCHIVE_OK); 2135 } 2136 /* End of compressed data; we have completely 2137 * handled all compressed data. */ 2138 return (ARCHIVE_EOF); 2139 } 2140 ds->blocks_avail = lzh_br_bits(br, 16); 2141 if (ds->blocks_avail == 0) 2142 goto failed; 2143 lzh_br_consume(br, 16); 2144 /* 2145 * Read a literal table compressed in huffman 2146 * coding. 2147 */ 2148 ds->pt.len_size = ds->literal_pt_len_size; 2149 ds->pt.len_bits = ds->literal_pt_len_bits; 2150 ds->reading_position = 0; 2151 /* FALL THROUGH */ 2152 case ST_RD_PT_1: 2153 /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are 2154 * used in reading both a literal table and a 2155 * position table. */ 2156 if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) { 2157 if (last) 2158 goto failed;/* Truncated data. */ 2159 ds->state = ST_RD_PT_1; 2160 return (ARCHIVE_OK); 2161 } 2162 ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits); 2163 lzh_br_consume(br, ds->pt.len_bits); 2164 /* FALL THROUGH */ 2165 case ST_RD_PT_2: 2166 if (ds->pt.len_avail == 0) { 2167 /* There is no bitlen. */ 2168 if (!lzh_br_read_ahead(strm, br, 2169 ds->pt.len_bits)) { 2170 if (last) 2171 goto failed;/* Truncated data.*/ 2172 ds->state = ST_RD_PT_2; 2173 return (ARCHIVE_OK); 2174 } 2175 if (!lzh_make_fake_table(&(ds->pt), 2176 lzh_br_bits(br, ds->pt.len_bits))) 2177 goto failed;/* Invalid data. */ 2178 lzh_br_consume(br, ds->pt.len_bits); 2179 if (ds->reading_position) 2180 ds->state = ST_GET_LITERAL; 2181 else 2182 ds->state = ST_RD_LITERAL_1; 2183 break; 2184 } else if (ds->pt.len_avail > ds->pt.len_size) 2185 goto failed;/* Invalid data. */ 2186 ds->loop = 0; 2187 memset(ds->pt.freq, 0, sizeof(ds->pt.freq)); 2188 if (ds->pt.len_avail < 3 || 2189 ds->pt.len_size == ds->pos_pt_len_size) { 2190 ds->state = ST_RD_PT_4; 2191 break; 2192 } 2193 /* FALL THROUGH */ 2194 case ST_RD_PT_3: 2195 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3); 2196 if (ds->loop < 3) { 2197 if (ds->loop < 0 || last) 2198 goto failed;/* Invalid data. */ 2199 /* Not completed, get following data. */ 2200 ds->state = ST_RD_PT_3; 2201 return (ARCHIVE_OK); 2202 } 2203 /* There are some null in bitlen of the literal. */ 2204 if (!lzh_br_read_ahead(strm, br, 2)) { 2205 if (last) 2206 goto failed;/* Truncated data. */ 2207 ds->state = ST_RD_PT_3; 2208 return (ARCHIVE_OK); 2209 } 2210 c = lzh_br_bits(br, 2); 2211 lzh_br_consume(br, 2); 2212 if (c > ds->pt.len_avail - 3) 2213 goto failed;/* Invalid data. */ 2214 for (i = 3; c-- > 0 ;) 2215 ds->pt.bitlen[i++] = 0; 2216 ds->loop = i; 2217 /* FALL THROUGH */ 2218 case ST_RD_PT_4: 2219 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 2220 ds->pt.len_avail); 2221 if (ds->loop < ds->pt.len_avail) { 2222 if (ds->loop < 0 || last) 2223 goto failed;/* Invalid data. */ 2224 /* Not completed, get following data. */ 2225 ds->state = ST_RD_PT_4; 2226 return (ARCHIVE_OK); 2227 } 2228 if (!lzh_make_huffman_table(&(ds->pt))) 2229 goto failed;/* Invalid data */ 2230 if (ds->reading_position) { 2231 ds->state = ST_GET_LITERAL; 2232 break; 2233 } 2234 /* FALL THROUGH */ 2235 case ST_RD_LITERAL_1: 2236 if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) { 2237 if (last) 2238 goto failed;/* Truncated data. */ 2239 ds->state = ST_RD_LITERAL_1; 2240 return (ARCHIVE_OK); 2241 } 2242 ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits); 2243 lzh_br_consume(br, ds->lt.len_bits); 2244 /* FALL THROUGH */ 2245 case ST_RD_LITERAL_2: 2246 if (ds->lt.len_avail == 0) { 2247 /* There is no bitlen. */ 2248 if (!lzh_br_read_ahead(strm, br, 2249 ds->lt.len_bits)) { 2250 if (last) 2251 goto failed;/* Truncated data.*/ 2252 ds->state = ST_RD_LITERAL_2; 2253 return (ARCHIVE_OK); 2254 } 2255 if (!lzh_make_fake_table(&(ds->lt), 2256 lzh_br_bits(br, ds->lt.len_bits))) 2257 goto failed;/* Invalid data */ 2258 lzh_br_consume(br, ds->lt.len_bits); 2259 ds->state = ST_RD_POS_DATA_1; 2260 break; 2261 } else if (ds->lt.len_avail > ds->lt.len_size) 2262 goto failed;/* Invalid data */ 2263 ds->loop = 0; 2264 memset(ds->lt.freq, 0, sizeof(ds->lt.freq)); 2265 /* FALL THROUGH */ 2266 case ST_RD_LITERAL_3: 2267 i = ds->loop; 2268 while (i < ds->lt.len_avail) { 2269 if (!lzh_br_read_ahead(strm, br, 2270 ds->pt.max_bits)) { 2271 if (last) 2272 goto failed;/* Truncated data.*/ 2273 ds->loop = i; 2274 ds->state = ST_RD_LITERAL_3; 2275 return (ARCHIVE_OK); 2276 } 2277 rbits = lzh_br_bits(br, ds->pt.max_bits); 2278 c = lzh_decode_huffman(&(ds->pt), rbits); 2279 if (c > 2) { 2280 /* Note: 'c' will never be more than 2281 * eighteen since it's limited by 2282 * PT_BITLEN_SIZE, which is being set 2283 * to ds->pt.len_size through 2284 * ds->literal_pt_len_size. */ 2285 lzh_br_consume(br, ds->pt.bitlen[c]); 2286 c -= 2; 2287 ds->lt.freq[c]++; 2288 ds->lt.bitlen[i++] = c; 2289 } else if (c == 0) { 2290 lzh_br_consume(br, ds->pt.bitlen[c]); 2291 ds->lt.bitlen[i++] = 0; 2292 } else { 2293 /* c == 1 or c == 2 */ 2294 int n = (c == 1)?4:9; 2295 if (!lzh_br_read_ahead(strm, br, 2296 ds->pt.bitlen[c] + n)) { 2297 if (last) /* Truncated data. */ 2298 goto failed; 2299 ds->loop = i; 2300 ds->state = ST_RD_LITERAL_3; 2301 return (ARCHIVE_OK); 2302 } 2303 lzh_br_consume(br, ds->pt.bitlen[c]); 2304 c = lzh_br_bits(br, n); 2305 lzh_br_consume(br, n); 2306 c += (n == 4)?3:20; 2307 if (i + c > ds->lt.len_avail) 2308 goto failed;/* Invalid data */ 2309 memset(&(ds->lt.bitlen[i]), 0, c); 2310 i += c; 2311 } 2312 } 2313 if (i > ds->lt.len_avail || 2314 !lzh_make_huffman_table(&(ds->lt))) 2315 goto failed;/* Invalid data */ 2316 /* FALL THROUGH */ 2317 case ST_RD_POS_DATA_1: 2318 /* 2319 * Read a position table compressed in huffman 2320 * coding. 2321 */ 2322 ds->pt.len_size = ds->pos_pt_len_size; 2323 ds->pt.len_bits = ds->pos_pt_len_bits; 2324 ds->reading_position = 1; 2325 ds->state = ST_RD_PT_1; 2326 break; 2327 case ST_GET_LITERAL: 2328 return (100); 2329 } 2330 } 2331 failed: 2332 return (ds->error = ARCHIVE_FAILED); 2333 } 2334 2335 static int 2336 lzh_decode_blocks(struct lzh_stream *strm, int last) 2337 { 2338 struct lzh_dec *ds = strm->ds; 2339 struct lzh_br bre = ds->br; 2340 struct huffman *lt = &(ds->lt); 2341 struct huffman *pt = &(ds->pt); 2342 unsigned char *w_buff = ds->w_buff; 2343 unsigned char *lt_bitlen = lt->bitlen; 2344 unsigned char *pt_bitlen = pt->bitlen; 2345 int blocks_avail = ds->blocks_avail, c = 0; 2346 int copy_len = ds->copy_len, copy_pos = ds->copy_pos; 2347 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size; 2348 int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits; 2349 int state = ds->state; 2350 2351 for (;;) { 2352 switch (state) { 2353 case ST_GET_LITERAL: 2354 for (;;) { 2355 if (blocks_avail == 0) { 2356 /* We have decoded all blocks. 2357 * Let's handle next blocks. */ 2358 ds->state = ST_RD_BLOCK; 2359 ds->br = bre; 2360 ds->blocks_avail = 0; 2361 ds->w_pos = w_pos; 2362 ds->copy_pos = 0; 2363 return (100); 2364 } 2365 2366 /* lzh_br_read_ahead() always try to fill the 2367 * cache buffer up. In specific situation we 2368 * are close to the end of the data, the cache 2369 * buffer will not be full and thus we have to 2370 * determine if the cache buffer has some bits 2371 * as much as we need after lzh_br_read_ahead() 2372 * failed. */ 2373 if (!lzh_br_read_ahead(strm, &bre, 2374 lt_max_bits)) { 2375 if (!last) 2376 goto next_data; 2377 /* Remaining bits are less than 2378 * maximum bits(lt.max_bits) but maybe 2379 * it still remains as much as we need, 2380 * so we should try to use it with 2381 * dummy bits. */ 2382 c = lzh_decode_huffman(lt, 2383 lzh_br_bits_forced(&bre, 2384 lt_max_bits)); 2385 lzh_br_consume(&bre, lt_bitlen[c]); 2386 if (!lzh_br_has(&bre, 0)) 2387 goto failed;/* Over read. */ 2388 } else { 2389 c = lzh_decode_huffman(lt, 2390 lzh_br_bits(&bre, lt_max_bits)); 2391 lzh_br_consume(&bre, lt_bitlen[c]); 2392 } 2393 blocks_avail--; 2394 if (c > UCHAR_MAX) 2395 /* Current block is a match data. */ 2396 break; 2397 /* 2398 * 'c' is exactly a literal code. 2399 */ 2400 /* Save a decoded code to reference it 2401 * afterward. */ 2402 w_buff[w_pos] = c; 2403 if (++w_pos >= w_size) { 2404 w_pos = 0; 2405 lzh_emit_window(strm, w_size); 2406 goto next_data; 2407 } 2408 } 2409 /* 'c' is the length of a match pattern we have 2410 * already extracted, which has be stored in 2411 * window(ds->w_buff). */ 2412 copy_len = c - (UCHAR_MAX + 1) + MINMATCH; 2413 /* FALL THROUGH */ 2414 case ST_GET_POS_1: 2415 /* 2416 * Get a reference position. 2417 */ 2418 if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) { 2419 if (!last) { 2420 state = ST_GET_POS_1; 2421 ds->copy_len = copy_len; 2422 goto next_data; 2423 } 2424 copy_pos = lzh_decode_huffman(pt, 2425 lzh_br_bits_forced(&bre, pt_max_bits)); 2426 lzh_br_consume(&bre, pt_bitlen[copy_pos]); 2427 if (!lzh_br_has(&bre, 0)) 2428 goto failed;/* Over read. */ 2429 } else { 2430 copy_pos = lzh_decode_huffman(pt, 2431 lzh_br_bits(&bre, pt_max_bits)); 2432 lzh_br_consume(&bre, pt_bitlen[copy_pos]); 2433 } 2434 /* FALL THROUGH */ 2435 case ST_GET_POS_2: 2436 if (copy_pos > 1) { 2437 /* We need an additional adjustment number to 2438 * the position. */ 2439 int p = copy_pos - 1; 2440 if (!lzh_br_read_ahead(strm, &bre, p)) { 2441 if (last) 2442 goto failed;/* Truncated data.*/ 2443 state = ST_GET_POS_2; 2444 ds->copy_len = copy_len; 2445 ds->copy_pos = copy_pos; 2446 goto next_data; 2447 } 2448 copy_pos = (1 << p) + lzh_br_bits(&bre, p); 2449 lzh_br_consume(&bre, p); 2450 } 2451 /* The position is actually a distance from the last 2452 * code we had extracted and thus we have to convert 2453 * it to a position of the window. */ 2454 copy_pos = (w_pos - copy_pos - 1) & w_mask; 2455 /* FALL THROUGH */ 2456 case ST_COPY_DATA: 2457 /* 2458 * Copy `copy_len' bytes as extracted data from 2459 * the window into the output buffer. 2460 */ 2461 for (;;) { 2462 int l; 2463 2464 l = copy_len; 2465 if (copy_pos > w_pos) { 2466 if (l > w_size - copy_pos) 2467 l = w_size - copy_pos; 2468 } else { 2469 if (l > w_size - w_pos) 2470 l = w_size - w_pos; 2471 } 2472 if ((copy_pos + l < w_pos) 2473 || (w_pos + l < copy_pos)) { 2474 /* No overlap. */ 2475 memcpy(w_buff + w_pos, 2476 w_buff + copy_pos, l); 2477 } else { 2478 const unsigned char *s; 2479 unsigned char *d; 2480 int li; 2481 2482 d = w_buff + w_pos; 2483 s = w_buff + copy_pos; 2484 for (li = 0; li < l-1;) { 2485 d[li] = s[li];li++; 2486 d[li] = s[li];li++; 2487 } 2488 if (li < l) 2489 d[li] = s[li]; 2490 } 2491 w_pos += l; 2492 if (w_pos == w_size) { 2493 w_pos = 0; 2494 lzh_emit_window(strm, w_size); 2495 if (copy_len <= l) 2496 state = ST_GET_LITERAL; 2497 else { 2498 state = ST_COPY_DATA; 2499 ds->copy_len = copy_len - l; 2500 ds->copy_pos = 2501 (copy_pos + l) & w_mask; 2502 } 2503 goto next_data; 2504 } 2505 if (copy_len <= l) 2506 /* A copy of current pattern ended. */ 2507 break; 2508 copy_len -= l; 2509 copy_pos = (copy_pos + l) & w_mask; 2510 } 2511 state = ST_GET_LITERAL; 2512 break; 2513 } 2514 } 2515 failed: 2516 return (ds->error = ARCHIVE_FAILED); 2517 next_data: 2518 ds->br = bre; 2519 ds->blocks_avail = blocks_avail; 2520 ds->state = state; 2521 ds->w_pos = w_pos; 2522 return (ARCHIVE_OK); 2523 } 2524 2525 static int 2526 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits) 2527 { 2528 int bits; 2529 2530 if (hf->bitlen == NULL) { 2531 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0])); 2532 if (hf->bitlen == NULL) 2533 return (ARCHIVE_FATAL); 2534 } 2535 if (hf->tbl == NULL) { 2536 if (tbl_bits < HTBL_BITS) 2537 bits = tbl_bits; 2538 else 2539 bits = HTBL_BITS; 2540 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0])); 2541 if (hf->tbl == NULL) 2542 return (ARCHIVE_FATAL); 2543 } 2544 if (hf->tree == NULL && tbl_bits > HTBL_BITS) { 2545 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4); 2546 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0])); 2547 if (hf->tree == NULL) 2548 return (ARCHIVE_FATAL); 2549 } 2550 hf->len_size = (int)len_size; 2551 hf->tbl_bits = tbl_bits; 2552 return (ARCHIVE_OK); 2553 } 2554 2555 static void 2556 lzh_huffman_free(struct huffman *hf) 2557 { 2558 free(hf->bitlen); 2559 free(hf->tbl); 2560 free(hf->tree); 2561 } 2562 2563 static const char bitlen_tbl[0x400] = { 2564 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2565 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2566 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2567 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2568 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2569 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2570 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2571 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2572 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2573 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2574 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2575 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2576 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2577 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2578 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2579 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2580 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2581 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2582 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2583 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2584 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2585 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2586 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2587 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2588 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2589 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2590 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2591 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2592 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2593 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2594 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2595 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2596 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2597 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2598 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2599 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2600 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2601 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2602 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2603 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2604 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2605 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2606 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2607 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2608 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2609 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2610 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2611 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2612 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2613 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2614 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2615 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2616 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2617 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2618 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2619 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2620 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2621 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2622 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2623 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2624 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2625 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2626 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2627 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0 2628 }; 2629 static int 2630 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end) 2631 { 2632 struct lzh_dec *ds = strm->ds; 2633 struct lzh_br *br = &(ds->br); 2634 int c, i; 2635 2636 for (i = start; i < end; ) { 2637 /* 2638 * bit pattern the number we need 2639 * 000 -> 0 2640 * 001 -> 1 2641 * 010 -> 2 2642 * ... 2643 * 110 -> 6 2644 * 1110 -> 7 2645 * 11110 -> 8 2646 * ... 2647 * 1111111111110 -> 16 2648 */ 2649 if (!lzh_br_read_ahead(strm, br, 3)) 2650 return (i); 2651 if ((c = lzh_br_bits(br, 3)) == 7) { 2652 if (!lzh_br_read_ahead(strm, br, 13)) 2653 return (i); 2654 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF]; 2655 if (c) 2656 lzh_br_consume(br, c - 3); 2657 else 2658 return (-1);/* Invalid data. */ 2659 } else 2660 lzh_br_consume(br, 3); 2661 ds->pt.bitlen[i++] = c; 2662 ds->pt.freq[c]++; 2663 } 2664 return (i); 2665 } 2666 2667 static int 2668 lzh_make_fake_table(struct huffman *hf, uint16_t c) 2669 { 2670 if (c >= hf->len_size) 2671 return (0); 2672 hf->tbl[0] = c; 2673 hf->max_bits = 0; 2674 hf->shift_bits = 0; 2675 hf->bitlen[hf->tbl[0]] = 0; 2676 return (1); 2677 } 2678 2679 /* 2680 * Make a huffman coding table. 2681 */ 2682 static int 2683 lzh_make_huffman_table(struct huffman *hf) 2684 { 2685 uint16_t *tbl; 2686 const unsigned char *bitlen; 2687 int bitptn[17], weight[17]; 2688 int i, maxbits = 0, ptn, tbl_size, w; 2689 int diffbits, len_avail; 2690 2691 /* 2692 * Initialize bit patterns. 2693 */ 2694 ptn = 0; 2695 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) { 2696 bitptn[i] = ptn; 2697 weight[i] = w; 2698 if (hf->freq[i]) { 2699 ptn += hf->freq[i] * w; 2700 maxbits = i; 2701 } 2702 } 2703 if (ptn != 0x10000 || maxbits > hf->tbl_bits) 2704 return (0);/* Invalid */ 2705 2706 hf->max_bits = maxbits; 2707 2708 /* 2709 * Cut out extra bits which we won't house in the table. 2710 * This preparation reduces the same calculation in the for-loop 2711 * making the table. 2712 */ 2713 if (maxbits < 16) { 2714 int ebits = 16 - maxbits; 2715 for (i = 1; i <= maxbits; i++) { 2716 bitptn[i] >>= ebits; 2717 weight[i] >>= ebits; 2718 } 2719 } 2720 if (maxbits > HTBL_BITS) { 2721 unsigned htbl_max; 2722 uint16_t *p; 2723 2724 diffbits = maxbits - HTBL_BITS; 2725 for (i = 1; i <= HTBL_BITS; i++) { 2726 bitptn[i] >>= diffbits; 2727 weight[i] >>= diffbits; 2728 } 2729 htbl_max = bitptn[HTBL_BITS] + 2730 weight[HTBL_BITS] * hf->freq[HTBL_BITS]; 2731 p = &(hf->tbl[htbl_max]); 2732 while (p < &hf->tbl[1U<<HTBL_BITS]) 2733 *p++ = 0; 2734 } else 2735 diffbits = 0; 2736 hf->shift_bits = diffbits; 2737 2738 /* 2739 * Make the table. 2740 */ 2741 tbl_size = 1 << HTBL_BITS; 2742 tbl = hf->tbl; 2743 bitlen = hf->bitlen; 2744 len_avail = hf->len_avail; 2745 hf->tree_used = 0; 2746 for (i = 0; i < len_avail; i++) { 2747 uint16_t *p; 2748 int len, cnt; 2749 uint16_t bit; 2750 int extlen; 2751 struct htree_t *ht; 2752 2753 if (bitlen[i] == 0) 2754 continue; 2755 /* Get a bit pattern */ 2756 len = bitlen[i]; 2757 ptn = bitptn[len]; 2758 cnt = weight[len]; 2759 if (len <= HTBL_BITS) { 2760 /* Calculate next bit pattern */ 2761 if ((bitptn[len] = ptn + cnt) > tbl_size) 2762 return (0);/* Invalid */ 2763 /* Update the table */ 2764 p = &(tbl[ptn]); 2765 if (cnt > 7) { 2766 uint16_t *pc; 2767 2768 cnt -= 8; 2769 pc = &p[cnt]; 2770 pc[0] = (uint16_t)i; 2771 pc[1] = (uint16_t)i; 2772 pc[2] = (uint16_t)i; 2773 pc[3] = (uint16_t)i; 2774 pc[4] = (uint16_t)i; 2775 pc[5] = (uint16_t)i; 2776 pc[6] = (uint16_t)i; 2777 pc[7] = (uint16_t)i; 2778 if (cnt > 7) { 2779 cnt -= 8; 2780 memcpy(&p[cnt], pc, 2781 8 * sizeof(uint16_t)); 2782 pc = &p[cnt]; 2783 while (cnt > 15) { 2784 cnt -= 16; 2785 memcpy(&p[cnt], pc, 2786 16 * sizeof(uint16_t)); 2787 } 2788 } 2789 if (cnt) 2790 memcpy(p, pc, cnt * sizeof(uint16_t)); 2791 } else { 2792 while (cnt > 1) { 2793 p[--cnt] = (uint16_t)i; 2794 p[--cnt] = (uint16_t)i; 2795 } 2796 if (cnt) 2797 p[--cnt] = (uint16_t)i; 2798 } 2799 continue; 2800 } 2801 2802 /* 2803 * A bit length is too big to be housed to a direct table, 2804 * so we use a tree model for its extra bits. 2805 */ 2806 bitptn[len] = ptn + cnt; 2807 bit = 1U << (diffbits -1); 2808 extlen = len - HTBL_BITS; 2809 2810 p = &(tbl[ptn >> diffbits]); 2811 if (*p == 0) { 2812 *p = len_avail + hf->tree_used; 2813 ht = &(hf->tree[hf->tree_used++]); 2814 if (hf->tree_used > hf->tree_avail) 2815 return (0);/* Invalid */ 2816 ht->left = 0; 2817 ht->right = 0; 2818 } else { 2819 if (*p < len_avail || 2820 *p >= (len_avail + hf->tree_used)) 2821 return (0);/* Invalid */ 2822 ht = &(hf->tree[*p - len_avail]); 2823 } 2824 while (--extlen > 0) { 2825 if (ptn & bit) { 2826 if (ht->left < len_avail) { 2827 ht->left = len_avail + hf->tree_used; 2828 ht = &(hf->tree[hf->tree_used++]); 2829 if (hf->tree_used > hf->tree_avail) 2830 return (0);/* Invalid */ 2831 ht->left = 0; 2832 ht->right = 0; 2833 } else { 2834 ht = &(hf->tree[ht->left - len_avail]); 2835 } 2836 } else { 2837 if (ht->right < len_avail) { 2838 ht->right = len_avail + hf->tree_used; 2839 ht = &(hf->tree[hf->tree_used++]); 2840 if (hf->tree_used > hf->tree_avail) 2841 return (0);/* Invalid */ 2842 ht->left = 0; 2843 ht->right = 0; 2844 } else { 2845 ht = &(hf->tree[ht->right - len_avail]); 2846 } 2847 } 2848 bit >>= 1; 2849 } 2850 if (ptn & bit) { 2851 if (ht->left != 0) 2852 return (0);/* Invalid */ 2853 ht->left = (uint16_t)i; 2854 } else { 2855 if (ht->right != 0) 2856 return (0);/* Invalid */ 2857 ht->right = (uint16_t)i; 2858 } 2859 } 2860 return (1); 2861 } 2862 2863 static int 2864 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c) 2865 { 2866 struct htree_t *ht; 2867 int extlen; 2868 2869 ht = hf->tree; 2870 extlen = hf->shift_bits; 2871 while (c >= hf->len_avail) { 2872 c -= hf->len_avail; 2873 if (extlen-- <= 0 || c >= hf->tree_used) 2874 return (0); 2875 if (rbits & (1U << extlen)) 2876 c = ht[c].left; 2877 else 2878 c = ht[c].right; 2879 } 2880 return (c); 2881 } 2882 2883 static inline int 2884 lzh_decode_huffman(struct huffman *hf, unsigned rbits) 2885 { 2886 int c; 2887 /* 2888 * At first search an index table for a bit pattern. 2889 * If it fails, search a huffman tree for. 2890 */ 2891 c = hf->tbl[rbits >> hf->shift_bits]; 2892 if (c < hf->len_avail || hf->len_avail == 0) 2893 return (c); 2894 /* This bit pattern needs to be found out at a huffman tree. */ 2895 return (lzh_decode_huffman_tree(hf, rbits, c)); 2896 } 2897 2898