1 /* $NetBSD: readcdf.c,v 1.15 2018/04/15 19:45:32 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2016 Christos Zoulas 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 #include "file.h" 29 30 #ifndef lint 31 #if 0 32 FILE_RCSID("@(#)$File: readcdf.c,v 1.66 2017/11/02 20:25:39 christos Exp $") 33 #else 34 __RCSID("$NetBSD: readcdf.c,v 1.15 2018/04/15 19:45:32 christos Exp $"); 35 #endif 36 #endif 37 38 #include <assert.h> 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <string.h> 42 #include <time.h> 43 #include <ctype.h> 44 45 #include "cdf.h" 46 #include "magic.h" 47 48 #ifndef __arraycount 49 #define __arraycount(a) (sizeof(a) / sizeof(a[0])) 50 #endif 51 52 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 53 54 static const struct nv { 55 const char *pattern; 56 const char *mime; 57 } app2mime[] = { 58 { "Word", "msword", }, 59 { "Excel", "vnd.ms-excel", }, 60 { "Powerpoint", "vnd.ms-powerpoint", }, 61 { "Crystal Reports", "x-rpt", }, 62 { "Advanced Installer", "vnd.ms-msi", }, 63 { "InstallShield", "vnd.ms-msi", }, 64 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 65 { "NAnt", "vnd.ms-msi", }, 66 { "Windows Installer", "vnd.ms-msi", }, 67 { NULL, NULL, }, 68 }, name2mime[] = { 69 { "Book", "vnd.ms-excel", }, 70 { "Workbook", "vnd.ms-excel", }, 71 { "WordDocument", "msword", }, 72 { "PowerPoint", "vnd.ms-powerpoint", }, 73 { "DigitalSignature", "vnd.ms-msi", }, 74 { NULL, NULL, }, 75 }, name2desc[] = { 76 { "Book", "Microsoft Excel", }, 77 { "Workbook", "Microsoft Excel", }, 78 { "WordDocument", "Microsoft Word", }, 79 { "PowerPoint", "Microsoft PowerPoint", }, 80 { "DigitalSignature", "Microsoft Installer", }, 81 { NULL, NULL, }, 82 }; 83 84 static const struct cv { 85 uint64_t clsid[2]; 86 const char *mime; 87 } clsid2mime[] = { 88 { 89 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 90 "x-msi", 91 }, 92 { { 0, 0 }, 93 NULL, 94 }, 95 }, clsid2desc[] = { 96 { 97 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 98 "MSI Installer", 99 }, 100 { { 0, 0 }, 101 NULL, 102 }, 103 }; 104 105 private const char * 106 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 107 { 108 size_t i; 109 for (i = 0; cv[i].mime != NULL; i++) { 110 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 111 return cv[i].mime; 112 } 113 #ifdef CDF_DEBUG 114 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0], 115 clsid[1]); 116 #endif 117 return NULL; 118 } 119 120 private const char * 121 cdf_app_to_mime(const char *vbuf, const struct nv *nv) 122 { 123 size_t i; 124 const char *rv = NULL; 125 #ifdef USE_C_LOCALE 126 locale_t old_lc_ctype, c_lc_ctype; 127 128 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 129 assert(c_lc_ctype != NULL); 130 old_lc_ctype = uselocale(c_lc_ctype); 131 assert(old_lc_ctype != NULL); 132 #else 133 char *old_lc_ctype = setlocale(LC_CTYPE, "C"); 134 #endif 135 for (i = 0; nv[i].pattern != NULL; i++) 136 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 137 rv = nv[i].mime; 138 break; 139 } 140 #ifdef CDF_DEBUG 141 fprintf(stderr, "unknown app %s\n", vbuf); 142 #endif 143 #ifdef USE_C_LOCALE 144 (void)uselocale(old_lc_ctype); 145 freelocale(c_lc_ctype); 146 #else 147 setlocale(LC_CTYPE, old_lc_ctype); 148 #endif 149 return rv; 150 } 151 152 private int 153 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 154 size_t count, const cdf_directory_t *root_storage) 155 { 156 size_t i; 157 cdf_timestamp_t tp; 158 struct timespec ts; 159 char buf[64]; 160 const char *str = NULL; 161 const char *s, *e; 162 int len; 163 164 if (!NOTMIME(ms) && root_storage) 165 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 166 clsid2mime); 167 168 for (i = 0; i < count; i++) { 169 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 170 switch (info[i].pi_type) { 171 case CDF_NULL: 172 break; 173 case CDF_SIGNED16: 174 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 175 info[i].pi_s16) == -1) 176 return -1; 177 break; 178 case CDF_SIGNED32: 179 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 180 info[i].pi_s32) == -1) 181 return -1; 182 break; 183 case CDF_UNSIGNED32: 184 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 185 info[i].pi_u32) == -1) 186 return -1; 187 break; 188 case CDF_FLOAT: 189 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 190 info[i].pi_f) == -1) 191 return -1; 192 break; 193 case CDF_DOUBLE: 194 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 195 info[i].pi_d) == -1) 196 return -1; 197 break; 198 case CDF_LENGTH32_STRING: 199 case CDF_LENGTH32_WSTRING: 200 len = info[i].pi_str.s_len; 201 if (len > 1) { 202 char vbuf[1024]; 203 size_t j, k = 1; 204 205 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 206 k++; 207 s = info[i].pi_str.s_buf; 208 e = info[i].pi_str.s_buf + len; 209 for (j = 0; s < e && j < sizeof(vbuf) 210 && len--; s += k) { 211 if (*s == '\0') 212 break; 213 if (isprint((unsigned char)*s)) 214 vbuf[j++] = *s; 215 } 216 if (j == sizeof(vbuf)) 217 --j; 218 vbuf[j] = '\0'; 219 if (NOTMIME(ms)) { 220 if (vbuf[0]) { 221 if (file_printf(ms, ", %s: %s", 222 buf, vbuf) == -1) 223 return -1; 224 } 225 } else if (str == NULL && info[i].pi_id == 226 CDF_PROPERTY_NAME_OF_APPLICATION) { 227 str = cdf_app_to_mime(vbuf, app2mime); 228 } 229 } 230 break; 231 case CDF_FILETIME: 232 tp = info[i].pi_tp; 233 if (tp != 0) { 234 char tbuf[64]; 235 if (tp < 1000000000000000LL) { 236 cdf_print_elapsed_time(tbuf, 237 sizeof(tbuf), tp); 238 if (NOTMIME(ms) && file_printf(ms, 239 ", %s: %s", buf, tbuf) == -1) 240 return -1; 241 } else { 242 char *c, *ec; 243 cdf_timestamp_to_timespec(&ts, tp); 244 c = cdf_ctime(&ts.tv_sec, tbuf); 245 if (c != NULL && 246 (ec = strchr(c, '\n')) != NULL) 247 *ec = '\0'; 248 249 if (NOTMIME(ms) && file_printf(ms, 250 ", %s: %s", buf, c) == -1) 251 return -1; 252 } 253 } 254 break; 255 case CDF_CLIPBOARD: 256 break; 257 default: 258 return -1; 259 } 260 } 261 if (!NOTMIME(ms)) { 262 if (str == NULL) 263 return 0; 264 if (file_printf(ms, "application/%s", str) == -1) 265 return -1; 266 } 267 return 1; 268 } 269 270 private int 271 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h, 272 const cdf_stream_t *sst) 273 { 274 cdf_catalog_t *cat; 275 size_t i; 276 char buf[256]; 277 cdf_catalog_entry_t *ce; 278 279 if (NOTMIME(ms)) { 280 if (file_printf(ms, "Microsoft Thumbs.db [") == -1) 281 return -1; 282 if (cdf_unpack_catalog(h, sst, &cat) == -1) 283 return -1; 284 ce = cat->cat_e; 285 /* skip first entry since it has a , or paren */ 286 for (i = 1; i < cat->cat_num; i++) 287 if (file_printf(ms, "%s%s", 288 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name), 289 i == cat->cat_num - 1 ? "]" : ", ") == -1) { 290 free(cat); 291 return -1; 292 } 293 free(cat); 294 } else { 295 if (file_printf(ms, "application/CDFV2") == -1) 296 return -1; 297 } 298 return 1; 299 } 300 301 private int 302 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 303 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 304 { 305 cdf_summary_info_header_t si; 306 cdf_property_info_t *info; 307 size_t count; 308 int m; 309 310 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 311 return -1; 312 313 if (NOTMIME(ms)) { 314 const char *str; 315 316 if (file_printf(ms, "Composite Document File V2 Document") 317 == -1) 318 return -1; 319 320 if (file_printf(ms, ", %s Endian", 321 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 322 return -2; 323 switch (si.si_os) { 324 case 2: 325 if (file_printf(ms, ", Os: Windows, Version %d.%d", 326 si.si_os_version & 0xff, 327 (uint32_t)si.si_os_version >> 8) == -1) 328 return -2; 329 break; 330 case 1: 331 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 332 (uint32_t)si.si_os_version >> 8, 333 si.si_os_version & 0xff) == -1) 334 return -2; 335 break; 336 default: 337 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 338 si.si_os_version & 0xff, 339 (uint32_t)si.si_os_version >> 8) == -1) 340 return -2; 341 break; 342 } 343 if (root_storage) { 344 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 345 clsid2desc); 346 if (str) { 347 if (file_printf(ms, ", %s", str) == -1) 348 return -2; 349 } 350 } 351 } 352 353 m = cdf_file_property_info(ms, info, count, root_storage); 354 free(info); 355 356 return m == -1 ? -2 : m; 357 } 358 359 #ifdef notdef 360 private char * 361 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 362 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 363 PRIx64 "-%.12" PRIx64, 364 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL, 365 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL, 366 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL, 367 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL, 368 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL); 369 return buf; 370 } 371 #endif 372 373 private int 374 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, 375 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 376 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn) 377 { 378 int i; 379 380 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst, 381 dir, "Catalog", scn)) == -1) 382 return i; 383 #ifdef CDF_DEBUG 384 cdf_dump_catalog(h, scn); 385 #endif 386 if ((i = cdf_file_catalog(ms, h, scn)) == -1) 387 return -1; 388 return i; 389 } 390 391 private int 392 cdf_check_summary_info(struct magic_set *ms, const cdf_info_t *info, 393 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 394 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn, 395 const cdf_directory_t *root_storage, const char **expn) 396 { 397 int i; 398 const char *str = NULL; 399 cdf_directory_t *d; 400 char name[__arraycount(d->d_name)]; 401 size_t j, k; 402 403 #ifdef CDF_DEBUG 404 cdf_dump_summary_info(h, scn); 405 #endif 406 if ((i = cdf_file_summary_info(ms, h, scn, root_storage)) < 0) { 407 *expn = "Can't expand summary_info"; 408 return i; 409 } 410 if (i == 1) 411 return i; 412 for (j = 0; str == NULL && j < dir->dir_len; j++) { 413 d = &dir->dir_tab[j]; 414 for (k = 0; k < sizeof(name); k++) 415 name[k] = (char)cdf_tole2(d->d_name[k]); 416 str = cdf_app_to_mime(name, 417 NOTMIME(ms) ? name2desc : name2mime); 418 } 419 if (NOTMIME(ms)) { 420 if (str != NULL) { 421 if (file_printf(ms, "%s", str) == -1) 422 return -1; 423 i = 1; 424 } 425 } else { 426 if (str == NULL) 427 str = "vnd.ms-office"; 428 if (file_printf(ms, "application/%s", str) == -1) 429 return -1; 430 i = 1; 431 } 432 if (i <= 0) { 433 i = cdf_file_catalog_info(ms, info, h, sat, ssat, sst, 434 dir, scn); 435 } 436 return i; 437 } 438 439 private struct sinfo { 440 const char *name; 441 const char *mime; 442 const char *sections[5]; 443 const int types[5]; 444 } sectioninfo[] = { 445 { "Encrypted", "encrypted", 446 { 447 "EncryptedPackage", "EncryptedSummary", 448 NULL, NULL, NULL, 449 }, 450 { 451 CDF_DIR_TYPE_USER_STREAM, 452 CDF_DIR_TYPE_USER_STREAM, 453 0, 0, 0, 454 455 }, 456 }, 457 { "QuickBooks", "quickbooks", 458 { 459 #if 0 460 "TaxForms", "PDFTaxForms", "modulesInBackup", 461 #endif 462 "mfbu_header", NULL, NULL, NULL, NULL, 463 }, 464 { 465 #if 0 466 CDF_DIR_TYPE_USER_STORAGE, 467 CDF_DIR_TYPE_USER_STORAGE, 468 CDF_DIR_TYPE_USER_STREAM, 469 #endif 470 CDF_DIR_TYPE_USER_STREAM, 471 0, 0, 0, 0 472 }, 473 }, 474 { "Microsoft Excel", "vnd.ms-excel", 475 { 476 "Book", "Workbook", NULL, NULL, NULL, 477 }, 478 { 479 CDF_DIR_TYPE_USER_STREAM, 480 CDF_DIR_TYPE_USER_STREAM, 481 0, 0, 0, 482 }, 483 }, 484 { "Microsoft Word", "msword", 485 { 486 "WordDocument", NULL, NULL, NULL, NULL, 487 }, 488 { 489 CDF_DIR_TYPE_USER_STREAM, 490 0, 0, 0, 0, 491 }, 492 }, 493 { "Microsoft PowerPoint", "vnd.ms-powerpoint", 494 { 495 "PowerPoint", NULL, NULL, NULL, NULL, 496 }, 497 { 498 CDF_DIR_TYPE_USER_STREAM, 499 0, 0, 0, 0, 500 }, 501 }, 502 { "Microsoft Outlook Message", "vnd.ms-outlook", 503 { 504 "__properties_version1.0", 505 "__recip_version1.0_#00000000", 506 NULL, NULL, NULL, 507 }, 508 { 509 CDF_DIR_TYPE_USER_STREAM, 510 CDF_DIR_TYPE_USER_STORAGE, 511 0, 0, 0, 512 }, 513 }, 514 }; 515 516 private int 517 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) 518 { 519 size_t sd, j; 520 521 for (sd = 0; sd < __arraycount(sectioninfo); sd++) { 522 const struct sinfo *si = §ioninfo[sd]; 523 for (j = 0; si->sections[j]; j++) { 524 if (cdf_find_stream(dir, si->sections[j], si->types[j]) 525 > 0) 526 break; 527 #ifdef CDF_DEBUG 528 fprintf(stderr, "Can't read %s\n", si->sections[j]); 529 #endif 530 } 531 if (si->sections[j] == NULL) 532 continue; 533 if (NOTMIME(ms)) { 534 if (file_printf(ms, "CDFV2 %s", si->name) == -1) 535 return -1; 536 } else { 537 if (file_printf(ms, "application/%s", si->mime) == -1) 538 return -1; 539 } 540 return 1; 541 } 542 return -1; 543 } 544 545 protected int 546 file_trycdf(struct magic_set *ms, const struct buffer *b) 547 { 548 int fd = b->fd; 549 const unsigned char *buf = b->fbuf; 550 size_t nbytes = b->flen; 551 cdf_info_t info; 552 cdf_header_t h; 553 cdf_sat_t sat, ssat; 554 cdf_stream_t sst, scn; 555 cdf_dir_t dir; 556 int i; 557 const char *expn = ""; 558 const cdf_directory_t *root_storage; 559 560 scn.sst_tab = NULL; 561 info.i_fd = fd; 562 info.i_buf = buf; 563 info.i_len = nbytes; 564 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) 565 return 0; 566 if (cdf_read_header(&info, &h) == -1) 567 return 0; 568 #ifdef CDF_DEBUG 569 cdf_dump_header(&h); 570 #endif 571 572 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 573 expn = "Can't read SAT"; 574 goto out0; 575 } 576 #ifdef CDF_DEBUG 577 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 578 #endif 579 580 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 581 expn = "Can't read SSAT"; 582 goto out1; 583 } 584 #ifdef CDF_DEBUG 585 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 586 #endif 587 588 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 589 expn = "Can't read directory"; 590 goto out2; 591 } 592 593 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 594 &root_storage)) == -1) { 595 expn = "Cannot read short stream"; 596 goto out3; 597 } 598 #ifdef CDF_DEBUG 599 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 600 #endif 601 #ifdef notdef 602 if (root_storage) { 603 if (NOTMIME(ms)) { 604 char clsbuf[128]; 605 if (file_printf(ms, "CLSID %s, ", 606 format_clsid(clsbuf, sizeof(clsbuf), 607 root_storage->d_storage_uuid)) == -1) 608 return -1; 609 } 610 } 611 #endif 612 613 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 614 "FileHeader", &scn)) != -1) { 615 #define HWP5_SIGNATURE "HWP Document File" 616 if (scn.sst_len * scn.sst_ss >= sizeof(HWP5_SIGNATURE) - 1 617 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 618 sizeof(HWP5_SIGNATURE) - 1) == 0) { 619 if (NOTMIME(ms)) { 620 if (file_printf(ms, 621 "Hangul (Korean) Word Processor File 5.x") == -1) 622 return -1; 623 } else { 624 if (file_printf(ms, "application/x-hwp") == -1) 625 return -1; 626 } 627 i = 1; 628 goto out5; 629 } else { 630 cdf_zero_stream(&scn); 631 } 632 } 633 634 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 635 &scn)) == -1) { 636 if (errno != ESRCH) { 637 expn = "Cannot read summary info"; 638 } 639 } else { 640 i = cdf_check_summary_info(ms, &info, &h, 641 &sat, &ssat, &sst, &dir, &scn, root_storage, &expn); 642 cdf_zero_stream(&scn); 643 } 644 if (i <= 0) { 645 if ((i = cdf_read_doc_summary_info(&info, &h, &sat, &ssat, 646 &sst, &dir, &scn)) == -1) { 647 if (errno != ESRCH) { 648 expn = "Cannot read summary info"; 649 } 650 } else { 651 i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat, 652 &sst, &dir, &scn, root_storage, &expn); 653 } 654 } 655 if (i <= 0) { 656 i = cdf_file_dir_info(ms, &dir); 657 if (i < 0) 658 expn = "Cannot read section info"; 659 } 660 out5: 661 cdf_zero_stream(&scn); 662 cdf_zero_stream(&sst); 663 out3: 664 free(dir.dir_tab); 665 out2: 666 free(ssat.sat_tab); 667 out1: 668 free(sat.sat_tab); 669 out0: 670 if (i == -1) { 671 if (NOTMIME(ms)) { 672 if (file_printf(ms, 673 "Composite Document File V2 Document") == -1) 674 return -1; 675 if (*expn) 676 if (file_printf(ms, ", %s", expn) == -1) 677 return -1; 678 } else { 679 if (file_printf(ms, "application/CDFV2") == -1) 680 return -1; 681 } 682 i = 1; 683 } 684 return i; 685 } 686