1 /* $NetBSD: readcdf.c,v 1.13 2017/02/10 17:53:24 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2016 Christos Zoulas 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 #include "file.h" 29 30 #ifndef lint 31 #if 0 32 FILE_RCSID("@(#)$File: readcdf.c,v 1.63 2016/10/18 22:25:42 christos Exp $") 33 #else 34 __RCSID("$NetBSD: readcdf.c,v 1.13 2017/02/10 17:53:24 christos Exp $"); 35 #endif 36 #endif 37 38 #include <assert.h> 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <string.h> 42 #include <time.h> 43 #include <ctype.h> 44 45 #include "cdf.h" 46 #include "magic.h" 47 48 #ifndef __arraycount 49 #define __arraycount(a) (sizeof(a) / sizeof(a[0])) 50 #endif 51 52 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 53 54 static const struct nv { 55 const char *pattern; 56 const char *mime; 57 } app2mime[] = { 58 { "Word", "msword", }, 59 { "Excel", "vnd.ms-excel", }, 60 { "Powerpoint", "vnd.ms-powerpoint", }, 61 { "Crystal Reports", "x-rpt", }, 62 { "Advanced Installer", "vnd.ms-msi", }, 63 { "InstallShield", "vnd.ms-msi", }, 64 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 65 { "NAnt", "vnd.ms-msi", }, 66 { "Windows Installer", "vnd.ms-msi", }, 67 { NULL, NULL, }, 68 }, name2mime[] = { 69 { "Book", "vnd.ms-excel", }, 70 { "Workbook", "vnd.ms-excel", }, 71 { "WordDocument", "msword", }, 72 { "PowerPoint", "vnd.ms-powerpoint", }, 73 { "DigitalSignature", "vnd.ms-msi", }, 74 { NULL, NULL, }, 75 }, name2desc[] = { 76 { "Book", "Microsoft Excel", }, 77 { "Workbook", "Microsoft Excel", }, 78 { "WordDocument", "Microsoft Word", }, 79 { "PowerPoint", "Microsoft PowerPoint", }, 80 { "DigitalSignature", "Microsoft Installer", }, 81 { NULL, NULL, }, 82 }; 83 84 static const struct cv { 85 uint64_t clsid[2]; 86 const char *mime; 87 } clsid2mime[] = { 88 { 89 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 90 "x-msi", 91 }, 92 { { 0, 0 }, 93 NULL, 94 }, 95 }, clsid2desc[] = { 96 { 97 { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, 98 "MSI Installer", 99 }, 100 { { 0, 0 }, 101 NULL, 102 }, 103 }; 104 105 private const char * 106 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 107 { 108 size_t i; 109 for (i = 0; cv[i].mime != NULL; i++) { 110 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 111 return cv[i].mime; 112 } 113 #ifdef CDF_DEBUG 114 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0], 115 clsid[1]); 116 #endif 117 return NULL; 118 } 119 120 private const char * 121 cdf_app_to_mime(const char *vbuf, const struct nv *nv) 122 { 123 size_t i; 124 const char *rv = NULL; 125 #ifdef USE_C_LOCALE 126 locale_t old_lc_ctype, c_lc_ctype; 127 128 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 129 assert(c_lc_ctype != NULL); 130 old_lc_ctype = uselocale(c_lc_ctype); 131 assert(old_lc_ctype != NULL); 132 #else 133 char *old_lc_ctype = setlocale(LC_CTYPE, "C"); 134 #endif 135 for (i = 0; nv[i].pattern != NULL; i++) 136 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 137 rv = nv[i].mime; 138 break; 139 } 140 #ifdef CDF_DEBUG 141 fprintf(stderr, "unknown app %s\n", vbuf); 142 #endif 143 #ifdef USE_C_LOCALE 144 (void)uselocale(old_lc_ctype); 145 freelocale(c_lc_ctype); 146 #else 147 setlocale(LC_CTYPE, old_lc_ctype); 148 #endif 149 return rv; 150 } 151 152 private int 153 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 154 size_t count, const cdf_directory_t *root_storage) 155 { 156 size_t i; 157 cdf_timestamp_t tp; 158 struct timespec ts; 159 char buf[64]; 160 const char *str = NULL; 161 const char *s; 162 int len; 163 164 if (!NOTMIME(ms) && root_storage) 165 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 166 clsid2mime); 167 168 for (i = 0; i < count; i++) { 169 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 170 switch (info[i].pi_type) { 171 case CDF_NULL: 172 break; 173 case CDF_SIGNED16: 174 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 175 info[i].pi_s16) == -1) 176 return -1; 177 break; 178 case CDF_SIGNED32: 179 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 180 info[i].pi_s32) == -1) 181 return -1; 182 break; 183 case CDF_UNSIGNED32: 184 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 185 info[i].pi_u32) == -1) 186 return -1; 187 break; 188 case CDF_FLOAT: 189 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 190 info[i].pi_f) == -1) 191 return -1; 192 break; 193 case CDF_DOUBLE: 194 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 195 info[i].pi_d) == -1) 196 return -1; 197 break; 198 case CDF_LENGTH32_STRING: 199 case CDF_LENGTH32_WSTRING: 200 len = info[i].pi_str.s_len; 201 if (len > 1) { 202 char vbuf[1024]; 203 size_t j, k = 1; 204 205 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 206 k++; 207 s = info[i].pi_str.s_buf; 208 for (j = 0; j < sizeof(vbuf) && len--; s += k) { 209 if (*s == '\0') 210 break; 211 if (isprint((unsigned char)*s)) 212 vbuf[j++] = *s; 213 } 214 if (j == sizeof(vbuf)) 215 --j; 216 vbuf[j] = '\0'; 217 if (NOTMIME(ms)) { 218 if (vbuf[0]) { 219 if (file_printf(ms, ", %s: %s", 220 buf, vbuf) == -1) 221 return -1; 222 } 223 } else if (str == NULL && info[i].pi_id == 224 CDF_PROPERTY_NAME_OF_APPLICATION) { 225 str = cdf_app_to_mime(vbuf, app2mime); 226 } 227 } 228 break; 229 case CDF_FILETIME: 230 tp = info[i].pi_tp; 231 if (tp != 0) { 232 char tbuf[64]; 233 if (tp < 1000000000000000LL) { 234 cdf_print_elapsed_time(tbuf, 235 sizeof(tbuf), tp); 236 if (NOTMIME(ms) && file_printf(ms, 237 ", %s: %s", buf, tbuf) == -1) 238 return -1; 239 } else { 240 char *c, *ec; 241 cdf_timestamp_to_timespec(&ts, tp); 242 c = cdf_ctime(&ts.tv_sec, tbuf); 243 if (c != NULL && 244 (ec = strchr(c, '\n')) != NULL) 245 *ec = '\0'; 246 247 if (NOTMIME(ms) && file_printf(ms, 248 ", %s: %s", buf, c) == -1) 249 return -1; 250 } 251 } 252 break; 253 case CDF_CLIPBOARD: 254 break; 255 default: 256 return -1; 257 } 258 } 259 if (!NOTMIME(ms)) { 260 if (str == NULL) 261 return 0; 262 if (file_printf(ms, "application/%s", str) == -1) 263 return -1; 264 } 265 return 1; 266 } 267 268 private int 269 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h, 270 const cdf_stream_t *sst) 271 { 272 cdf_catalog_t *cat; 273 size_t i; 274 char buf[256]; 275 cdf_catalog_entry_t *ce; 276 277 if (NOTMIME(ms)) { 278 if (file_printf(ms, "Microsoft Thumbs.db [") == -1) 279 return -1; 280 if (cdf_unpack_catalog(h, sst, &cat) == -1) 281 return -1; 282 ce = cat->cat_e; 283 /* skip first entry since it has a , or paren */ 284 for (i = 1; i < cat->cat_num; i++) 285 if (file_printf(ms, "%s%s", 286 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name), 287 i == cat->cat_num - 1 ? "]" : ", ") == -1) { 288 free(cat); 289 return -1; 290 } 291 free(cat); 292 } else { 293 if (file_printf(ms, "application/CDFV2") == -1) 294 return -1; 295 } 296 return 1; 297 } 298 299 private int 300 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 301 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 302 { 303 cdf_summary_info_header_t si; 304 cdf_property_info_t *info; 305 size_t count; 306 int m; 307 308 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 309 return -1; 310 311 if (NOTMIME(ms)) { 312 const char *str; 313 314 if (file_printf(ms, "Composite Document File V2 Document") 315 == -1) 316 return -1; 317 318 if (file_printf(ms, ", %s Endian", 319 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 320 return -2; 321 switch (si.si_os) { 322 case 2: 323 if (file_printf(ms, ", Os: Windows, Version %d.%d", 324 si.si_os_version & 0xff, 325 (uint32_t)si.si_os_version >> 8) == -1) 326 return -2; 327 break; 328 case 1: 329 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 330 (uint32_t)si.si_os_version >> 8, 331 si.si_os_version & 0xff) == -1) 332 return -2; 333 break; 334 default: 335 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 336 si.si_os_version & 0xff, 337 (uint32_t)si.si_os_version >> 8) == -1) 338 return -2; 339 break; 340 } 341 if (root_storage) { 342 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 343 clsid2desc); 344 if (str) { 345 if (file_printf(ms, ", %s", str) == -1) 346 return -2; 347 } 348 } 349 } 350 351 m = cdf_file_property_info(ms, info, count, root_storage); 352 free(info); 353 354 return m == -1 ? -2 : m; 355 } 356 357 #ifdef notdef 358 private char * 359 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 360 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 361 PRIx64 "-%.12" PRIx64, 362 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL, 363 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL, 364 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL, 365 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL, 366 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL); 367 return buf; 368 } 369 #endif 370 371 private int 372 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info, 373 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 374 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn) 375 { 376 int i; 377 378 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst, 379 dir, "Catalog", scn)) == -1) 380 return i; 381 #ifdef CDF_DEBUG 382 cdf_dump_catalog(h, scn); 383 #endif 384 if ((i = cdf_file_catalog(ms, h, scn)) == -1) 385 return -1; 386 return i; 387 } 388 389 private int 390 cdf_check_summary_info(struct magic_set *ms, const cdf_info_t *info, 391 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat, 392 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn, 393 const cdf_directory_t *root_storage, const char **expn) 394 { 395 int i; 396 const char *str = NULL; 397 cdf_directory_t *d; 398 char name[__arraycount(d->d_name)]; 399 size_t j, k; 400 401 #ifdef CDF_DEBUG 402 cdf_dump_summary_info(h, scn); 403 #endif 404 if ((i = cdf_file_summary_info(ms, h, scn, root_storage)) < 0) { 405 *expn = "Can't expand summary_info"; 406 return i; 407 } 408 if (i == 1) 409 return i; 410 for (j = 0; str == NULL && j < dir->dir_len; j++) { 411 d = &dir->dir_tab[j]; 412 for (k = 0; k < sizeof(name); k++) 413 name[k] = (char)cdf_tole2(d->d_name[k]); 414 str = cdf_app_to_mime(name, 415 NOTMIME(ms) ? name2desc : name2mime); 416 } 417 if (NOTMIME(ms)) { 418 if (str != NULL) { 419 if (file_printf(ms, "%s", str) == -1) 420 return -1; 421 i = 1; 422 } 423 } else { 424 if (str == NULL) 425 str = "vnd.ms-office"; 426 if (file_printf(ms, "application/%s", str) == -1) 427 return -1; 428 i = 1; 429 } 430 if (i <= 0) { 431 i = cdf_file_catalog_info(ms, info, h, sat, ssat, sst, 432 dir, scn); 433 } 434 return i; 435 } 436 437 private struct sinfo { 438 const char *name; 439 const char *mime; 440 const char *sections[5]; 441 const int types[5]; 442 } sectioninfo[] = { 443 { "Encrypted", "encrypted", 444 { 445 "EncryptedPackage", "EncryptedSummary", 446 NULL, NULL, NULL, 447 }, 448 { 449 CDF_DIR_TYPE_USER_STREAM, 450 CDF_DIR_TYPE_USER_STREAM, 451 0, 0, 0, 452 453 }, 454 }, 455 { "QuickBooks", "quickbooks", 456 { 457 #if 0 458 "TaxForms", "PDFTaxForms", "modulesInBackup", 459 #endif 460 "mfbu_header", NULL, NULL, NULL, NULL, 461 }, 462 { 463 #if 0 464 CDF_DIR_TYPE_USER_STORAGE, 465 CDF_DIR_TYPE_USER_STORAGE, 466 CDF_DIR_TYPE_USER_STREAM, 467 #endif 468 CDF_DIR_TYPE_USER_STREAM, 469 0, 0, 0, 0 470 }, 471 }, 472 { "Microsoft Excel", "vnd.ms-excel", 473 { 474 "Book", "Workbook", NULL, NULL, NULL, 475 }, 476 { 477 CDF_DIR_TYPE_USER_STREAM, 478 CDF_DIR_TYPE_USER_STREAM, 479 0, 0, 0, 480 }, 481 }, 482 { "Microsoft Word", "msword", 483 { 484 "WordDocument", NULL, NULL, NULL, NULL, 485 }, 486 { 487 CDF_DIR_TYPE_USER_STREAM, 488 0, 0, 0, 0, 489 }, 490 }, 491 { "Microsoft PowerPoint", "vnd.ms-powerpoint", 492 { 493 "PowerPoint", NULL, NULL, NULL, NULL, 494 }, 495 { 496 CDF_DIR_TYPE_USER_STREAM, 497 0, 0, 0, 0, 498 }, 499 }, 500 { "Microsoft Outlook Message", "vnd.ms-outlook", 501 { 502 "__properties_version1.0", 503 "__recip_version1.0_#00000000", 504 NULL, NULL, NULL, 505 }, 506 { 507 CDF_DIR_TYPE_USER_STREAM, 508 CDF_DIR_TYPE_USER_STORAGE, 509 0, 0, 0, 510 }, 511 }, 512 }; 513 514 private int 515 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir) 516 { 517 size_t sd, j; 518 519 for (sd = 0; sd < __arraycount(sectioninfo); sd++) { 520 const struct sinfo *si = §ioninfo[sd]; 521 for (j = 0; si->sections[j]; j++) { 522 if (cdf_find_stream(dir, si->sections[j], si->types[j]) 523 > 0) 524 break; 525 #ifdef CDF_DEBUG 526 fprintf(stderr, "Can't read %s\n", si->sections[j]); 527 #endif 528 } 529 if (si->sections[j] == NULL) 530 continue; 531 if (NOTMIME(ms)) { 532 if (file_printf(ms, "CDFV2 %s", si->name) == -1) 533 return -1; 534 } else { 535 if (file_printf(ms, "application/%s", si->mime) == -1) 536 return -1; 537 } 538 return 1; 539 } 540 return -1; 541 } 542 543 protected int 544 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, 545 size_t nbytes) 546 { 547 cdf_info_t info; 548 cdf_header_t h; 549 cdf_sat_t sat, ssat; 550 cdf_stream_t sst, scn; 551 cdf_dir_t dir; 552 int i; 553 const char *expn = ""; 554 const cdf_directory_t *root_storage; 555 556 scn.sst_tab = NULL; 557 info.i_fd = fd; 558 info.i_buf = buf; 559 info.i_len = nbytes; 560 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) 561 return 0; 562 if (cdf_read_header(&info, &h) == -1) 563 return 0; 564 #ifdef CDF_DEBUG 565 cdf_dump_header(&h); 566 #endif 567 568 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 569 expn = "Can't read SAT"; 570 goto out0; 571 } 572 #ifdef CDF_DEBUG 573 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 574 #endif 575 576 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 577 expn = "Can't read SSAT"; 578 goto out1; 579 } 580 #ifdef CDF_DEBUG 581 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 582 #endif 583 584 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 585 expn = "Can't read directory"; 586 goto out2; 587 } 588 589 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 590 &root_storage)) == -1) { 591 expn = "Cannot read short stream"; 592 goto out3; 593 } 594 #ifdef CDF_DEBUG 595 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 596 #endif 597 #ifdef notdef 598 if (root_storage) { 599 if (NOTMIME(ms)) { 600 char clsbuf[128]; 601 if (file_printf(ms, "CLSID %s, ", 602 format_clsid(clsbuf, sizeof(clsbuf), 603 root_storage->d_storage_uuid)) == -1) 604 return -1; 605 } 606 } 607 #endif 608 609 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 610 "FileHeader", &scn)) != -1) { 611 #define HWP5_SIGNATURE "HWP Document File" 612 if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1 613 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 614 sizeof(HWP5_SIGNATURE) - 1) == 0) { 615 if (NOTMIME(ms)) { 616 if (file_printf(ms, 617 "Hangul (Korean) Word Processor File 5.x") == -1) 618 return -1; 619 } else { 620 if (file_printf(ms, "application/x-hwp") == -1) 621 return -1; 622 } 623 i = 1; 624 goto out5; 625 } else { 626 cdf_zero_stream(&scn); 627 } 628 } 629 630 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 631 &scn)) == -1) { 632 if (errno != ESRCH) { 633 expn = "Cannot read summary info"; 634 } 635 } else { 636 i = cdf_check_summary_info(ms, &info, &h, 637 &sat, &ssat, &sst, &dir, &scn, root_storage, &expn); 638 cdf_zero_stream(&scn); 639 } 640 if (i <= 0) { 641 if ((i = cdf_read_doc_summary_info(&info, &h, &sat, &ssat, 642 &sst, &dir, &scn)) == -1) { 643 if (errno != ESRCH) { 644 expn = "Cannot read summary info"; 645 } 646 } else { 647 i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat, 648 &sst, &dir, &scn, root_storage, &expn); 649 } 650 } 651 if (i <= 0) { 652 i = cdf_file_dir_info(ms, &dir); 653 if (i < 0) 654 expn = "Cannot read section info"; 655 } 656 out5: 657 cdf_zero_stream(&scn); 658 cdf_zero_stream(&sst); 659 out3: 660 free(dir.dir_tab); 661 out2: 662 free(ssat.sat_tab); 663 out1: 664 free(sat.sat_tab); 665 out0: 666 if (i == -1) { 667 if (NOTMIME(ms)) { 668 if (file_printf(ms, 669 "Composite Document File V2 Document") == -1) 670 return -1; 671 if (*expn) 672 if (file_printf(ms, ", %s", expn) == -1) 673 return -1; 674 } else { 675 if (file_printf(ms, "application/CDFV2") == -1) 676 return -1; 677 } 678 i = 1; 679 } 680 return i; 681 } 682