1 /* $NetBSD: readcdf.c,v 1.10 2014/06/13 02:08:06 christos Exp $ */ 2 /*- 3 * Copyright (c) 2008 Christos Zoulas 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 16 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 17 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * POSSIBILITY OF SUCH DAMAGE. 26 */ 27 #include "file.h" 28 29 #ifndef lint 30 #if 0 31 FILE_RCSID("@(#)$File: readcdf.c,v 1.44 2014/05/14 23:22:48 christos Exp $") 32 #else 33 __RCSID("$NetBSD: readcdf.c,v 1.10 2014/06/13 02:08:06 christos Exp $"); 34 #endif 35 #endif 36 37 #include <assert.h> 38 #include <stdlib.h> 39 #include <unistd.h> 40 #include <string.h> 41 #include <time.h> 42 #include <ctype.h> 43 #if defined(HAVE_LOCALE_H) 44 #include <locale.h> 45 #endif 46 47 #include "cdf.h" 48 #include "magic.h" 49 50 #ifndef __arraycount 51 #define __arraycount(a) (sizeof(a) / sizeof(a[0])) 52 #endif 53 54 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0) 55 56 static const struct nv { 57 const char *pattern; 58 const char *mime; 59 } app2mime[] = { 60 { "Word", "msword", }, 61 { "Excel", "vnd.ms-excel", }, 62 { "Powerpoint", "vnd.ms-powerpoint", }, 63 { "Crystal Reports", "x-rpt", }, 64 { "Advanced Installer", "vnd.ms-msi", }, 65 { "InstallShield", "vnd.ms-msi", }, 66 { "Microsoft Patch Compiler", "vnd.ms-msi", }, 67 { "NAnt", "vnd.ms-msi", }, 68 { "Windows Installer", "vnd.ms-msi", }, 69 { NULL, NULL, }, 70 }, name2mime[] = { 71 { "WordDocument", "msword", }, 72 { "PowerPoint", "vnd.ms-powerpoint", }, 73 { "DigitalSignature", "vnd.ms-msi", }, 74 { NULL, NULL, }, 75 }, name2desc[] = { 76 { "WordDocument", "Microsoft Office Word",}, 77 { "PowerPoint", "Microsoft PowerPoint", }, 78 { "DigitalSignature", "Microsoft Installer", }, 79 { NULL, NULL, }, 80 }; 81 82 static const struct cv { 83 uint64_t clsid[2]; 84 const char *mime; 85 } clsid2mime[] = { 86 { 87 { 0x00000000000c1084LLU, 0x46000000000000c0LLU }, 88 "x-msi", 89 }, 90 { { 0, 0 }, 91 NULL, 92 }, 93 }, clsid2desc[] = { 94 { 95 { 0x00000000000c1084LLU, 0x46000000000000c0LLU }, 96 "MSI Installer", 97 }, 98 { { 0, 0 }, 99 NULL, 100 }, 101 }; 102 103 private const char * 104 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv) 105 { 106 size_t i; 107 for (i = 0; cv[i].mime != NULL; i++) { 108 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1]) 109 return cv[i].mime; 110 } 111 return NULL; 112 } 113 114 private const char * 115 cdf_app_to_mime(const char *vbuf, const struct nv *nv) 116 { 117 size_t i; 118 const char *rv = NULL; 119 char *old_lc_ctype; 120 121 old_lc_ctype = setlocale(LC_CTYPE, NULL); 122 assert(old_lc_ctype != NULL); 123 old_lc_ctype = strdup(old_lc_ctype); 124 assert(old_lc_ctype != NULL); 125 (void)setlocale(LC_CTYPE, "C"); 126 for (i = 0; nv[i].pattern != NULL; i++) 127 if (strcasestr(vbuf, nv[i].pattern) != NULL) { 128 rv = nv[i].mime; 129 break; 130 } 131 (void)setlocale(LC_CTYPE, old_lc_ctype); 132 free(old_lc_ctype); 133 return rv; 134 } 135 136 private int 137 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, 138 size_t count, const cdf_directory_t *root_storage) 139 { 140 size_t i; 141 cdf_timestamp_t tp; 142 struct timespec ts; 143 char buf[64]; 144 const char *str = NULL; 145 const char *s; 146 int len; 147 148 if (!NOTMIME(ms) && root_storage) 149 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 150 clsid2mime); 151 152 for (i = 0; i < count; i++) { 153 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); 154 switch (info[i].pi_type) { 155 case CDF_NULL: 156 break; 157 case CDF_SIGNED16: 158 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, 159 info[i].pi_s16) == -1) 160 return -1; 161 break; 162 case CDF_SIGNED32: 163 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, 164 info[i].pi_s32) == -1) 165 return -1; 166 break; 167 case CDF_UNSIGNED32: 168 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, 169 info[i].pi_u32) == -1) 170 return -1; 171 break; 172 case CDF_FLOAT: 173 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 174 info[i].pi_f) == -1) 175 return -1; 176 break; 177 case CDF_DOUBLE: 178 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, 179 info[i].pi_d) == -1) 180 return -1; 181 break; 182 case CDF_LENGTH32_STRING: 183 case CDF_LENGTH32_WSTRING: 184 len = info[i].pi_str.s_len; 185 if (len > 1) { 186 char vbuf[1024]; 187 size_t j, k = 1; 188 189 if (info[i].pi_type == CDF_LENGTH32_WSTRING) 190 k++; 191 s = info[i].pi_str.s_buf; 192 for (j = 0; j < sizeof(vbuf) && len--; s += k) { 193 if (*s == '\0') 194 break; 195 if (isprint((unsigned char)*s)) 196 vbuf[j++] = *s; 197 } 198 if (j == sizeof(vbuf)) 199 --j; 200 vbuf[j] = '\0'; 201 if (NOTMIME(ms)) { 202 if (vbuf[0]) { 203 if (file_printf(ms, ", %s: %s", 204 buf, vbuf) == -1) 205 return -1; 206 } 207 } else if (str == NULL && info[i].pi_id == 208 CDF_PROPERTY_NAME_OF_APPLICATION) { 209 str = cdf_app_to_mime(vbuf, app2mime); 210 } 211 } 212 break; 213 case CDF_FILETIME: 214 tp = info[i].pi_tp; 215 if (tp != 0) { 216 char tbuf[64]; 217 if (tp < 1000000000000000LL) { 218 cdf_print_elapsed_time(tbuf, 219 sizeof(tbuf), tp); 220 if (NOTMIME(ms) && file_printf(ms, 221 ", %s: %s", buf, tbuf) == -1) 222 return -1; 223 } else { 224 char *c, *ec; 225 cdf_timestamp_to_timespec(&ts, tp); 226 c = cdf_ctime(&ts.tv_sec, tbuf); 227 if (c != NULL && 228 (ec = strchr(c, '\n')) != NULL) 229 *ec = '\0'; 230 231 if (NOTMIME(ms) && file_printf(ms, 232 ", %s: %s", buf, c) == -1) 233 return -1; 234 } 235 } 236 break; 237 case CDF_CLIPBOARD: 238 break; 239 default: 240 return -1; 241 } 242 } 243 if (!NOTMIME(ms)) { 244 if (str == NULL) 245 return 0; 246 if (file_printf(ms, "application/%s", str) == -1) 247 return -1; 248 } 249 return 1; 250 } 251 252 private int 253 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, 254 const cdf_stream_t *sst, const cdf_directory_t *root_storage) 255 { 256 cdf_summary_info_header_t si; 257 cdf_property_info_t *info; 258 size_t count; 259 int m; 260 261 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) 262 return -1; 263 264 if (NOTMIME(ms)) { 265 const char *str; 266 267 if (file_printf(ms, "Composite Document File V2 Document") 268 == -1) 269 return -1; 270 271 if (file_printf(ms, ", %s Endian", 272 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) 273 return -2; 274 switch (si.si_os) { 275 case 2: 276 if (file_printf(ms, ", Os: Windows, Version %d.%d", 277 si.si_os_version & 0xff, 278 (uint32_t)si.si_os_version >> 8) == -1) 279 return -2; 280 break; 281 case 1: 282 if (file_printf(ms, ", Os: MacOS, Version %d.%d", 283 (uint32_t)si.si_os_version >> 8, 284 si.si_os_version & 0xff) == -1) 285 return -2; 286 break; 287 default: 288 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, 289 si.si_os_version & 0xff, 290 (uint32_t)si.si_os_version >> 8) == -1) 291 return -2; 292 break; 293 } 294 if (root_storage) { 295 str = cdf_clsid_to_mime(root_storage->d_storage_uuid, 296 clsid2desc); 297 if (str) 298 if (file_printf(ms, ", %s", str) == -1) 299 return -2; 300 } 301 } 302 303 m = cdf_file_property_info(ms, info, count, root_storage); 304 free(info); 305 306 return m == -1 ? -2 : m; 307 } 308 309 #ifdef notdef 310 private char * 311 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) { 312 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 313 PRIx64 "-%.12" PRIx64, 314 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffLLU, 315 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffLLU, 316 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffLLU, 317 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffLLU, 318 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffLLU); 319 return buf; 320 } 321 #endif 322 323 protected int 324 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, 325 size_t nbytes) 326 { 327 cdf_info_t info; 328 cdf_header_t h; 329 cdf_sat_t sat, ssat; 330 cdf_stream_t sst, scn; 331 cdf_dir_t dir; 332 int i; 333 const char *expn = ""; 334 const char *corrupt = "corrupt: "; 335 336 info.i_fd = fd; 337 info.i_buf = buf; 338 info.i_len = nbytes; 339 if (ms->flags & MAGIC_APPLE) 340 return 0; 341 if (cdf_read_header(&info, &h) == -1) 342 return 0; 343 #ifdef CDF_DEBUG 344 cdf_dump_header(&h); 345 #endif 346 347 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { 348 expn = "Can't read SAT"; 349 goto out0; 350 } 351 #ifdef CDF_DEBUG 352 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); 353 #endif 354 355 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { 356 expn = "Can't read SSAT"; 357 goto out1; 358 } 359 #ifdef CDF_DEBUG 360 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); 361 #endif 362 363 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { 364 expn = "Can't read directory"; 365 goto out2; 366 } 367 368 const cdf_directory_t *root_storage; 369 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst, 370 &root_storage)) == -1) { 371 expn = "Cannot read short stream"; 372 goto out3; 373 } 374 #ifdef CDF_DEBUG 375 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); 376 #endif 377 #ifdef notdef 378 if (root_storage) { 379 if (NOTMIME(ms)) { 380 char clsbuf[128]; 381 if (file_printf(ms, "CLSID %s, ", 382 format_clsid(clsbuf, sizeof(clsbuf), 383 root_storage->d_storage_uuid)) == -1) 384 return -1; 385 } 386 } 387 #endif 388 389 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir, 390 "FileHeader", &scn)) != -1) { 391 #define HWP5_SIGNATURE "HWP Document File" 392 if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1 393 && memcmp(scn.sst_tab, HWP5_SIGNATURE, 394 sizeof(HWP5_SIGNATURE) - 1) == 0) { 395 if (NOTMIME(ms)) { 396 if (file_printf(ms, 397 "Hangul (Korean) Word Processor File 5.x") == -1) 398 return -1; 399 } else { 400 if (file_printf(ms, "application/x-hwp") == -1) 401 return -1; 402 } 403 i = 1; 404 goto out5; 405 } else { 406 free(scn.sst_tab); 407 scn.sst_tab = NULL; 408 scn.sst_len = 0; 409 scn.sst_dirlen = 0; 410 } 411 } 412 413 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, 414 &scn)) == -1) { 415 if (errno == ESRCH) { 416 corrupt = expn; 417 expn = "No summary info"; 418 } else { 419 expn = "Cannot read summary info"; 420 } 421 goto out4; 422 } 423 #ifdef CDF_DEBUG 424 cdf_dump_summary_info(&h, &scn); 425 #endif 426 if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0) 427 expn = "Can't expand summary_info"; 428 429 if (i == 0) { 430 const char *str = NULL; 431 cdf_directory_t *d; 432 char name[__arraycount(d->d_name)]; 433 size_t j, k; 434 435 for (j = 0; str == NULL && j < dir.dir_len; j++) { 436 d = &dir.dir_tab[j]; 437 for (k = 0; k < sizeof(name); k++) 438 name[k] = (char)cdf_tole2(d->d_name[k]); 439 str = cdf_app_to_mime(name, 440 NOTMIME(ms) ? name2desc : name2mime); 441 } 442 if (NOTMIME(ms)) { 443 if (str != NULL) { 444 if (file_printf(ms, "%s", str) == -1) 445 return -1; 446 i = 1; 447 } 448 } else { 449 if (str == NULL) 450 str = "vnd.ms-office"; 451 if (file_printf(ms, "application/%s", str) == -1) 452 return -1; 453 i = 1; 454 } 455 } 456 out5: 457 free(scn.sst_tab); 458 out4: 459 free(sst.sst_tab); 460 out3: 461 free(dir.dir_tab); 462 out2: 463 free(ssat.sat_tab); 464 out1: 465 free(sat.sat_tab); 466 out0: 467 if (i == -1) { 468 if (NOTMIME(ms)) { 469 if (file_printf(ms, 470 "Composite Document File V2 Document") == -1) 471 return -1; 472 if (*expn) 473 if (file_printf(ms, ", %s%s", corrupt, expn) == -1) 474 return -1; 475 } else { 476 if (file_printf(ms, "application/CDFV2-corrupt") == -1) 477 return -1; 478 } 479 i = 1; 480 } 481 return i; 482 } 483