1 /* $NetBSD: funcs.c,v 1.11 2017/02/10 17:53:24 christos Exp $ */ 2 3 /* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include "file.h" 30 31 #ifndef lint 32 #if 0 33 FILE_RCSID("@(#)$File: funcs.c,v 1.91 2016/12/01 16:16:14 christos Exp $") 34 #else 35 __RCSID("$NetBSD: funcs.c,v 1.11 2017/02/10 17:53:24 christos Exp $"); 36 #endif 37 #endif /* lint */ 38 39 #include "magic.h" 40 #include <assert.h> 41 #include <stdarg.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <ctype.h> 45 #if defined(HAVE_WCHAR_H) 46 #include <wchar.h> 47 #endif 48 #if defined(HAVE_WCTYPE_H) 49 #include <wctype.h> 50 #endif 51 #if defined(HAVE_LIMITS_H) 52 #include <limits.h> 53 #endif 54 55 #ifndef SIZE_MAX 56 #define SIZE_MAX ((size_t)~0) 57 #endif 58 59 /* 60 * Like printf, only we append to a buffer. 61 */ 62 protected int 63 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 64 { 65 int len; 66 char *buf, *newstr; 67 68 if (ms->event_flags & EVENT_HAD_ERR) 69 return 0; 70 len = vasprintf(&buf, fmt, ap); 71 if (len < 0) 72 goto out; 73 74 if (ms->o.buf != NULL) { 75 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 76 free(buf); 77 if (len < 0) 78 goto out; 79 free(ms->o.buf); 80 buf = newstr; 81 } 82 ms->o.buf = buf; 83 return 0; 84 out: 85 fprintf(stderr, "vasprintf failed (%s)", strerror(errno)); 86 return -1; 87 } 88 89 protected int 90 file_printf(struct magic_set *ms, const char *fmt, ...) 91 { 92 int rv; 93 va_list ap; 94 95 va_start(ap, fmt); 96 rv = file_vprintf(ms, fmt, ap); 97 va_end(ap); 98 return rv; 99 } 100 101 /* 102 * error - print best error message possible 103 */ 104 /*VARARGS*/ 105 __attribute__((__format__(__printf__, 3, 0))) 106 private void 107 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 108 size_t lineno) 109 { 110 /* Only the first error is ok */ 111 if (ms->event_flags & EVENT_HAD_ERR) 112 return; 113 if (lineno != 0) { 114 free(ms->o.buf); 115 ms->o.buf = NULL; 116 file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno); 117 } 118 if (ms->o.buf && *ms->o.buf) 119 file_printf(ms, " "); 120 file_vprintf(ms, f, va); 121 if (error > 0) 122 file_printf(ms, " (%s)", strerror(error)); 123 ms->event_flags |= EVENT_HAD_ERR; 124 ms->error = error; 125 } 126 127 /*VARARGS*/ 128 protected void 129 file_error(struct magic_set *ms, int error, const char *f, ...) 130 { 131 va_list va; 132 va_start(va, f); 133 file_error_core(ms, error, f, va, 0); 134 va_end(va); 135 } 136 137 /* 138 * Print an error with magic line number. 139 */ 140 /*VARARGS*/ 141 protected void 142 file_magerror(struct magic_set *ms, const char *f, ...) 143 { 144 va_list va; 145 va_start(va, f); 146 file_error_core(ms, 0, f, va, ms->line); 147 va_end(va); 148 } 149 150 protected void 151 file_oomem(struct magic_set *ms, size_t len) 152 { 153 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 154 len); 155 } 156 157 protected void 158 file_badseek(struct magic_set *ms) 159 { 160 file_error(ms, errno, "error seeking"); 161 } 162 163 protected void 164 file_badread(struct magic_set *ms) 165 { 166 file_error(ms, errno, "error reading"); 167 } 168 169 #ifndef COMPILE_ONLY 170 171 static int 172 checkdone(struct magic_set *ms, int *rv) 173 { 174 if ((ms->flags & MAGIC_CONTINUE) == 0) 175 return 1; 176 if (file_printf(ms, "\n- ") == -1) 177 *rv = -1; 178 return 0; 179 } 180 181 /*ARGSUSED*/ 182 protected int 183 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 184 const void *buf, size_t nb) 185 { 186 int m = 0, rv = 0, looks_text = 0; 187 const unsigned char *ubuf = CAST(const unsigned char *, buf); 188 unichar *u8buf = NULL; 189 size_t ulen; 190 const char *code = NULL; 191 const char *code_mime = "binary"; 192 const char *type = "application/octet-stream"; 193 const char *def = "data"; 194 const char *ftype = NULL; 195 196 if (nb == 0) { 197 def = "empty"; 198 type = "application/x-empty"; 199 goto simple; 200 } else if (nb == 1) { 201 def = "very short file (no magic)"; 202 goto simple; 203 } 204 205 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 206 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 207 &code, &code_mime, &ftype); 208 } 209 210 #ifdef __EMX__ 211 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 212 m = file_os2_apptype(ms, inname, buf, nb); 213 if ((ms->flags & MAGIC_DEBUG) != 0) 214 (void)fprintf(stderr, "[try os2_apptype %d]\n", m); 215 switch (m) { 216 case -1: 217 return -1; 218 case 0: 219 break; 220 default: 221 return 1; 222 } 223 } 224 #endif 225 #if HAVE_FORK 226 /* try compression stuff */ 227 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) { 228 m = file_zmagic(ms, fd, inname, ubuf, nb); 229 if ((ms->flags & MAGIC_DEBUG) != 0) 230 (void)fprintf(stderr, "[try zmagic %d]\n", m); 231 if (m) { 232 goto done_encoding; 233 } 234 } 235 #endif 236 /* Check if we have a tar file */ 237 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) { 238 m = file_is_tar(ms, ubuf, nb); 239 if ((ms->flags & MAGIC_DEBUG) != 0) 240 (void)fprintf(stderr, "[try tar %d]\n", m); 241 if (m) { 242 if (checkdone(ms, &rv)) 243 goto done; 244 } 245 } 246 247 /* Check if we have a CDF file */ 248 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) { 249 m = file_trycdf(ms, fd, ubuf, nb); 250 if ((ms->flags & MAGIC_DEBUG) != 0) 251 (void)fprintf(stderr, "[try cdf %d]\n", m); 252 if (m) { 253 if (checkdone(ms, &rv)) 254 goto done; 255 } 256 } 257 258 /* try soft magic tests */ 259 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { 260 m = file_softmagic(ms, ubuf, nb, NULL, NULL, BINTEST, 261 looks_text); 262 if ((ms->flags & MAGIC_DEBUG) != 0) 263 (void)fprintf(stderr, "[try softmagic %d]\n", m); 264 if (m) { 265 #ifdef BUILTIN_ELF 266 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 267 nb > 5 && fd != -1) { 268 /* 269 * We matched something in the file, so this 270 * *might* be an ELF file, and the file is at 271 * least 5 bytes long, so if it's an ELF file 272 * it has at least one byte past the ELF magic 273 * number - try extracting information from the 274 * ELF headers that cannot easily * be 275 * extracted with rules in the magic file. 276 */ 277 m = file_tryelf(ms, fd, ubuf, nb); 278 if ((ms->flags & MAGIC_DEBUG) != 0) 279 (void)fprintf(stderr, "[try elf %d]\n", 280 m); 281 } 282 #endif 283 if (checkdone(ms, &rv)) 284 goto done; 285 } 286 } 287 288 /* try text properties */ 289 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 290 291 m = file_ascmagic(ms, ubuf, nb, looks_text); 292 if ((ms->flags & MAGIC_DEBUG) != 0) 293 (void)fprintf(stderr, "[try ascmagic %d]\n", m); 294 if (m) { 295 if (checkdone(ms, &rv)) 296 goto done; 297 } 298 } 299 300 simple: 301 /* give up */ 302 m = 1; 303 if (ms->flags & MAGIC_MIME) { 304 if ((ms->flags & MAGIC_MIME_TYPE) && 305 file_printf(ms, "%s", type) == -1) 306 rv = -1; 307 } else if (ms->flags & MAGIC_APPLE) { 308 if (file_printf(ms, "UNKNUNKN") == -1) 309 rv = -1; 310 } else if (ms->flags & MAGIC_EXTENSION) { 311 if (file_printf(ms, "???") == -1) 312 rv = -1; 313 } else { 314 if (file_printf(ms, "%s", def) == -1) 315 rv = -1; 316 } 317 done: 318 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 319 if (ms->flags & MAGIC_MIME_TYPE) 320 if (file_printf(ms, "; charset=") == -1) 321 rv = -1; 322 if (file_printf(ms, "%s", code_mime) == -1) 323 rv = -1; 324 } 325 #if HAVE_FORK 326 done_encoding: 327 #endif 328 free(u8buf); 329 if (rv) 330 return rv; 331 332 return m; 333 } 334 #endif 335 336 protected int 337 file_reset(struct magic_set *ms) 338 { 339 if (ms->mlist[0] == NULL) { 340 file_error(ms, 0, "no magic files loaded"); 341 return -1; 342 } 343 if (ms->o.buf) { 344 free(ms->o.buf); 345 ms->o.buf = NULL; 346 } 347 if (ms->o.pbuf) { 348 free(ms->o.pbuf); 349 ms->o.pbuf = NULL; 350 } 351 ms->event_flags &= ~EVENT_HAD_ERR; 352 ms->error = -1; 353 return 0; 354 } 355 356 #define OCTALIFY(n, o) \ 357 /*LINTED*/ \ 358 (void)(*(n)++ = '\\', \ 359 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 360 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 361 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 362 (o)++) 363 364 protected const char * 365 file_getbuffer(struct magic_set *ms) 366 { 367 char *pbuf, *op, *np; 368 size_t psize, len; 369 370 if (ms->event_flags & EVENT_HAD_ERR) 371 return NULL; 372 373 if (ms->flags & MAGIC_RAW) 374 return ms->o.buf; 375 376 if (ms->o.buf == NULL) 377 return NULL; 378 379 /* * 4 is for octal representation, + 1 is for NUL */ 380 len = strlen(ms->o.buf); 381 if (len > (SIZE_MAX - 1) / 4) { 382 file_oomem(ms, len); 383 return NULL; 384 } 385 psize = len * 4 + 1; 386 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 387 file_oomem(ms, psize); 388 return NULL; 389 } 390 ms->o.pbuf = pbuf; 391 392 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 393 { 394 mbstate_t state; 395 wchar_t nextchar; 396 int mb_conv = 1; 397 size_t bytesconsumed; 398 char *eop; 399 (void)memset(&state, 0, sizeof(mbstate_t)); 400 401 np = ms->o.pbuf; 402 op = ms->o.buf; 403 eop = op + len; 404 405 while (op < eop) { 406 bytesconsumed = mbrtowc(&nextchar, op, 407 (size_t)(eop - op), &state); 408 if (bytesconsumed == (size_t)(-1) || 409 bytesconsumed == (size_t)(-2)) { 410 mb_conv = 0; 411 break; 412 } 413 414 if (iswprint(nextchar)) { 415 (void)memcpy(np, op, bytesconsumed); 416 op += bytesconsumed; 417 np += bytesconsumed; 418 } else { 419 while (bytesconsumed-- > 0) 420 OCTALIFY(np, op); 421 } 422 } 423 *np = '\0'; 424 425 /* Parsing succeeded as a multi-byte sequence */ 426 if (mb_conv != 0) 427 return ms->o.pbuf; 428 } 429 #endif 430 431 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 432 if (isprint((unsigned char)*op)) { 433 *np++ = *op++; 434 } else { 435 OCTALIFY(np, op); 436 } 437 } 438 *np = '\0'; 439 return ms->o.pbuf; 440 } 441 442 protected int 443 file_check_mem(struct magic_set *ms, unsigned int level) 444 { 445 size_t len; 446 447 if (level >= ms->c.len) { 448 len = (ms->c.len = 20 + level) * sizeof(*ms->c.li); 449 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 450 malloc(len) : 451 realloc(ms->c.li, len)); 452 if (ms->c.li == NULL) { 453 file_oomem(ms, len); 454 return -1; 455 } 456 } 457 ms->c.li[level].got_match = 0; 458 #ifdef ENABLE_CONDITIONALS 459 ms->c.li[level].last_match = 0; 460 ms->c.li[level].last_cond = COND_NONE; 461 #endif /* ENABLE_CONDITIONALS */ 462 return 0; 463 } 464 465 protected size_t 466 file_printedlen(const struct magic_set *ms) 467 { 468 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 469 } 470 471 protected int 472 file_replace(struct magic_set *ms, const char *pat, const char *rep) 473 { 474 file_regex_t rx; 475 int rc, rv = -1; 476 477 rc = file_regcomp(&rx, pat, REG_EXTENDED); 478 if (rc) { 479 file_regerror(&rx, rc, ms); 480 } else { 481 regmatch_t rm; 482 int nm = 0; 483 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 484 ms->o.buf[rm.rm_so] = '\0'; 485 if (file_printf(ms, "%s%s", rep, 486 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 487 goto out; 488 nm++; 489 } 490 rv = nm; 491 } 492 out: 493 file_regfree(&rx); 494 return rv; 495 } 496 497 protected int 498 file_regcomp(file_regex_t *rx, const char *pat, int flags) 499 { 500 #ifdef USE_C_LOCALE 501 rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 502 assert(rx->c_lc_ctype != NULL); 503 rx->old_lc_ctype = uselocale(rx->c_lc_ctype); 504 assert(rx->old_lc_ctype != NULL); 505 #else 506 rx->old_lc_ctype = setlocale(LC_CTYPE, "C"); 507 #endif 508 rx->pat = pat; 509 510 return rx->rc = regcomp(&rx->rx, pat, flags); 511 } 512 513 protected int 514 file_regexec(file_regex_t *rx, const char *str, size_t nmatch, 515 regmatch_t* pmatch, int eflags) 516 { 517 assert(rx->rc == 0); 518 return regexec(&rx->rx, str, nmatch, pmatch, eflags); 519 } 520 521 protected void 522 file_regfree(file_regex_t *rx) 523 { 524 if (rx->rc == 0) 525 regfree(&rx->rx); 526 #ifdef USE_C_LOCALE 527 (void)uselocale(rx->old_lc_ctype); 528 freelocale(rx->c_lc_ctype); 529 #else 530 (void)setlocale(LC_CTYPE, rx->old_lc_ctype); 531 #endif 532 } 533 534 protected void 535 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms) 536 { 537 char errmsg[512]; 538 539 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg)); 540 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat, 541 errmsg); 542 } 543 544 protected file_pushbuf_t * 545 file_push_buffer(struct magic_set *ms) 546 { 547 file_pushbuf_t *pb; 548 549 if (ms->event_flags & EVENT_HAD_ERR) 550 return NULL; 551 552 if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL) 553 return NULL; 554 555 pb->buf = ms->o.buf; 556 pb->offset = ms->offset; 557 558 ms->o.buf = NULL; 559 ms->offset = 0; 560 561 return pb; 562 } 563 564 protected char * 565 file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) 566 { 567 char *rbuf; 568 569 if (ms->event_flags & EVENT_HAD_ERR) { 570 free(pb->buf); 571 free(pb); 572 return NULL; 573 } 574 575 rbuf = ms->o.buf; 576 577 ms->o.buf = pb->buf; 578 ms->offset = pb->offset; 579 580 free(pb); 581 return rbuf; 582 } 583 584 /* 585 * convert string to ascii printable format. 586 */ 587 protected char * 588 file_printable(char *buf, size_t bufsiz, const char *str) 589 { 590 char *ptr, *eptr; 591 const unsigned char *s = (const unsigned char *)str; 592 593 for (ptr = buf, eptr = ptr + bufsiz - 1; ptr < eptr && *s; s++) { 594 if (isprint(*s)) { 595 *ptr++ = *s; 596 continue; 597 } 598 if (ptr >= eptr - 3) 599 break; 600 *ptr++ = '\\'; 601 *ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0'; 602 *ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0'; 603 *ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0'; 604 } 605 *ptr = '\0'; 606 return buf; 607 } 608