1 /* $NetBSD: funcs.c,v 1.17 2019/12/17 02:31:05 christos Exp $ */ 2 3 /* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include "file.h" 30 31 #ifndef lint 32 #if 0 33 FILE_RCSID("@(#)$File: funcs.c,v 1.108 2019/11/09 00:35:46 christos Exp $") 34 #else 35 __RCSID("$NetBSD: funcs.c,v 1.17 2019/12/17 02:31:05 christos Exp $"); 36 #endif 37 #endif /* lint */ 38 39 #include "magic.h" 40 #include <assert.h> 41 #include <stdarg.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <ctype.h> 45 #if defined(HAVE_WCHAR_H) 46 #include <wchar.h> 47 #endif 48 #if defined(HAVE_WCTYPE_H) 49 #include <wctype.h> 50 #endif 51 #include <limits.h> 52 53 #ifndef SIZE_MAX 54 #define SIZE_MAX ((size_t)~0) 55 #endif 56 57 /* 58 * Like printf, only we append to a buffer. 59 */ 60 protected int 61 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 62 { 63 int len; 64 char *buf, *newstr; 65 66 if (ms->event_flags & EVENT_HAD_ERR) 67 return 0; 68 len = vasprintf(&buf, fmt, ap); 69 if (len < 0) 70 goto out; 71 72 if (ms->o.buf != NULL) { 73 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 74 free(buf); 75 if (len < 0) 76 goto out; 77 free(ms->o.buf); 78 buf = newstr; 79 } 80 ms->o.buf = buf; 81 return 0; 82 out: 83 fprintf(stderr, "vasprintf failed (%s)", strerror(errno)); 84 return -1; 85 } 86 87 protected int 88 file_printf(struct magic_set *ms, const char *fmt, ...) 89 { 90 int rv; 91 va_list ap; 92 93 va_start(ap, fmt); 94 rv = file_vprintf(ms, fmt, ap); 95 va_end(ap); 96 return rv; 97 } 98 99 /* 100 * error - print best error message possible 101 */ 102 /*VARARGS*/ 103 __attribute__((__format__(__printf__, 3, 0))) 104 private void 105 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 106 size_t lineno) 107 { 108 /* Only the first error is ok */ 109 if (ms->event_flags & EVENT_HAD_ERR) 110 return; 111 if (lineno != 0) { 112 free(ms->o.buf); 113 ms->o.buf = NULL; 114 (void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno); 115 } 116 if (ms->o.buf && *ms->o.buf) 117 (void)file_printf(ms, " "); 118 (void)file_vprintf(ms, f, va); 119 if (error > 0) 120 (void)file_printf(ms, " (%s)", strerror(error)); 121 ms->event_flags |= EVENT_HAD_ERR; 122 ms->error = error; 123 } 124 125 /*VARARGS*/ 126 protected void 127 file_error(struct magic_set *ms, int error, const char *f, ...) 128 { 129 va_list va; 130 va_start(va, f); 131 file_error_core(ms, error, f, va, 0); 132 va_end(va); 133 } 134 135 /* 136 * Print an error with magic line number. 137 */ 138 /*VARARGS*/ 139 protected void 140 file_magerror(struct magic_set *ms, const char *f, ...) 141 { 142 va_list va; 143 va_start(va, f); 144 file_error_core(ms, 0, f, va, ms->line); 145 va_end(va); 146 } 147 148 protected void 149 file_oomem(struct magic_set *ms, size_t len) 150 { 151 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 152 len); 153 } 154 155 protected void 156 file_badseek(struct magic_set *ms) 157 { 158 file_error(ms, errno, "error seeking"); 159 } 160 161 protected void 162 file_badread(struct magic_set *ms) 163 { 164 file_error(ms, errno, "error reading"); 165 } 166 167 #ifndef COMPILE_ONLY 168 169 protected int 170 file_separator(struct magic_set *ms) 171 { 172 return file_printf(ms, "\n- "); 173 } 174 175 static int 176 checkdone(struct magic_set *ms, int *rv) 177 { 178 if ((ms->flags & MAGIC_CONTINUE) == 0) 179 return 1; 180 if (file_separator(ms) == -1) 181 *rv = -1; 182 return 0; 183 } 184 185 protected int 186 file_default(struct magic_set *ms, size_t nb) 187 { 188 if (ms->flags & MAGIC_MIME) { 189 if ((ms->flags & MAGIC_MIME_TYPE) && 190 file_printf(ms, "application/%s", 191 nb ? "octet-stream" : "x-empty") == -1) 192 return -1; 193 return 1; 194 } 195 if (ms->flags & MAGIC_APPLE) { 196 if (file_printf(ms, "UNKNUNKN") == -1) 197 return -1; 198 return 1; 199 } 200 if (ms->flags & MAGIC_EXTENSION) { 201 if (file_printf(ms, "???") == -1) 202 return -1; 203 return 1; 204 } 205 return 0; 206 } 207 208 /* 209 * The magic detection functions return: 210 * 1: found 211 * 0: not found 212 * -1: error 213 */ 214 /*ARGSUSED*/ 215 protected int 216 file_buffer(struct magic_set *ms, int fd, struct stat *st, 217 const char *inname __attribute__ ((__unused__)), 218 const void *buf, size_t nb) 219 { 220 int m = 0, rv = 0, looks_text = 0; 221 const char *code = NULL; 222 const char *code_mime = "binary"; 223 const char *def = "data"; 224 const char *ftype = NULL; 225 char *rbuf = NULL; 226 struct buffer b; 227 228 buffer_init(&b, fd, st, buf, nb); 229 ms->mode = b.st.st_mode; 230 231 if (nb == 0) { 232 def = "empty"; 233 goto simple; 234 } else if (nb == 1) { 235 def = "very short file (no magic)"; 236 goto simple; 237 } 238 239 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 240 looks_text = file_encoding(ms, &b, NULL, 0, 241 &code, &code_mime, &ftype); 242 } 243 244 #ifdef __EMX__ 245 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 246 m = file_os2_apptype(ms, inname, &b); 247 if ((ms->flags & MAGIC_DEBUG) != 0) 248 (void)fprintf(stderr, "[try os2_apptype %d]\n", m); 249 switch (m) { 250 case -1: 251 return -1; 252 case 0: 253 break; 254 default: 255 return 1; 256 } 257 } 258 #endif 259 #if HAVE_FORK 260 /* try compression stuff */ 261 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) { 262 m = file_zmagic(ms, &b, inname); 263 if ((ms->flags & MAGIC_DEBUG) != 0) 264 (void)fprintf(stderr, "[try zmagic %d]\n", m); 265 if (m) { 266 goto done_encoding; 267 } 268 } 269 #endif 270 /* Check if we have a tar file */ 271 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) { 272 m = file_is_tar(ms, &b); 273 if ((ms->flags & MAGIC_DEBUG) != 0) 274 (void)fprintf(stderr, "[try tar %d]\n", m); 275 if (m) { 276 if (checkdone(ms, &rv)) 277 goto done; 278 } 279 } 280 281 /* Check if we have a JSON file */ 282 if ((ms->flags & MAGIC_NO_CHECK_JSON) == 0) { 283 m = file_is_json(ms, &b); 284 if ((ms->flags & MAGIC_DEBUG) != 0) 285 (void)fprintf(stderr, "[try json %d]\n", m); 286 if (m) { 287 if (checkdone(ms, &rv)) 288 goto done; 289 } 290 } 291 292 /* Check if we have a CSV file */ 293 if ((ms->flags & MAGIC_NO_CHECK_CSV) == 0) { 294 m = file_is_csv(ms, &b, looks_text); 295 if ((ms->flags & MAGIC_DEBUG) != 0) 296 (void)fprintf(stderr, "[try csv %d]\n", m); 297 if (m) { 298 if (checkdone(ms, &rv)) 299 goto done; 300 } 301 } 302 303 /* Check if we have a CDF file */ 304 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) { 305 m = file_trycdf(ms, &b); 306 if ((ms->flags & MAGIC_DEBUG) != 0) 307 (void)fprintf(stderr, "[try cdf %d]\n", m); 308 if (m) { 309 if (checkdone(ms, &rv)) 310 goto done; 311 } 312 } 313 #ifdef BUILTIN_ELF 314 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && nb > 5 && fd != -1) { 315 file_pushbuf_t *pb; 316 /* 317 * We matched something in the file, so this 318 * *might* be an ELF file, and the file is at 319 * least 5 bytes long, so if it's an ELF file 320 * it has at least one byte past the ELF magic 321 * number - try extracting information from the 322 * ELF headers that cannot easily be extracted 323 * with rules in the magic file. We we don't 324 * print the information yet. 325 */ 326 if ((pb = file_push_buffer(ms)) == NULL) 327 return -1; 328 329 rv = file_tryelf(ms, &b); 330 rbuf = file_pop_buffer(ms, pb); 331 if (rv == -1) { 332 free(rbuf); 333 rbuf = NULL; 334 } 335 if ((ms->flags & MAGIC_DEBUG) != 0) 336 (void)fprintf(stderr, "[try elf %d]\n", m); 337 } 338 #endif 339 340 /* try soft magic tests */ 341 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { 342 m = file_softmagic(ms, &b, NULL, NULL, BINTEST, looks_text); 343 if ((ms->flags & MAGIC_DEBUG) != 0) 344 (void)fprintf(stderr, "[try softmagic %d]\n", m); 345 if (m == 1 && rbuf) { 346 if (file_printf(ms, "%s", rbuf) == -1) 347 goto done; 348 } 349 if (m) { 350 if (checkdone(ms, &rv)) 351 goto done; 352 } 353 } 354 355 /* try text properties */ 356 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 357 358 m = file_ascmagic(ms, &b, looks_text); 359 if ((ms->flags & MAGIC_DEBUG) != 0) 360 (void)fprintf(stderr, "[try ascmagic %d]\n", m); 361 if (m) { 362 goto done; 363 } 364 } 365 366 simple: 367 /* give up */ 368 if (m == 0) { 369 m = 1; 370 rv = file_default(ms, nb); 371 if (rv == 0) 372 if (file_printf(ms, "%s", def) == -1) 373 rv = -1; 374 } 375 done: 376 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 377 if (ms->flags & MAGIC_MIME_TYPE) 378 if (file_printf(ms, "; charset=") == -1) 379 rv = -1; 380 if (file_printf(ms, "%s", code_mime) == -1) 381 rv = -1; 382 } 383 #if HAVE_FORK 384 done_encoding: 385 #endif 386 free(rbuf); 387 buffer_fini(&b); 388 if (rv) 389 return rv; 390 391 return m; 392 } 393 #endif 394 395 protected int 396 file_reset(struct magic_set *ms, int checkloaded) 397 { 398 if (checkloaded && ms->mlist[0] == NULL) { 399 file_error(ms, 0, "no magic files loaded"); 400 return -1; 401 } 402 if (ms->o.buf) { 403 free(ms->o.buf); 404 ms->o.buf = NULL; 405 } 406 if (ms->o.pbuf) { 407 free(ms->o.pbuf); 408 ms->o.pbuf = NULL; 409 } 410 ms->event_flags &= ~EVENT_HAD_ERR; 411 ms->error = -1; 412 return 0; 413 } 414 415 #define OCTALIFY(n, o) \ 416 /*LINTED*/ \ 417 (void)(*(n)++ = '\\', \ 418 *(n)++ = ((CAST(uint32_t, *(o)) >> 6) & 3) + '0', \ 419 *(n)++ = ((CAST(uint32_t, *(o)) >> 3) & 7) + '0', \ 420 *(n)++ = ((CAST(uint32_t, *(o)) >> 0) & 7) + '0', \ 421 (o)++) 422 423 protected const char * 424 file_getbuffer(struct magic_set *ms) 425 { 426 char *pbuf, *op, *np; 427 size_t psize, len; 428 429 if (ms->event_flags & EVENT_HAD_ERR) 430 return NULL; 431 432 if (ms->flags & MAGIC_RAW) 433 return ms->o.buf; 434 435 if (ms->o.buf == NULL) 436 return NULL; 437 438 /* * 4 is for octal representation, + 1 is for NUL */ 439 len = strlen(ms->o.buf); 440 if (len > (SIZE_MAX - 1) / 4) { 441 file_oomem(ms, len); 442 return NULL; 443 } 444 psize = len * 4 + 1; 445 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 446 file_oomem(ms, psize); 447 return NULL; 448 } 449 ms->o.pbuf = pbuf; 450 451 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 452 { 453 mbstate_t state; 454 wchar_t nextchar; 455 int mb_conv = 1; 456 size_t bytesconsumed; 457 char *eop; 458 (void)memset(&state, 0, sizeof(mbstate_t)); 459 460 np = ms->o.pbuf; 461 op = ms->o.buf; 462 eop = op + len; 463 464 while (op < eop) { 465 bytesconsumed = mbrtowc(&nextchar, op, 466 CAST(size_t, eop - op), &state); 467 if (bytesconsumed == CAST(size_t, -1) || 468 bytesconsumed == CAST(size_t, -2)) { 469 mb_conv = 0; 470 break; 471 } 472 473 if (iswprint(nextchar)) { 474 (void)memcpy(np, op, bytesconsumed); 475 op += bytesconsumed; 476 np += bytesconsumed; 477 } else { 478 while (bytesconsumed-- > 0) 479 OCTALIFY(np, op); 480 } 481 } 482 *np = '\0'; 483 484 /* Parsing succeeded as a multi-byte sequence */ 485 if (mb_conv != 0) 486 return ms->o.pbuf; 487 } 488 #endif 489 490 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 491 if (isprint(CAST(unsigned char, *op))) { 492 *np++ = *op++; 493 } else { 494 OCTALIFY(np, op); 495 } 496 } 497 *np = '\0'; 498 return ms->o.pbuf; 499 } 500 501 protected int 502 file_check_mem(struct magic_set *ms, unsigned int level) 503 { 504 size_t len; 505 506 if (level >= ms->c.len) { 507 len = (ms->c.len = 20 + level) * sizeof(*ms->c.li); 508 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 509 malloc(len) : 510 realloc(ms->c.li, len)); 511 if (ms->c.li == NULL) { 512 file_oomem(ms, len); 513 return -1; 514 } 515 } 516 ms->c.li[level].got_match = 0; 517 #ifdef ENABLE_CONDITIONALS 518 ms->c.li[level].last_match = 0; 519 ms->c.li[level].last_cond = COND_NONE; 520 #endif /* ENABLE_CONDITIONALS */ 521 return 0; 522 } 523 524 protected size_t 525 file_printedlen(const struct magic_set *ms) 526 { 527 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 528 } 529 530 protected int 531 file_replace(struct magic_set *ms, const char *pat, const char *rep) 532 { 533 file_regex_t rx; 534 int rc, rv = -1; 535 536 rc = file_regcomp(&rx, pat, REG_EXTENDED); 537 if (rc) { 538 file_regerror(&rx, rc, ms); 539 } else { 540 regmatch_t rm; 541 int nm = 0; 542 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 543 ms->o.buf[rm.rm_so] = '\0'; 544 if (file_printf(ms, "%s%s", rep, 545 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 546 goto out; 547 nm++; 548 } 549 rv = nm; 550 } 551 out: 552 file_regfree(&rx); 553 return rv; 554 } 555 556 protected int 557 file_regcomp(file_regex_t *rx, const char *pat, int flags) 558 { 559 #ifdef USE_C_LOCALE 560 rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 561 assert(rx->c_lc_ctype != NULL); 562 rx->old_lc_ctype = uselocale(rx->c_lc_ctype); 563 assert(rx->old_lc_ctype != NULL); 564 #else 565 rx->old_lc_ctype = setlocale(LC_CTYPE, NULL); 566 assert(rx->old_lc_ctype != NULL); 567 rx->old_lc_ctype = strdup(rx->old_lc_ctype); 568 assert(rx->old_lc_ctype != NULL); 569 (void)setlocale(LC_CTYPE, "C"); 570 #endif 571 rx->pat = pat; 572 573 return rx->rc = regcomp(&rx->rx, pat, flags); 574 } 575 576 protected int 577 file_regexec(file_regex_t *rx, const char *str, size_t nmatch, 578 regmatch_t* pmatch, int eflags) 579 { 580 assert(rx->rc == 0); 581 /* XXX: force initialization because glibc does not always do this */ 582 if (nmatch != 0) 583 memset(pmatch, 0, nmatch * sizeof(*pmatch)); 584 return regexec(&rx->rx, str, nmatch, pmatch, eflags); 585 } 586 587 protected void 588 file_regfree(file_regex_t *rx) 589 { 590 if (rx->rc == 0) 591 regfree(&rx->rx); 592 #ifdef USE_C_LOCALE 593 (void)uselocale(rx->old_lc_ctype); 594 freelocale(rx->c_lc_ctype); 595 #else 596 (void)setlocale(LC_CTYPE, rx->old_lc_ctype); 597 free(rx->old_lc_ctype); 598 #endif 599 } 600 601 protected void 602 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms) 603 { 604 char errmsg[512]; 605 606 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg)); 607 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat, 608 errmsg); 609 } 610 611 protected file_pushbuf_t * 612 file_push_buffer(struct magic_set *ms) 613 { 614 file_pushbuf_t *pb; 615 616 if (ms->event_flags & EVENT_HAD_ERR) 617 return NULL; 618 619 if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL) 620 return NULL; 621 622 pb->buf = ms->o.buf; 623 pb->offset = ms->offset; 624 625 ms->o.buf = NULL; 626 ms->offset = 0; 627 628 return pb; 629 } 630 631 protected char * 632 file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) 633 { 634 char *rbuf; 635 636 if (ms->event_flags & EVENT_HAD_ERR) { 637 free(pb->buf); 638 free(pb); 639 return NULL; 640 } 641 642 rbuf = ms->o.buf; 643 644 ms->o.buf = pb->buf; 645 ms->offset = pb->offset; 646 647 free(pb); 648 return rbuf; 649 } 650 651 /* 652 * convert string to ascii printable format. 653 */ 654 protected char * 655 file_printable(char *buf, size_t bufsiz, const char *str, size_t slen) 656 { 657 char *ptr, *eptr = buf + bufsiz - 1; 658 const unsigned char *s = RCAST(const unsigned char *, str); 659 const unsigned char *es = s + slen; 660 661 for (ptr = buf; ptr < eptr && s < es && *s; s++) { 662 if (isprint(*s)) { 663 *ptr++ = *s; 664 continue; 665 } 666 if (ptr >= eptr - 3) 667 break; 668 *ptr++ = '\\'; 669 *ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0'; 670 *ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0'; 671 *ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0'; 672 } 673 *ptr = '\0'; 674 return buf; 675 } 676