1 /* $NetBSD: funcs.c,v 1.15 2018/10/19 00:11:48 christos Exp $ */ 2 3 /* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include "file.h" 30 31 #ifndef lint 32 #if 0 33 FILE_RCSID("@(#)$File: funcs.c,v 1.100 2018/10/01 18:45:39 christos Exp $") 34 #else 35 __RCSID("$NetBSD: funcs.c,v 1.15 2018/10/19 00:11:48 christos Exp $"); 36 #endif 37 #endif /* lint */ 38 39 #include "magic.h" 40 #include <assert.h> 41 #include <stdarg.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <ctype.h> 45 #if defined(HAVE_WCHAR_H) 46 #include <wchar.h> 47 #endif 48 #if defined(HAVE_WCTYPE_H) 49 #include <wctype.h> 50 #endif 51 #include <limits.h> 52 53 #ifndef SIZE_MAX 54 #define SIZE_MAX ((size_t)~0) 55 #endif 56 57 /* 58 * Like printf, only we append to a buffer. 59 */ 60 protected int 61 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 62 { 63 int len; 64 char *buf, *newstr; 65 66 if (ms->event_flags & EVENT_HAD_ERR) 67 return 0; 68 len = vasprintf(&buf, fmt, ap); 69 if (len < 0) 70 goto out; 71 72 if (ms->o.buf != NULL) { 73 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 74 free(buf); 75 if (len < 0) 76 goto out; 77 free(ms->o.buf); 78 buf = newstr; 79 } 80 ms->o.buf = buf; 81 return 0; 82 out: 83 fprintf(stderr, "vasprintf failed (%s)", strerror(errno)); 84 return -1; 85 } 86 87 protected int 88 file_printf(struct magic_set *ms, const char *fmt, ...) 89 { 90 int rv; 91 va_list ap; 92 93 va_start(ap, fmt); 94 rv = file_vprintf(ms, fmt, ap); 95 va_end(ap); 96 return rv; 97 } 98 99 /* 100 * error - print best error message possible 101 */ 102 /*VARARGS*/ 103 __attribute__((__format__(__printf__, 3, 0))) 104 private void 105 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 106 size_t lineno) 107 { 108 /* Only the first error is ok */ 109 if (ms->event_flags & EVENT_HAD_ERR) 110 return; 111 if (lineno != 0) { 112 free(ms->o.buf); 113 ms->o.buf = NULL; 114 (void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno); 115 } 116 if (ms->o.buf && *ms->o.buf) 117 (void)file_printf(ms, " "); 118 (void)file_vprintf(ms, f, va); 119 if (error > 0) 120 (void)file_printf(ms, " (%s)", strerror(error)); 121 ms->event_flags |= EVENT_HAD_ERR; 122 ms->error = error; 123 } 124 125 /*VARARGS*/ 126 protected void 127 file_error(struct magic_set *ms, int error, const char *f, ...) 128 { 129 va_list va; 130 va_start(va, f); 131 file_error_core(ms, error, f, va, 0); 132 va_end(va); 133 } 134 135 /* 136 * Print an error with magic line number. 137 */ 138 /*VARARGS*/ 139 protected void 140 file_magerror(struct magic_set *ms, const char *f, ...) 141 { 142 va_list va; 143 va_start(va, f); 144 file_error_core(ms, 0, f, va, ms->line); 145 va_end(va); 146 } 147 148 protected void 149 file_oomem(struct magic_set *ms, size_t len) 150 { 151 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 152 len); 153 } 154 155 protected void 156 file_badseek(struct magic_set *ms) 157 { 158 file_error(ms, errno, "error seeking"); 159 } 160 161 protected void 162 file_badread(struct magic_set *ms) 163 { 164 file_error(ms, errno, "error reading"); 165 } 166 167 #ifndef COMPILE_ONLY 168 169 static int 170 checkdone(struct magic_set *ms, int *rv) 171 { 172 if ((ms->flags & MAGIC_CONTINUE) == 0) 173 return 1; 174 if (file_printf(ms, "\n- ") == -1) 175 *rv = -1; 176 return 0; 177 } 178 179 protected int 180 file_default(struct magic_set *ms, size_t nb) 181 { 182 if (ms->flags & MAGIC_MIME) { 183 if ((ms->flags & MAGIC_MIME_TYPE) && 184 file_printf(ms, "application/%s", 185 nb ? "octet-stream" : "x-empty") == -1) 186 return -1; 187 return 1; 188 } 189 if (ms->flags & MAGIC_APPLE) { 190 if (file_printf(ms, "UNKNUNKN") == -1) 191 return -1; 192 return 1; 193 } 194 if (ms->flags & MAGIC_EXTENSION) { 195 if (file_printf(ms, "???") == -1) 196 return -1; 197 return 1; 198 } 199 return 0; 200 } 201 202 /* 203 * The magic detection functions return: 204 * 1: found 205 * 0: not found 206 * -1: error 207 */ 208 /*ARGSUSED*/ 209 protected int 210 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 211 const void *buf, size_t nb) 212 { 213 int m = 0, rv = 0, looks_text = 0; 214 const char *code = NULL; 215 const char *code_mime = "binary"; 216 const char *def = "data"; 217 const char *ftype = NULL; 218 char *rbuf = NULL; 219 struct buffer b; 220 221 buffer_init(&b, fd, buf, nb); 222 ms->mode = b.st.st_mode; 223 224 if (nb == 0) { 225 def = "empty"; 226 goto simple; 227 } else if (nb == 1) { 228 def = "very short file (no magic)"; 229 goto simple; 230 } 231 232 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 233 looks_text = file_encoding(ms, &b, NULL, 0, 234 &code, &code_mime, &ftype); 235 } 236 237 #ifdef __EMX__ 238 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 239 m = file_os2_apptype(ms, inname, &b); 240 if ((ms->flags & MAGIC_DEBUG) != 0) 241 (void)fprintf(stderr, "[try os2_apptype %d]\n", m); 242 switch (m) { 243 case -1: 244 return -1; 245 case 0: 246 break; 247 default: 248 return 1; 249 } 250 } 251 #endif 252 #if HAVE_FORK 253 /* try compression stuff */ 254 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) { 255 m = file_zmagic(ms, &b, inname); 256 if ((ms->flags & MAGIC_DEBUG) != 0) 257 (void)fprintf(stderr, "[try zmagic %d]\n", m); 258 if (m) { 259 goto done_encoding; 260 } 261 } 262 #endif 263 /* Check if we have a tar file */ 264 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) { 265 m = file_is_tar(ms, &b); 266 if ((ms->flags & MAGIC_DEBUG) != 0) 267 (void)fprintf(stderr, "[try tar %d]\n", m); 268 if (m) { 269 if (checkdone(ms, &rv)) 270 goto done; 271 } 272 } 273 274 /* Check if we have a JSON file */ 275 if ((ms->flags & MAGIC_NO_CHECK_JSON) == 0) { 276 m = file_is_json(ms, &b); 277 if ((ms->flags & MAGIC_DEBUG) != 0) 278 (void)fprintf(stderr, "[try json %d]\n", m); 279 if (m) { 280 if (checkdone(ms, &rv)) 281 goto done; 282 } 283 } 284 285 /* Check if we have a CDF file */ 286 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) { 287 m = file_trycdf(ms, &b); 288 if ((ms->flags & MAGIC_DEBUG) != 0) 289 (void)fprintf(stderr, "[try cdf %d]\n", m); 290 if (m) { 291 if (checkdone(ms, &rv)) 292 goto done; 293 } 294 } 295 #ifdef BUILTIN_ELF 296 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && nb > 5 && fd != -1) { 297 file_pushbuf_t *pb; 298 /* 299 * We matched something in the file, so this 300 * *might* be an ELF file, and the file is at 301 * least 5 bytes long, so if it's an ELF file 302 * it has at least one byte past the ELF magic 303 * number - try extracting information from the 304 * ELF headers that cannot easily be extracted 305 * with rules in the magic file. We we don't 306 * print the information yet. 307 */ 308 if ((pb = file_push_buffer(ms)) == NULL) 309 return -1; 310 311 rv = file_tryelf(ms, &b); 312 rbuf = file_pop_buffer(ms, pb); 313 if (rv == -1) { 314 free(rbuf); 315 rbuf = NULL; 316 } 317 if ((ms->flags & MAGIC_DEBUG) != 0) 318 (void)fprintf(stderr, "[try elf %d]\n", m); 319 } 320 #endif 321 322 /* try soft magic tests */ 323 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { 324 m = file_softmagic(ms, &b, NULL, NULL, BINTEST, looks_text); 325 if ((ms->flags & MAGIC_DEBUG) != 0) 326 (void)fprintf(stderr, "[try softmagic %d]\n", m); 327 if (m == 1 && rbuf) { 328 if (file_printf(ms, "%s", rbuf) == -1) 329 goto done; 330 } 331 if (m) { 332 if (checkdone(ms, &rv)) 333 goto done; 334 } 335 } 336 337 /* try text properties */ 338 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 339 340 m = file_ascmagic(ms, &b, looks_text); 341 if ((ms->flags & MAGIC_DEBUG) != 0) 342 (void)fprintf(stderr, "[try ascmagic %d]\n", m); 343 if (m) { 344 if (checkdone(ms, &rv)) 345 goto done; 346 } 347 } 348 349 simple: 350 /* give up */ 351 if (m == 0) { 352 m = 1; 353 rv = file_default(ms, nb); 354 if (rv == 0) 355 if (file_printf(ms, "%s", def) == -1) 356 rv = -1; 357 } 358 done: 359 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 360 if (ms->flags & MAGIC_MIME_TYPE) 361 if (file_printf(ms, "; charset=") == -1) 362 rv = -1; 363 if (file_printf(ms, "%s", code_mime) == -1) 364 rv = -1; 365 } 366 #if HAVE_FORK 367 done_encoding: 368 #endif 369 free(rbuf); 370 buffer_fini(&b); 371 if (rv) 372 return rv; 373 374 return m; 375 } 376 #endif 377 378 protected int 379 file_reset(struct magic_set *ms, int checkloaded) 380 { 381 if (checkloaded && ms->mlist[0] == NULL) { 382 file_error(ms, 0, "no magic files loaded"); 383 return -1; 384 } 385 if (ms->o.buf) { 386 free(ms->o.buf); 387 ms->o.buf = NULL; 388 } 389 if (ms->o.pbuf) { 390 free(ms->o.pbuf); 391 ms->o.pbuf = NULL; 392 } 393 ms->event_flags &= ~EVENT_HAD_ERR; 394 ms->error = -1; 395 return 0; 396 } 397 398 #define OCTALIFY(n, o) \ 399 /*LINTED*/ \ 400 (void)(*(n)++ = '\\', \ 401 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 402 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 403 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 404 (o)++) 405 406 protected const char * 407 file_getbuffer(struct magic_set *ms) 408 { 409 char *pbuf, *op, *np; 410 size_t psize, len; 411 412 if (ms->event_flags & EVENT_HAD_ERR) 413 return NULL; 414 415 if (ms->flags & MAGIC_RAW) 416 return ms->o.buf; 417 418 if (ms->o.buf == NULL) 419 return NULL; 420 421 /* * 4 is for octal representation, + 1 is for NUL */ 422 len = strlen(ms->o.buf); 423 if (len > (SIZE_MAX - 1) / 4) { 424 file_oomem(ms, len); 425 return NULL; 426 } 427 psize = len * 4 + 1; 428 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 429 file_oomem(ms, psize); 430 return NULL; 431 } 432 ms->o.pbuf = pbuf; 433 434 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 435 { 436 mbstate_t state; 437 wchar_t nextchar; 438 int mb_conv = 1; 439 size_t bytesconsumed; 440 char *eop; 441 (void)memset(&state, 0, sizeof(mbstate_t)); 442 443 np = ms->o.pbuf; 444 op = ms->o.buf; 445 eop = op + len; 446 447 while (op < eop) { 448 bytesconsumed = mbrtowc(&nextchar, op, 449 (size_t)(eop - op), &state); 450 if (bytesconsumed == (size_t)(-1) || 451 bytesconsumed == (size_t)(-2)) { 452 mb_conv = 0; 453 break; 454 } 455 456 if (iswprint(nextchar)) { 457 (void)memcpy(np, op, bytesconsumed); 458 op += bytesconsumed; 459 np += bytesconsumed; 460 } else { 461 while (bytesconsumed-- > 0) 462 OCTALIFY(np, op); 463 } 464 } 465 *np = '\0'; 466 467 /* Parsing succeeded as a multi-byte sequence */ 468 if (mb_conv != 0) 469 return ms->o.pbuf; 470 } 471 #endif 472 473 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 474 if (isprint((unsigned char)*op)) { 475 *np++ = *op++; 476 } else { 477 OCTALIFY(np, op); 478 } 479 } 480 *np = '\0'; 481 return ms->o.pbuf; 482 } 483 484 protected int 485 file_check_mem(struct magic_set *ms, unsigned int level) 486 { 487 size_t len; 488 489 if (level >= ms->c.len) { 490 len = (ms->c.len = 20 + level) * sizeof(*ms->c.li); 491 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 492 malloc(len) : 493 realloc(ms->c.li, len)); 494 if (ms->c.li == NULL) { 495 file_oomem(ms, len); 496 return -1; 497 } 498 } 499 ms->c.li[level].got_match = 0; 500 #ifdef ENABLE_CONDITIONALS 501 ms->c.li[level].last_match = 0; 502 ms->c.li[level].last_cond = COND_NONE; 503 #endif /* ENABLE_CONDITIONALS */ 504 return 0; 505 } 506 507 protected size_t 508 file_printedlen(const struct magic_set *ms) 509 { 510 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 511 } 512 513 protected int 514 file_replace(struct magic_set *ms, const char *pat, const char *rep) 515 { 516 file_regex_t rx; 517 int rc, rv = -1; 518 519 rc = file_regcomp(&rx, pat, REG_EXTENDED); 520 if (rc) { 521 file_regerror(&rx, rc, ms); 522 } else { 523 regmatch_t rm; 524 int nm = 0; 525 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 526 ms->o.buf[rm.rm_so] = '\0'; 527 if (file_printf(ms, "%s%s", rep, 528 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 529 goto out; 530 nm++; 531 } 532 rv = nm; 533 } 534 out: 535 file_regfree(&rx); 536 return rv; 537 } 538 539 protected int 540 file_regcomp(file_regex_t *rx, const char *pat, int flags) 541 { 542 #ifdef USE_C_LOCALE 543 rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 544 assert(rx->c_lc_ctype != NULL); 545 rx->old_lc_ctype = uselocale(rx->c_lc_ctype); 546 assert(rx->old_lc_ctype != NULL); 547 #else 548 rx->old_lc_ctype = setlocale(LC_CTYPE, "C"); 549 #endif 550 rx->pat = pat; 551 552 return rx->rc = regcomp(&rx->rx, pat, flags); 553 } 554 555 protected int 556 file_regexec(file_regex_t *rx, const char *str, size_t nmatch, 557 regmatch_t* pmatch, int eflags) 558 { 559 assert(rx->rc == 0); 560 /* XXX: force initialization because glibc does not always do this */ 561 memset(pmatch, 0, nmatch * sizeof(*pmatch)); 562 return regexec(&rx->rx, str, nmatch, pmatch, eflags); 563 } 564 565 protected void 566 file_regfree(file_regex_t *rx) 567 { 568 if (rx->rc == 0) 569 regfree(&rx->rx); 570 #ifdef USE_C_LOCALE 571 (void)uselocale(rx->old_lc_ctype); 572 freelocale(rx->c_lc_ctype); 573 #else 574 (void)setlocale(LC_CTYPE, rx->old_lc_ctype); 575 #endif 576 } 577 578 protected void 579 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms) 580 { 581 char errmsg[512]; 582 583 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg)); 584 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat, 585 errmsg); 586 } 587 588 protected file_pushbuf_t * 589 file_push_buffer(struct magic_set *ms) 590 { 591 file_pushbuf_t *pb; 592 593 if (ms->event_flags & EVENT_HAD_ERR) 594 return NULL; 595 596 if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL) 597 return NULL; 598 599 pb->buf = ms->o.buf; 600 pb->offset = ms->offset; 601 602 ms->o.buf = NULL; 603 ms->offset = 0; 604 605 return pb; 606 } 607 608 protected char * 609 file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) 610 { 611 char *rbuf; 612 613 if (ms->event_flags & EVENT_HAD_ERR) { 614 free(pb->buf); 615 free(pb); 616 return NULL; 617 } 618 619 rbuf = ms->o.buf; 620 621 ms->o.buf = pb->buf; 622 ms->offset = pb->offset; 623 624 free(pb); 625 return rbuf; 626 } 627 628 /* 629 * convert string to ascii printable format. 630 */ 631 protected char * 632 file_printable(char *buf, size_t bufsiz, const char *str) 633 { 634 char *ptr, *eptr; 635 const unsigned char *s = (const unsigned char *)str; 636 637 for (ptr = buf, eptr = ptr + bufsiz - 1; ptr < eptr && *s; s++) { 638 if (isprint(*s)) { 639 *ptr++ = *s; 640 continue; 641 } 642 if (ptr >= eptr - 3) 643 break; 644 *ptr++ = '\\'; 645 *ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0'; 646 *ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0'; 647 *ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0'; 648 } 649 *ptr = '\0'; 650 return buf; 651 } 652