1 /* $NetBSD: funcs.c,v 1.14 2018/04/15 19:45:32 christos Exp $ */ 2 3 /* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include "file.h" 30 31 #ifndef lint 32 #if 0 33 FILE_RCSID("@(#)$File: funcs.c,v 1.94 2017/11/02 20:25:39 christos Exp $") 34 #else 35 __RCSID("$NetBSD: funcs.c,v 1.14 2018/04/15 19:45:32 christos Exp $"); 36 #endif 37 #endif /* lint */ 38 39 #include "magic.h" 40 #include <assert.h> 41 #include <stdarg.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <ctype.h> 45 #if defined(HAVE_WCHAR_H) 46 #include <wchar.h> 47 #endif 48 #if defined(HAVE_WCTYPE_H) 49 #include <wctype.h> 50 #endif 51 #if defined(HAVE_LIMITS_H) 52 #include <limits.h> 53 #endif 54 55 #ifndef SIZE_MAX 56 #define SIZE_MAX ((size_t)~0) 57 #endif 58 59 /* 60 * Like printf, only we append to a buffer. 61 */ 62 protected int 63 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 64 { 65 int len; 66 char *buf, *newstr; 67 68 if (ms->event_flags & EVENT_HAD_ERR) 69 return 0; 70 len = vasprintf(&buf, fmt, ap); 71 if (len < 0) 72 goto out; 73 74 if (ms->o.buf != NULL) { 75 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 76 free(buf); 77 if (len < 0) 78 goto out; 79 free(ms->o.buf); 80 buf = newstr; 81 } 82 ms->o.buf = buf; 83 return 0; 84 out: 85 fprintf(stderr, "vasprintf failed (%s)", strerror(errno)); 86 return -1; 87 } 88 89 protected int 90 file_printf(struct magic_set *ms, const char *fmt, ...) 91 { 92 int rv; 93 va_list ap; 94 95 va_start(ap, fmt); 96 rv = file_vprintf(ms, fmt, ap); 97 va_end(ap); 98 return rv; 99 } 100 101 /* 102 * error - print best error message possible 103 */ 104 /*VARARGS*/ 105 __attribute__((__format__(__printf__, 3, 0))) 106 private void 107 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 108 size_t lineno) 109 { 110 /* Only the first error is ok */ 111 if (ms->event_flags & EVENT_HAD_ERR) 112 return; 113 if (lineno != 0) { 114 free(ms->o.buf); 115 ms->o.buf = NULL; 116 file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno); 117 } 118 if (ms->o.buf && *ms->o.buf) 119 file_printf(ms, " "); 120 file_vprintf(ms, f, va); 121 if (error > 0) 122 file_printf(ms, " (%s)", strerror(error)); 123 ms->event_flags |= EVENT_HAD_ERR; 124 ms->error = error; 125 } 126 127 /*VARARGS*/ 128 protected void 129 file_error(struct magic_set *ms, int error, const char *f, ...) 130 { 131 va_list va; 132 va_start(va, f); 133 file_error_core(ms, error, f, va, 0); 134 va_end(va); 135 } 136 137 /* 138 * Print an error with magic line number. 139 */ 140 /*VARARGS*/ 141 protected void 142 file_magerror(struct magic_set *ms, const char *f, ...) 143 { 144 va_list va; 145 va_start(va, f); 146 file_error_core(ms, 0, f, va, ms->line); 147 va_end(va); 148 } 149 150 protected void 151 file_oomem(struct magic_set *ms, size_t len) 152 { 153 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 154 len); 155 } 156 157 protected void 158 file_badseek(struct magic_set *ms) 159 { 160 file_error(ms, errno, "error seeking"); 161 } 162 163 protected void 164 file_badread(struct magic_set *ms) 165 { 166 file_error(ms, errno, "error reading"); 167 } 168 169 #ifndef COMPILE_ONLY 170 171 static int 172 checkdone(struct magic_set *ms, int *rv) 173 { 174 if ((ms->flags & MAGIC_CONTINUE) == 0) 175 return 1; 176 if (file_printf(ms, "\n- ") == -1) 177 *rv = -1; 178 return 0; 179 } 180 181 /*ARGSUSED*/ 182 protected int 183 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 184 const void *buf, size_t nb) 185 { 186 int m = 0, rv = 0, looks_text = 0; 187 const char *code = NULL; 188 const char *code_mime = "binary"; 189 const char *type = "application/octet-stream"; 190 const char *def = "data"; 191 const char *ftype = NULL; 192 struct buffer b; 193 194 buffer_init(&b, fd, buf, nb); 195 196 if (nb == 0) { 197 def = "empty"; 198 type = "application/x-empty"; 199 goto simple; 200 } else if (nb == 1) { 201 def = "very short file (no magic)"; 202 goto simple; 203 } 204 205 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 206 looks_text = file_encoding(ms, &b, NULL, 0, 207 &code, &code_mime, &ftype); 208 } 209 210 #ifdef __EMX__ 211 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 212 m = file_os2_apptype(ms, inname, &b); 213 if ((ms->flags & MAGIC_DEBUG) != 0) 214 (void)fprintf(stderr, "[try os2_apptype %d]\n", m); 215 switch (m) { 216 case -1: 217 return -1; 218 case 0: 219 break; 220 default: 221 return 1; 222 } 223 } 224 #endif 225 #if HAVE_FORK 226 /* try compression stuff */ 227 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) { 228 m = file_zmagic(ms, &b, inname); 229 if ((ms->flags & MAGIC_DEBUG) != 0) 230 (void)fprintf(stderr, "[try zmagic %d]\n", m); 231 if (m) { 232 goto done_encoding; 233 } 234 } 235 #endif 236 /* Check if we have a tar file */ 237 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) { 238 m = file_is_tar(ms, &b); 239 if ((ms->flags & MAGIC_DEBUG) != 0) 240 (void)fprintf(stderr, "[try tar %d]\n", m); 241 if (m) { 242 if (checkdone(ms, &rv)) 243 goto done; 244 } 245 } 246 247 /* Check if we have a CDF file */ 248 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) { 249 m = file_trycdf(ms, &b); 250 if ((ms->flags & MAGIC_DEBUG) != 0) 251 (void)fprintf(stderr, "[try cdf %d]\n", m); 252 if (m) { 253 if (checkdone(ms, &rv)) 254 goto done; 255 } 256 } 257 258 /* try soft magic tests */ 259 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { 260 m = file_softmagic(ms, &b, NULL, NULL, BINTEST, looks_text); 261 if ((ms->flags & MAGIC_DEBUG) != 0) 262 (void)fprintf(stderr, "[try softmagic %d]\n", m); 263 if (m) { 264 #ifdef BUILTIN_ELF 265 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 266 nb > 5 && fd != -1) { 267 /* 268 * We matched something in the file, so this 269 * *might* be an ELF file, and the file is at 270 * least 5 bytes long, so if it's an ELF file 271 * it has at least one byte past the ELF magic 272 * number - try extracting information from the 273 * ELF headers that cannot easily * be 274 * extracted with rules in the magic file. 275 */ 276 m = file_tryelf(ms, &b); 277 if ((ms->flags & MAGIC_DEBUG) != 0) 278 (void)fprintf(stderr, "[try elf %d]\n", 279 m); 280 } 281 #endif 282 if (checkdone(ms, &rv)) 283 goto done; 284 } 285 } 286 287 /* try text properties */ 288 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 289 290 m = file_ascmagic(ms, &b, looks_text); 291 if ((ms->flags & MAGIC_DEBUG) != 0) 292 (void)fprintf(stderr, "[try ascmagic %d]\n", m); 293 if (m) { 294 if (checkdone(ms, &rv)) 295 goto done; 296 } 297 } 298 299 simple: 300 /* give up */ 301 m = 1; 302 if (ms->flags & MAGIC_MIME) { 303 if ((ms->flags & MAGIC_MIME_TYPE) && 304 file_printf(ms, "%s", type) == -1) 305 rv = -1; 306 } else if (ms->flags & MAGIC_APPLE) { 307 if (file_printf(ms, "UNKNUNKN") == -1) 308 rv = -1; 309 } else if (ms->flags & MAGIC_EXTENSION) { 310 if (file_printf(ms, "???") == -1) 311 rv = -1; 312 } else { 313 if (file_printf(ms, "%s", def) == -1) 314 rv = -1; 315 } 316 done: 317 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 318 if (ms->flags & MAGIC_MIME_TYPE) 319 if (file_printf(ms, "; charset=") == -1) 320 rv = -1; 321 if (file_printf(ms, "%s", code_mime) == -1) 322 rv = -1; 323 } 324 #if HAVE_FORK 325 done_encoding: 326 #endif 327 buffer_fini(&b); 328 if (rv) 329 return rv; 330 331 return m; 332 } 333 #endif 334 335 protected int 336 file_reset(struct magic_set *ms, int checkloaded) 337 { 338 if (checkloaded && ms->mlist[0] == NULL) { 339 file_error(ms, 0, "no magic files loaded"); 340 return -1; 341 } 342 if (ms->o.buf) { 343 free(ms->o.buf); 344 ms->o.buf = NULL; 345 } 346 if (ms->o.pbuf) { 347 free(ms->o.pbuf); 348 ms->o.pbuf = NULL; 349 } 350 ms->event_flags &= ~EVENT_HAD_ERR; 351 ms->error = -1; 352 return 0; 353 } 354 355 #define OCTALIFY(n, o) \ 356 /*LINTED*/ \ 357 (void)(*(n)++ = '\\', \ 358 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 359 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 360 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 361 (o)++) 362 363 protected const char * 364 file_getbuffer(struct magic_set *ms) 365 { 366 char *pbuf, *op, *np; 367 size_t psize, len; 368 369 if (ms->event_flags & EVENT_HAD_ERR) 370 return NULL; 371 372 if (ms->flags & MAGIC_RAW) 373 return ms->o.buf; 374 375 if (ms->o.buf == NULL) 376 return NULL; 377 378 /* * 4 is for octal representation, + 1 is for NUL */ 379 len = strlen(ms->o.buf); 380 if (len > (SIZE_MAX - 1) / 4) { 381 file_oomem(ms, len); 382 return NULL; 383 } 384 psize = len * 4 + 1; 385 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 386 file_oomem(ms, psize); 387 return NULL; 388 } 389 ms->o.pbuf = pbuf; 390 391 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 392 { 393 mbstate_t state; 394 wchar_t nextchar; 395 int mb_conv = 1; 396 size_t bytesconsumed; 397 char *eop; 398 (void)memset(&state, 0, sizeof(mbstate_t)); 399 400 np = ms->o.pbuf; 401 op = ms->o.buf; 402 eop = op + len; 403 404 while (op < eop) { 405 bytesconsumed = mbrtowc(&nextchar, op, 406 (size_t)(eop - op), &state); 407 if (bytesconsumed == (size_t)(-1) || 408 bytesconsumed == (size_t)(-2)) { 409 mb_conv = 0; 410 break; 411 } 412 413 if (iswprint(nextchar)) { 414 (void)memcpy(np, op, bytesconsumed); 415 op += bytesconsumed; 416 np += bytesconsumed; 417 } else { 418 while (bytesconsumed-- > 0) 419 OCTALIFY(np, op); 420 } 421 } 422 *np = '\0'; 423 424 /* Parsing succeeded as a multi-byte sequence */ 425 if (mb_conv != 0) 426 return ms->o.pbuf; 427 } 428 #endif 429 430 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 431 if (isprint((unsigned char)*op)) { 432 *np++ = *op++; 433 } else { 434 OCTALIFY(np, op); 435 } 436 } 437 *np = '\0'; 438 return ms->o.pbuf; 439 } 440 441 protected int 442 file_check_mem(struct magic_set *ms, unsigned int level) 443 { 444 size_t len; 445 446 if (level >= ms->c.len) { 447 len = (ms->c.len = 20 + level) * sizeof(*ms->c.li); 448 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 449 malloc(len) : 450 realloc(ms->c.li, len)); 451 if (ms->c.li == NULL) { 452 file_oomem(ms, len); 453 return -1; 454 } 455 } 456 ms->c.li[level].got_match = 0; 457 #ifdef ENABLE_CONDITIONALS 458 ms->c.li[level].last_match = 0; 459 ms->c.li[level].last_cond = COND_NONE; 460 #endif /* ENABLE_CONDITIONALS */ 461 return 0; 462 } 463 464 protected size_t 465 file_printedlen(const struct magic_set *ms) 466 { 467 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 468 } 469 470 protected int 471 file_replace(struct magic_set *ms, const char *pat, const char *rep) 472 { 473 file_regex_t rx; 474 int rc, rv = -1; 475 476 rc = file_regcomp(&rx, pat, REG_EXTENDED); 477 if (rc) { 478 file_regerror(&rx, rc, ms); 479 } else { 480 regmatch_t rm; 481 int nm = 0; 482 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 483 ms->o.buf[rm.rm_so] = '\0'; 484 if (file_printf(ms, "%s%s", rep, 485 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 486 goto out; 487 nm++; 488 } 489 rv = nm; 490 } 491 out: 492 file_regfree(&rx); 493 return rv; 494 } 495 496 protected int 497 file_regcomp(file_regex_t *rx, const char *pat, int flags) 498 { 499 #ifdef USE_C_LOCALE 500 rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 501 assert(rx->c_lc_ctype != NULL); 502 rx->old_lc_ctype = uselocale(rx->c_lc_ctype); 503 assert(rx->old_lc_ctype != NULL); 504 #else 505 rx->old_lc_ctype = setlocale(LC_CTYPE, "C"); 506 #endif 507 rx->pat = pat; 508 509 return rx->rc = regcomp(&rx->rx, pat, flags); 510 } 511 512 protected int 513 file_regexec(file_regex_t *rx, const char *str, size_t nmatch, 514 regmatch_t* pmatch, int eflags) 515 { 516 assert(rx->rc == 0); 517 /* XXX: force initialization because glibc does not always do this */ 518 memset(pmatch, 0, nmatch * sizeof(*pmatch)); 519 return regexec(&rx->rx, str, nmatch, pmatch, eflags); 520 } 521 522 protected void 523 file_regfree(file_regex_t *rx) 524 { 525 if (rx->rc == 0) 526 regfree(&rx->rx); 527 #ifdef USE_C_LOCALE 528 (void)uselocale(rx->old_lc_ctype); 529 freelocale(rx->c_lc_ctype); 530 #else 531 (void)setlocale(LC_CTYPE, rx->old_lc_ctype); 532 #endif 533 } 534 535 protected void 536 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms) 537 { 538 char errmsg[512]; 539 540 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg)); 541 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat, 542 errmsg); 543 } 544 545 protected file_pushbuf_t * 546 file_push_buffer(struct magic_set *ms) 547 { 548 file_pushbuf_t *pb; 549 550 if (ms->event_flags & EVENT_HAD_ERR) 551 return NULL; 552 553 if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL) 554 return NULL; 555 556 pb->buf = ms->o.buf; 557 pb->offset = ms->offset; 558 559 ms->o.buf = NULL; 560 ms->offset = 0; 561 562 return pb; 563 } 564 565 protected char * 566 file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) 567 { 568 char *rbuf; 569 570 if (ms->event_flags & EVENT_HAD_ERR) { 571 free(pb->buf); 572 free(pb); 573 return NULL; 574 } 575 576 rbuf = ms->o.buf; 577 578 ms->o.buf = pb->buf; 579 ms->offset = pb->offset; 580 581 free(pb); 582 return rbuf; 583 } 584 585 /* 586 * convert string to ascii printable format. 587 */ 588 protected char * 589 file_printable(char *buf, size_t bufsiz, const char *str) 590 { 591 char *ptr, *eptr; 592 const unsigned char *s = (const unsigned char *)str; 593 594 for (ptr = buf, eptr = ptr + bufsiz - 1; ptr < eptr && *s; s++) { 595 if (isprint(*s)) { 596 *ptr++ = *s; 597 continue; 598 } 599 if (ptr >= eptr - 3) 600 break; 601 *ptr++ = '\\'; 602 *ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0'; 603 *ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0'; 604 *ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0'; 605 } 606 *ptr = '\0'; 607 return buf; 608 } 609