1 /* $NetBSD: funcs.c,v 1.10 2015/01/02 21:15:32 christos Exp $ */ 2 3 /* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 #include "file.h" 30 31 #ifndef lint 32 #if 0 33 FILE_RCSID("@(#)$File: funcs.c,v 1.79 2014/12/16 20:52:49 christos Exp $") 34 #else 35 __RCSID("$NetBSD: funcs.c,v 1.10 2015/01/02 21:15:32 christos Exp $"); 36 #endif 37 #endif /* lint */ 38 39 #include "magic.h" 40 #include <assert.h> 41 #include <stdarg.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <ctype.h> 45 #if defined(HAVE_WCHAR_H) 46 #include <wchar.h> 47 #endif 48 #if defined(HAVE_WCTYPE_H) 49 #include <wctype.h> 50 #endif 51 #if defined(HAVE_LIMITS_H) 52 #include <limits.h> 53 #endif 54 55 #ifndef SIZE_MAX 56 #define SIZE_MAX ((size_t)~0) 57 #endif 58 59 /* 60 * Like printf, only we append to a buffer. 61 */ 62 protected int 63 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 64 { 65 int len; 66 char *buf, *newstr; 67 68 if (ms->event_flags & EVENT_HAD_ERR) 69 return 0; 70 len = vasprintf(&buf, fmt, ap); 71 if (len < 0) 72 goto out; 73 74 if (ms->o.buf != NULL) { 75 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 76 free(buf); 77 if (len < 0) 78 goto out; 79 free(ms->o.buf); 80 buf = newstr; 81 } 82 ms->o.buf = buf; 83 return 0; 84 out: 85 file_error(ms, errno, "vasprintf failed"); 86 return -1; 87 } 88 89 protected int 90 file_printf(struct magic_set *ms, const char *fmt, ...) 
91 { 92 int rv; 93 va_list ap; 94 95 va_start(ap, fmt); 96 rv = file_vprintf(ms, fmt, ap); 97 va_end(ap); 98 return rv; 99 } 100 101 /* 102 * error - print best error message possible 103 */ 104 /*VARARGS*/ 105 __attribute__((__format__(__printf__, 3, 0))) 106 private void 107 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 108 size_t lineno) 109 { 110 /* Only the first error is ok */ 111 if (ms->event_flags & EVENT_HAD_ERR) 112 return; 113 if (lineno != 0) { 114 free(ms->o.buf); 115 ms->o.buf = NULL; 116 file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno); 117 } 118 file_vprintf(ms, f, va); 119 if (error > 0) 120 file_printf(ms, " (%s)", strerror(error)); 121 ms->event_flags |= EVENT_HAD_ERR; 122 ms->error = error; 123 } 124 125 /*VARARGS*/ 126 protected void 127 file_error(struct magic_set *ms, int error, const char *f, ...) 128 { 129 va_list va; 130 va_start(va, f); 131 file_error_core(ms, error, f, va, 0); 132 va_end(va); 133 } 134 135 /* 136 * Print an error with magic line number. 137 */ 138 /*VARARGS*/ 139 protected void 140 file_magerror(struct magic_set *ms, const char *f, ...) 
141 { 142 va_list va; 143 va_start(va, f); 144 file_error_core(ms, 0, f, va, ms->line); 145 va_end(va); 146 } 147 148 protected void 149 file_oomem(struct magic_set *ms, size_t len) 150 { 151 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 152 len); 153 } 154 155 protected void 156 file_badseek(struct magic_set *ms) 157 { 158 file_error(ms, errno, "error seeking"); 159 } 160 161 protected void 162 file_badread(struct magic_set *ms) 163 { 164 file_error(ms, errno, "error reading"); 165 } 166 167 #ifndef COMPILE_ONLY 168 /*ARGSUSED*/ 169 protected int 170 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 171 const void *buf, size_t nb) 172 { 173 int m = 0, rv = 0, looks_text = 0; 174 int mime = ms->flags & MAGIC_MIME; 175 const unsigned char *ubuf = CAST(const unsigned char *, buf); 176 unichar *u8buf = NULL; 177 size_t ulen; 178 const char *code = NULL; 179 const char *code_mime = "binary"; 180 const char *type = "application/octet-stream"; 181 const char *def = "data"; 182 const char *ftype = NULL; 183 184 if (nb == 0) { 185 def = "empty"; 186 type = "application/x-empty"; 187 goto simple; 188 } else if (nb == 1) { 189 def = "very short file (no magic)"; 190 goto simple; 191 } 192 193 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 194 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 195 &code, &code_mime, &ftype); 196 } 197 198 #ifdef __EMX__ 199 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 200 switch (file_os2_apptype(ms, inname, buf, nb)) { 201 case -1: 202 return -1; 203 case 0: 204 break; 205 default: 206 return 1; 207 } 208 } 209 #endif 210 #if HAVE_FORK 211 /* try compression stuff */ 212 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 213 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 214 if ((ms->flags & MAGIC_DEBUG) != 0) 215 (void)fprintf(stderr, "zmagic %d\n", m); 216 goto done_encoding; 217 } 218 #endif 219 /* Check if we have a tar file */ 220 if ((ms->flags & 
MAGIC_NO_CHECK_TAR) == 0) 221 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 222 if ((ms->flags & MAGIC_DEBUG) != 0) 223 (void)fprintf(stderr, "tar %d\n", m); 224 goto done; 225 } 226 227 /* Check if we have a CDF file */ 228 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 229 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 230 if ((ms->flags & MAGIC_DEBUG) != 0) 231 (void)fprintf(stderr, "cdf %d\n", m); 232 goto done; 233 } 234 235 /* try soft magic tests */ 236 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 237 if ((m = file_softmagic(ms, ubuf, nb, 0, NULL, BINTEST, 238 looks_text)) != 0) { 239 if ((ms->flags & MAGIC_DEBUG) != 0) 240 (void)fprintf(stderr, "softmagic %d\n", m); 241 #ifdef BUILTIN_ELF 242 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 243 nb > 5 && fd != -1) { 244 /* 245 * We matched something in the file, so this 246 * *might* be an ELF file, and the file is at 247 * least 5 bytes long, so if it's an ELF file 248 * it has at least one byte past the ELF magic 249 * number - try extracting information from the 250 * ELF headers that cannot easily * be 251 * extracted with rules in the magic file. 252 */ 253 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 254 if ((ms->flags & MAGIC_DEBUG) != 0) 255 (void)fprintf(stderr, 256 "elf %d\n", m); 257 } 258 #endif 259 goto done; 260 } 261 262 /* try text properties */ 263 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 264 265 if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) { 266 if ((ms->flags & MAGIC_DEBUG) != 0) 267 (void)fprintf(stderr, "ascmagic %d\n", m); 268 goto done; 269 } 270 } 271 272 simple: 273 /* give up */ 274 m = 1; 275 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 276 file_printf(ms, "%s", mime ? 
type : def) == -1) { 277 rv = -1; 278 } 279 done: 280 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 281 if (ms->flags & MAGIC_MIME_TYPE) 282 if (file_printf(ms, "; charset=") == -1) 283 rv = -1; 284 if (file_printf(ms, "%s", code_mime) == -1) 285 rv = -1; 286 } 287 #if HAVE_FORK 288 done_encoding: 289 #endif 290 free(u8buf); 291 if (rv) 292 return rv; 293 294 return m; 295 } 296 #endif 297 298 protected int 299 file_reset(struct magic_set *ms) 300 { 301 if (ms->mlist[0] == NULL) { 302 file_error(ms, 0, "no magic files loaded"); 303 return -1; 304 } 305 if (ms->o.buf) { 306 free(ms->o.buf); 307 ms->o.buf = NULL; 308 } 309 if (ms->o.pbuf) { 310 free(ms->o.pbuf); 311 ms->o.pbuf = NULL; 312 } 313 ms->event_flags &= ~EVENT_HAD_ERR; 314 ms->error = -1; 315 return 0; 316 } 317 318 #define OCTALIFY(n, o) \ 319 /*LINTED*/ \ 320 (void)(*(n)++ = '\\', \ 321 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 322 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 323 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 324 (o)++) 325 326 protected const char * 327 file_getbuffer(struct magic_set *ms) 328 { 329 char *pbuf, *op, *np; 330 size_t psize, len; 331 332 if (ms->event_flags & EVENT_HAD_ERR) 333 return NULL; 334 335 if (ms->flags & MAGIC_RAW) 336 return ms->o.buf; 337 338 if (ms->o.buf == NULL) 339 return NULL; 340 341 /* * 4 is for octal representation, + 1 is for NUL */ 342 len = strlen(ms->o.buf); 343 if (len > (SIZE_MAX - 1) / 4) { 344 file_oomem(ms, len); 345 return NULL; 346 } 347 psize = len * 4 + 1; 348 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 349 file_oomem(ms, psize); 350 return NULL; 351 } 352 ms->o.pbuf = pbuf; 353 354 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 355 { 356 mbstate_t state; 357 wchar_t nextchar; 358 int mb_conv = 1; 359 size_t bytesconsumed; 360 char *eop; 361 (void)memset(&state, 0, sizeof(mbstate_t)); 362 363 np = ms->o.pbuf; 364 op = ms->o.buf; 365 eop = op + len; 366 367 while (op < eop) { 368 
bytesconsumed = mbrtowc(&nextchar, op, 369 (size_t)(eop - op), &state); 370 if (bytesconsumed == (size_t)(-1) || 371 bytesconsumed == (size_t)(-2)) { 372 mb_conv = 0; 373 break; 374 } 375 376 if (iswprint(nextchar)) { 377 (void)memcpy(np, op, bytesconsumed); 378 op += bytesconsumed; 379 np += bytesconsumed; 380 } else { 381 while (bytesconsumed-- > 0) 382 OCTALIFY(np, op); 383 } 384 } 385 *np = '\0'; 386 387 /* Parsing succeeded as a multi-byte sequence */ 388 if (mb_conv != 0) 389 return ms->o.pbuf; 390 } 391 #endif 392 393 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 394 if (isprint((unsigned char)*op)) { 395 *np++ = *op++; 396 } else { 397 OCTALIFY(np, op); 398 } 399 } 400 *np = '\0'; 401 return ms->o.pbuf; 402 } 403 404 protected int 405 file_check_mem(struct magic_set *ms, unsigned int level) 406 { 407 size_t len; 408 409 if (level >= ms->c.len) { 410 len = (ms->c.len += 20) * sizeof(*ms->c.li); 411 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 412 malloc(len) : 413 realloc(ms->c.li, len)); 414 if (ms->c.li == NULL) { 415 file_oomem(ms, len); 416 return -1; 417 } 418 } 419 ms->c.li[level].got_match = 0; 420 #ifdef ENABLE_CONDITIONALS 421 ms->c.li[level].last_match = 0; 422 ms->c.li[level].last_cond = COND_NONE; 423 #endif /* ENABLE_CONDITIONALS */ 424 return 0; 425 } 426 427 protected size_t 428 file_printedlen(const struct magic_set *ms) 429 { 430 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 431 } 432 433 protected int 434 file_replace(struct magic_set *ms, const char *pat, const char *rep) 435 { 436 file_regex_t rx; 437 int rc, rv = -1; 438 439 rc = file_regcomp(&rx, pat, REG_EXTENDED); 440 if (rc) { 441 file_regerror(&rx, rc, ms); 442 } else { 443 regmatch_t rm; 444 int nm = 0; 445 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 446 ms->o.buf[rm.rm_so] = '\0'; 447 if (file_printf(ms, "%s%s", rep, 448 rm.rm_eo != 0 ? 
ms->o.buf + rm.rm_eo : "") == -1) 449 goto out; 450 nm++; 451 } 452 rv = nm; 453 } 454 out: 455 file_regfree(&rx); 456 return rv; 457 } 458 459 protected int 460 file_regcomp(file_regex_t *rx, const char *pat, int flags) 461 { 462 #ifdef USE_C_LOCALE 463 rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 464 assert(rx->c_lc_ctype != NULL); 465 rx->old_lc_ctype = uselocale(rx->c_lc_ctype); 466 assert(rx->old_lc_ctype != NULL); 467 #endif 468 rx->pat = pat; 469 470 return rx->rc = regcomp(&rx->rx, pat, flags); 471 } 472 473 protected int 474 file_regexec(file_regex_t *rx, const char *str, size_t nmatch, 475 regmatch_t* pmatch, int eflags) 476 { 477 assert(rx->rc == 0); 478 return regexec(&rx->rx, str, nmatch, pmatch, eflags); 479 } 480 481 protected void 482 file_regfree(file_regex_t *rx) 483 { 484 if (rx->rc == 0) 485 regfree(&rx->rx); 486 #ifdef USE_C_LOCALE 487 (void)uselocale(rx->old_lc_ctype); 488 freelocale(rx->c_lc_ctype); 489 #endif 490 } 491 492 protected void 493 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms) 494 { 495 char errmsg[512]; 496 497 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg)); 498 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat, 499 errmsg); 500 } 501 502 protected file_pushbuf_t * 503 file_push_buffer(struct magic_set *ms) 504 { 505 file_pushbuf_t *pb; 506 507 if (ms->event_flags & EVENT_HAD_ERR) 508 return NULL; 509 510 if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL) 511 return NULL; 512 513 pb->buf = ms->o.buf; 514 pb->offset = ms->offset; 515 516 ms->o.buf = NULL; 517 ms->offset = 0; 518 519 return pb; 520 } 521 522 protected char * 523 file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) 524 { 525 char *rbuf; 526 527 if (ms->event_flags & EVENT_HAD_ERR) { 528 free(pb->buf); 529 free(pb); 530 return NULL; 531 } 532 533 rbuf = ms->o.buf; 534 535 ms->o.buf = pb->buf; 536 ms->offset = pb->offset; 537 538 free(pb); 539 return rbuf; 540 } 541 542 /* 543 * convert string to ascii 
printable format.
 *
 * Copies str into buf, which holds bufsiz bytes, replacing every byte
 * that is not printable according to isprint() with a backslash and
 * three octal digits.  Copying stops at the end of str, when the buffer
 * is full, or when the next escape sequence would not fit completely.
 * The result is always NUL terminated, except that nothing at all is
 * written when bufsiz is 0.
 *
 * Returns buf.
 */
protected char *
file_printable(char *buf, size_t bufsiz, const char *str)
{
	const unsigned char *s = (const unsigned char *)str;
	size_t room, used = 0;

	/* There is not even space for the terminator. */
	if (bufsiz == 0)
		return buf;

	/* Payload capacity; the last byte is kept for the NUL. */
	room = bufsiz - 1;

	for (; used < room && *s != '\0'; s++) {
		if (isprint(*s)) {
			buf[used++] = (char)*s;
			continue;
		}
		/* An escape needs four bytes; never emit half of one. */
		if (room - used < 4)
			break;
		buf[used++] = '\\';
		buf[used++] = (char)((((unsigned int)*s >> 6) & 7) + '0');
		buf[used++] = (char)((((unsigned int)*s >> 3) & 7) + '0');
		buf[used++] = (char)((((unsigned int)*s >> 0) & 7) + '0');
	}
	buf[used] = '\0';
	return buf;
}