1 /* $NetBSD: funcs.c,v 1.8 2014/06/13 02:08:06 christos Exp $ */ 2 /* 3 * Copyright (c) Christos Zoulas 2003. 4 * All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 #include "file.h" 29 30 #ifndef lint 31 #if 0 32 FILE_RCSID("@(#)$File: funcs.c,v 1.72 2014/05/14 23:15:42 christos Exp $") 33 #else 34 __RCSID("$NetBSD: funcs.c,v 1.8 2014/06/13 02:08:06 christos Exp $"); 35 #endif 36 #endif /* lint */ 37 38 #include "magic.h" 39 #include <assert.h> 40 #include <stdarg.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <ctype.h> 44 #if defined(HAVE_WCHAR_H) 45 #include <wchar.h> 46 #endif 47 #if defined(HAVE_WCTYPE_H) 48 #include <wctype.h> 49 #endif 50 #if defined(HAVE_LIMITS_H) 51 #include <limits.h> 52 #endif 53 #if defined(HAVE_LOCALE_H) 54 #include <locale.h> 55 #endif 56 57 #ifndef SIZE_MAX 58 #define SIZE_MAX ((size_t)~0) 59 #endif 60 61 /* 62 * Like printf, only we append to a buffer. 63 */ 64 protected int 65 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 66 { 67 int len; 68 char *buf, *newstr; 69 70 if (ms->event_flags & EVENT_HAD_ERR) 71 return 0; 72 len = vasprintf(&buf, fmt, ap); 73 if (len < 0) 74 goto out; 75 76 if (ms->o.buf != NULL) { 77 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 78 free(buf); 79 if (len < 0) 80 goto out; 81 free(ms->o.buf); 82 buf = newstr; 83 } 84 ms->o.buf = buf; 85 return 0; 86 out: 87 file_error(ms, errno, "vasprintf failed"); 88 return -1; 89 } 90 91 protected int 92 file_printf(struct magic_set *ms, const char *fmt, ...) 93 { 94 int rv; 95 va_list ap; 96 97 va_start(ap, fmt); 98 rv = file_vprintf(ms, fmt, ap); 99 va_end(ap); 100 return rv; 101 } 102 103 /* 104 * error - print best error message possible 105 */ 106 /*VARARGS*/ 107 __attribute__((__format__(__printf__, 3, 0))) 108 private void 109 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 110 size_t lineno) 111 { 112 /* Only the first error is ok */ 113 if (ms->event_flags & EVENT_HAD_ERR) 114 return; 115 if (lineno != 0) { 116 free(ms->o.buf); 117 ms->o.buf = NULL; 118 file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno); 119 } 120 file_vprintf(ms, f, va); 121 if (error > 0) 122 file_printf(ms, " (%s)", strerror(error)); 123 ms->event_flags |= EVENT_HAD_ERR; 124 ms->error = error; 125 } 126 127 /*VARARGS*/ 128 protected void 129 file_error(struct magic_set *ms, int error, const char *f, ...) 130 { 131 va_list va; 132 va_start(va, f); 133 file_error_core(ms, error, f, va, 0); 134 va_end(va); 135 } 136 137 /* 138 * Print an error with magic line number. 139 */ 140 /*VARARGS*/ 141 protected void 142 file_magerror(struct magic_set *ms, const char *f, ...) 143 { 144 va_list va; 145 va_start(va, f); 146 file_error_core(ms, 0, f, va, ms->line); 147 va_end(va); 148 } 149 150 protected void 151 file_oomem(struct magic_set *ms, size_t len) 152 { 153 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 154 len); 155 } 156 157 protected void 158 file_badseek(struct magic_set *ms) 159 { 160 file_error(ms, errno, "error seeking"); 161 } 162 163 protected void 164 file_badread(struct magic_set *ms) 165 { 166 file_error(ms, errno, "error reading"); 167 } 168 169 #ifndef COMPILE_ONLY 170 /*ARGSUSED*/ 171 protected int 172 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 173 const void *buf, size_t nb) 174 { 175 int m = 0, rv = 0, looks_text = 0; 176 int mime = ms->flags & MAGIC_MIME; 177 const unsigned char *ubuf = CAST(const unsigned char *, buf); 178 unichar *u8buf = NULL; 179 size_t ulen; 180 const char *code = NULL; 181 const char *code_mime = "binary"; 182 const char *type = "application/octet-stream"; 183 const char *def = "data"; 184 const char *ftype = NULL; 185 186 if (nb == 0) { 187 def = "empty"; 188 type = "application/x-empty"; 189 goto simple; 190 } else if (nb == 1) { 191 def = "very short file (no magic)"; 192 goto simple; 193 } 194 195 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 196 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 197 &code, &code_mime, &ftype); 198 } 199 200 #ifdef __EMX__ 201 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 202 switch (file_os2_apptype(ms, inname, buf, nb)) { 203 case -1: 204 return -1; 205 case 0: 206 break; 207 default: 208 return 1; 209 } 210 } 211 #endif 212 #if HAVE_FORK 213 /* try compression stuff */ 214 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 215 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 216 if ((ms->flags & MAGIC_DEBUG) != 0) 217 (void)fprintf(stderr, "zmagic %d\n", m); 218 goto done_encoding; 219 } 220 #endif 221 /* Check if we have a tar file */ 222 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 223 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 224 if ((ms->flags & MAGIC_DEBUG) != 0) 225 (void)fprintf(stderr, "tar %d\n", m); 226 goto done; 227 } 228 229 /* Check if we have a CDF file */ 230 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 231 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 232 if ((ms->flags & MAGIC_DEBUG) != 0) 233 (void)fprintf(stderr, "cdf %d\n", m); 234 goto done; 235 } 236 237 /* try soft magic tests */ 238 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 239 if ((m = file_softmagic(ms, ubuf, nb, 0, BINTEST, 240 looks_text)) != 0) { 241 if ((ms->flags & MAGIC_DEBUG) != 0) 242 (void)fprintf(stderr, "softmagic %d\n", m); 243 #ifdef BUILTIN_ELF 244 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 245 nb > 5 && fd != -1) { 246 /* 247 * We matched something in the file, so this 248 * *might* be an ELF file, and the file is at 249 * least 5 bytes long, so if it's an ELF file 250 * it has at least one byte past the ELF magic 251 * number - try extracting information from the 252 * ELF headers that cannot easily * be 253 * extracted with rules in the magic file. 254 */ 255 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 256 if ((ms->flags & MAGIC_DEBUG) != 0) 257 (void)fprintf(stderr, 258 "elf %d\n", m); 259 } 260 #endif 261 goto done; 262 } 263 264 /* try text properties */ 265 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 266 267 if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) { 268 if ((ms->flags & MAGIC_DEBUG) != 0) 269 (void)fprintf(stderr, "ascmagic %d\n", m); 270 goto done; 271 } 272 } 273 274 simple: 275 /* give up */ 276 m = 1; 277 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 278 file_printf(ms, "%s", mime ? type : def) == -1) { 279 rv = -1; 280 } 281 done: 282 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 283 if (ms->flags & MAGIC_MIME_TYPE) 284 if (file_printf(ms, "; charset=") == -1) 285 rv = -1; 286 if (file_printf(ms, "%s", code_mime) == -1) 287 rv = -1; 288 } 289 #if HAVE_FORK 290 done_encoding: 291 #endif 292 free(u8buf); 293 if (rv) 294 return rv; 295 296 return m; 297 } 298 #endif 299 300 protected int 301 file_reset(struct magic_set *ms) 302 { 303 if (ms->mlist[0] == NULL) { 304 file_error(ms, 0, "no magic files loaded"); 305 return -1; 306 } 307 if (ms->o.buf) { 308 free(ms->o.buf); 309 ms->o.buf = NULL; 310 } 311 if (ms->o.pbuf) { 312 free(ms->o.pbuf); 313 ms->o.pbuf = NULL; 314 } 315 ms->event_flags &= ~EVENT_HAD_ERR; 316 ms->error = -1; 317 return 0; 318 } 319 320 #define OCTALIFY(n, o) \ 321 /*LINTED*/ \ 322 (void)(*(n)++ = '\\', \ 323 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 324 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 325 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 326 (o)++) 327 328 protected const char * 329 file_getbuffer(struct magic_set *ms) 330 { 331 char *pbuf, *op, *np; 332 size_t psize, len; 333 334 if (ms->event_flags & EVENT_HAD_ERR) 335 return NULL; 336 337 if (ms->flags & MAGIC_RAW) 338 return ms->o.buf; 339 340 if (ms->o.buf == NULL) 341 return NULL; 342 343 /* * 4 is for octal representation, + 1 is for NUL */ 344 len = strlen(ms->o.buf); 345 if (len > (SIZE_MAX - 1) / 4) { 346 file_oomem(ms, len); 347 return NULL; 348 } 349 psize = len * 4 + 1; 350 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 351 file_oomem(ms, psize); 352 return NULL; 353 } 354 ms->o.pbuf = pbuf; 355 356 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 357 { 358 mbstate_t state; 359 wchar_t nextchar; 360 int mb_conv = 1; 361 size_t bytesconsumed; 362 char *eop; 363 (void)memset(&state, 0, sizeof(mbstate_t)); 364 365 np = ms->o.pbuf; 366 op = ms->o.buf; 367 eop = op + len; 368 369 while (op < eop) { 370 bytesconsumed = mbrtowc(&nextchar, op, 371 (size_t)(eop - op), &state); 372 if (bytesconsumed == (size_t)(-1) || 373 bytesconsumed == (size_t)(-2)) { 374 mb_conv = 0; 375 break; 376 } 377 378 if (iswprint(nextchar)) { 379 (void)memcpy(np, op, bytesconsumed); 380 op += bytesconsumed; 381 np += bytesconsumed; 382 } else { 383 while (bytesconsumed-- > 0) 384 OCTALIFY(np, op); 385 } 386 } 387 *np = '\0'; 388 389 /* Parsing succeeded as a multi-byte sequence */ 390 if (mb_conv != 0) 391 return ms->o.pbuf; 392 } 393 #endif 394 395 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 396 if (isprint((unsigned char)*op)) { 397 *np++ = *op++; 398 } else { 399 OCTALIFY(np, op); 400 } 401 } 402 *np = '\0'; 403 return ms->o.pbuf; 404 } 405 406 protected int 407 file_check_mem(struct magic_set *ms, unsigned int level) 408 { 409 size_t len; 410 411 if (level >= ms->c.len) { 412 len = (ms->c.len += 20) * sizeof(*ms->c.li); 413 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 414 malloc(len) : 415 realloc(ms->c.li, len)); 416 if (ms->c.li == NULL) { 417 file_oomem(ms, len); 418 return -1; 419 } 420 } 421 ms->c.li[level].got_match = 0; 422 #ifdef ENABLE_CONDITIONALS 423 ms->c.li[level].last_match = 0; 424 ms->c.li[level].last_cond = COND_NONE; 425 #endif /* ENABLE_CONDITIONALS */ 426 return 0; 427 } 428 429 protected size_t 430 file_printedlen(const struct magic_set *ms) 431 { 432 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 433 } 434 435 protected int 436 file_replace(struct magic_set *ms, const char *pat, const char *rep) 437 { 438 file_regex_t rx; 439 int rc, rv = -1; 440 441 rc = file_regcomp(&rx, pat, REG_EXTENDED); 442 if (rc) { 443 file_regerror(&rx, rc, ms); 444 } else { 445 regmatch_t rm; 446 int nm = 0; 447 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 448 ms->o.buf[rm.rm_so] = '\0'; 449 if (file_printf(ms, "%s%s", rep, 450 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 451 goto out; 452 nm++; 453 } 454 rv = nm; 455 } 456 out: 457 file_regfree(&rx); 458 return rv; 459 } 460 461 protected int 462 file_regcomp(file_regex_t *rx, const char *pat, int flags) 463 { 464 rx->old_lc_ctype = setlocale(LC_CTYPE, NULL); 465 assert(rx->old_lc_ctype != NULL); 466 rx->old_lc_ctype = strdup(rx->old_lc_ctype); 467 assert(rx->old_lc_ctype != NULL); 468 rx->pat = pat; 469 470 (void)setlocale(LC_CTYPE, "C"); 471 return rx->rc = regcomp(&rx->rx, pat, flags); 472 } 473 474 protected int 475 file_regexec(file_regex_t *rx, const char *str, size_t nmatch, 476 regmatch_t* pmatch, int eflags) 477 { 478 assert(rx->rc == 0); 479 return regexec(&rx->rx, str, nmatch, pmatch, eflags); 480 } 481 482 protected void 483 file_regfree(file_regex_t *rx) 484 { 485 if (rx->rc == 0) 486 regfree(&rx->rx); 487 (void)setlocale(LC_CTYPE, rx->old_lc_ctype); 488 free(rx->old_lc_ctype); 489 } 490 491 protected void 492 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms) 493 { 494 char errmsg[512]; 495 496 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg)); 497 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat, 498 errmsg); 499 } 500