1 /* $NetBSD: funcs.c,v 1.9 2014/10/10 20:15:02 christos Exp $ */ 2 /* 3 * Copyright (c) Christos Zoulas 2003. 4 * All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 #include "file.h" 29 30 #ifndef lint 31 #if 0 32 FILE_RCSID("@(#)$File: funcs.c,v 1.73 2014/09/10 18:41:51 christos Exp $") 33 #else 34 __RCSID("$NetBSD: funcs.c,v 1.9 2014/10/10 20:15:02 christos Exp $"); 35 #endif 36 #endif /* lint */ 37 38 #include "magic.h" 39 #include <assert.h> 40 #include <stdarg.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <ctype.h> 44 #if defined(HAVE_WCHAR_H) 45 #include <wchar.h> 46 #endif 47 #if defined(HAVE_WCTYPE_H) 48 #include <wctype.h> 49 #endif 50 #if defined(HAVE_LIMITS_H) 51 #include <limits.h> 52 #endif 53 54 #ifndef SIZE_MAX 55 #define SIZE_MAX ((size_t)~0) 56 #endif 57 58 /* 59 * Like printf, only we append to a buffer. 60 */ 61 protected int 62 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 63 { 64 int len; 65 char *buf, *newstr; 66 67 if (ms->event_flags & EVENT_HAD_ERR) 68 return 0; 69 len = vasprintf(&buf, fmt, ap); 70 if (len < 0) 71 goto out; 72 73 if (ms->o.buf != NULL) { 74 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 75 free(buf); 76 if (len < 0) 77 goto out; 78 free(ms->o.buf); 79 buf = newstr; 80 } 81 ms->o.buf = buf; 82 return 0; 83 out: 84 file_error(ms, errno, "vasprintf failed"); 85 return -1; 86 } 87 88 protected int 89 file_printf(struct magic_set *ms, const char *fmt, ...) 90 { 91 int rv; 92 va_list ap; 93 94 va_start(ap, fmt); 95 rv = file_vprintf(ms, fmt, ap); 96 va_end(ap); 97 return rv; 98 } 99 100 /* 101 * error - print best error message possible 102 */ 103 /*VARARGS*/ 104 __attribute__((__format__(__printf__, 3, 0))) 105 private void 106 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 107 size_t lineno) 108 { 109 /* Only the first error is ok */ 110 if (ms->event_flags & EVENT_HAD_ERR) 111 return; 112 if (lineno != 0) { 113 free(ms->o.buf); 114 ms->o.buf = NULL; 115 file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno); 116 } 117 file_vprintf(ms, f, va); 118 if (error > 0) 119 file_printf(ms, " (%s)", strerror(error)); 120 ms->event_flags |= EVENT_HAD_ERR; 121 ms->error = error; 122 } 123 124 /*VARARGS*/ 125 protected void 126 file_error(struct magic_set *ms, int error, const char *f, ...) 127 { 128 va_list va; 129 va_start(va, f); 130 file_error_core(ms, error, f, va, 0); 131 va_end(va); 132 } 133 134 /* 135 * Print an error with magic line number. 136 */ 137 /*VARARGS*/ 138 protected void 139 file_magerror(struct magic_set *ms, const char *f, ...) 140 { 141 va_list va; 142 va_start(va, f); 143 file_error_core(ms, 0, f, va, ms->line); 144 va_end(va); 145 } 146 147 protected void 148 file_oomem(struct magic_set *ms, size_t len) 149 { 150 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 151 len); 152 } 153 154 protected void 155 file_badseek(struct magic_set *ms) 156 { 157 file_error(ms, errno, "error seeking"); 158 } 159 160 protected void 161 file_badread(struct magic_set *ms) 162 { 163 file_error(ms, errno, "error reading"); 164 } 165 166 #ifndef COMPILE_ONLY 167 /*ARGSUSED*/ 168 protected int 169 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 170 const void *buf, size_t nb) 171 { 172 int m = 0, rv = 0, looks_text = 0; 173 int mime = ms->flags & MAGIC_MIME; 174 const unsigned char *ubuf = CAST(const unsigned char *, buf); 175 unichar *u8buf = NULL; 176 size_t ulen; 177 const char *code = NULL; 178 const char *code_mime = "binary"; 179 const char *type = "application/octet-stream"; 180 const char *def = "data"; 181 const char *ftype = NULL; 182 183 if (nb == 0) { 184 def = "empty"; 185 type = "application/x-empty"; 186 goto simple; 187 } else if (nb == 1) { 188 def = "very short file (no magic)"; 189 goto simple; 190 } 191 192 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 193 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 194 &code, &code_mime, &ftype); 195 } 196 197 #ifdef __EMX__ 198 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 199 switch (file_os2_apptype(ms, inname, buf, nb)) { 200 case -1: 201 return -1; 202 case 0: 203 break; 204 default: 205 return 1; 206 } 207 } 208 #endif 209 #if HAVE_FORK 210 /* try compression stuff */ 211 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 212 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 213 if ((ms->flags & MAGIC_DEBUG) != 0) 214 (void)fprintf(stderr, "zmagic %d\n", m); 215 goto done_encoding; 216 } 217 #endif 218 /* Check if we have a tar file */ 219 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 220 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 221 if ((ms->flags & MAGIC_DEBUG) != 0) 222 (void)fprintf(stderr, "tar %d\n", m); 223 goto done; 224 } 225 226 /* Check if we have a CDF file */ 227 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 228 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 229 if ((ms->flags & MAGIC_DEBUG) != 0) 230 (void)fprintf(stderr, "cdf %d\n", m); 231 goto done; 232 } 233 234 /* try soft magic tests */ 235 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 236 if ((m = file_softmagic(ms, ubuf, nb, 0, BINTEST, 237 looks_text)) != 0) { 238 if ((ms->flags & MAGIC_DEBUG) != 0) 239 (void)fprintf(stderr, "softmagic %d\n", m); 240 #ifdef BUILTIN_ELF 241 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 242 nb > 5 && fd != -1) { 243 /* 244 * We matched something in the file, so this 245 * *might* be an ELF file, and the file is at 246 * least 5 bytes long, so if it's an ELF file 247 * it has at least one byte past the ELF magic 248 * number - try extracting information from the 249 * ELF headers that cannot easily * be 250 * extracted with rules in the magic file. 251 */ 252 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 253 if ((ms->flags & MAGIC_DEBUG) != 0) 254 (void)fprintf(stderr, 255 "elf %d\n", m); 256 } 257 #endif 258 goto done; 259 } 260 261 /* try text properties */ 262 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 263 264 if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) { 265 if ((ms->flags & MAGIC_DEBUG) != 0) 266 (void)fprintf(stderr, "ascmagic %d\n", m); 267 goto done; 268 } 269 } 270 271 simple: 272 /* give up */ 273 m = 1; 274 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 275 file_printf(ms, "%s", mime ? type : def) == -1) { 276 rv = -1; 277 } 278 done: 279 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 280 if (ms->flags & MAGIC_MIME_TYPE) 281 if (file_printf(ms, "; charset=") == -1) 282 rv = -1; 283 if (file_printf(ms, "%s", code_mime) == -1) 284 rv = -1; 285 } 286 #if HAVE_FORK 287 done_encoding: 288 #endif 289 free(u8buf); 290 if (rv) 291 return rv; 292 293 return m; 294 } 295 #endif 296 297 protected int 298 file_reset(struct magic_set *ms) 299 { 300 if (ms->mlist[0] == NULL) { 301 file_error(ms, 0, "no magic files loaded"); 302 return -1; 303 } 304 if (ms->o.buf) { 305 free(ms->o.buf); 306 ms->o.buf = NULL; 307 } 308 if (ms->o.pbuf) { 309 free(ms->o.pbuf); 310 ms->o.pbuf = NULL; 311 } 312 ms->event_flags &= ~EVENT_HAD_ERR; 313 ms->error = -1; 314 return 0; 315 } 316 317 #define OCTALIFY(n, o) \ 318 /*LINTED*/ \ 319 (void)(*(n)++ = '\\', \ 320 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 321 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 322 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 323 (o)++) 324 325 protected const char * 326 file_getbuffer(struct magic_set *ms) 327 { 328 char *pbuf, *op, *np; 329 size_t psize, len; 330 331 if (ms->event_flags & EVENT_HAD_ERR) 332 return NULL; 333 334 if (ms->flags & MAGIC_RAW) 335 return ms->o.buf; 336 337 if (ms->o.buf == NULL) 338 return NULL; 339 340 /* * 4 is for octal representation, + 1 is for NUL */ 341 len = strlen(ms->o.buf); 342 if (len > (SIZE_MAX - 1) / 4) { 343 file_oomem(ms, len); 344 return NULL; 345 } 346 psize = len * 4 + 1; 347 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 348 file_oomem(ms, psize); 349 return NULL; 350 } 351 ms->o.pbuf = pbuf; 352 353 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 354 { 355 mbstate_t state; 356 wchar_t nextchar; 357 int mb_conv = 1; 358 size_t bytesconsumed; 359 char *eop; 360 (void)memset(&state, 0, sizeof(mbstate_t)); 361 362 np = ms->o.pbuf; 363 op = ms->o.buf; 364 eop = op + len; 365 366 while (op < eop) { 367 bytesconsumed = mbrtowc(&nextchar, op, 368 (size_t)(eop - op), &state); 369 if (bytesconsumed == (size_t)(-1) || 370 bytesconsumed == (size_t)(-2)) { 371 mb_conv = 0; 372 break; 373 } 374 375 if (iswprint(nextchar)) { 376 (void)memcpy(np, op, bytesconsumed); 377 op += bytesconsumed; 378 np += bytesconsumed; 379 } else { 380 while (bytesconsumed-- > 0) 381 OCTALIFY(np, op); 382 } 383 } 384 *np = '\0'; 385 386 /* Parsing succeeded as a multi-byte sequence */ 387 if (mb_conv != 0) 388 return ms->o.pbuf; 389 } 390 #endif 391 392 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 393 if (isprint((unsigned char)*op)) { 394 *np++ = *op++; 395 } else { 396 OCTALIFY(np, op); 397 } 398 } 399 *np = '\0'; 400 return ms->o.pbuf; 401 } 402 403 protected int 404 file_check_mem(struct magic_set *ms, unsigned int level) 405 { 406 size_t len; 407 408 if (level >= ms->c.len) { 409 len = (ms->c.len += 20) * sizeof(*ms->c.li); 410 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 411 malloc(len) : 412 realloc(ms->c.li, len)); 413 if (ms->c.li == NULL) { 414 file_oomem(ms, len); 415 return -1; 416 } 417 } 418 ms->c.li[level].got_match = 0; 419 #ifdef ENABLE_CONDITIONALS 420 ms->c.li[level].last_match = 0; 421 ms->c.li[level].last_cond = COND_NONE; 422 #endif /* ENABLE_CONDITIONALS */ 423 return 0; 424 } 425 426 protected size_t 427 file_printedlen(const struct magic_set *ms) 428 { 429 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 430 } 431 432 protected int 433 file_replace(struct magic_set *ms, const char *pat, const char *rep) 434 { 435 file_regex_t rx; 436 int rc, rv = -1; 437 438 rc = file_regcomp(&rx, pat, REG_EXTENDED); 439 if (rc) { 440 file_regerror(&rx, rc, ms); 441 } else { 442 regmatch_t rm; 443 int nm = 0; 444 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 445 ms->o.buf[rm.rm_so] = '\0'; 446 if (file_printf(ms, "%s%s", rep, 447 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 448 goto out; 449 nm++; 450 } 451 rv = nm; 452 } 453 out: 454 file_regfree(&rx); 455 return rv; 456 } 457 458 protected int 459 file_regcomp(file_regex_t *rx, const char *pat, int flags) 460 { 461 #ifdef USE_C_LOCALE 462 rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); 463 assert(rx->c_lc_ctype != NULL); 464 rx->old_lc_ctype = uselocale(rx->c_lc_ctype); 465 assert(rx->old_lc_ctype != NULL); 466 #endif 467 rx->pat = pat; 468 469 return rx->rc = regcomp(&rx->rx, pat, flags); 470 } 471 472 protected int 473 file_regexec(file_regex_t *rx, const char *str, size_t nmatch, 474 regmatch_t* pmatch, int eflags) 475 { 476 assert(rx->rc == 0); 477 return regexec(&rx->rx, str, nmatch, pmatch, eflags); 478 } 479 480 protected void 481 file_regfree(file_regex_t *rx) 482 { 483 if (rx->rc == 0) 484 regfree(&rx->rx); 485 #ifdef USE_C_LOCALE 486 (void)uselocale(rx->old_lc_ctype); 487 freelocale(rx->c_lc_ctype); 488 #endif 489 } 490 491 protected void 492 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms) 493 { 494 char errmsg[512]; 495 496 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg)); 497 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat, 498 errmsg); 499 } 500