1 /* $NetBSD: funcs.c,v 1.7 2014/01/07 02:12:07 joerg Exp $ */ 2 3 /* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include "file.h" 30 31 #ifndef lint 32 #if 0 33 FILE_RCSID("@(#)$File: funcs.c,v 1.64 2013/11/19 23:49:44 christos Exp $") 34 #else 35 __RCSID("$NetBSD: funcs.c,v 1.7 2014/01/07 02:12:07 joerg Exp $"); 36 #endif 37 #endif /* lint */ 38 39 #include "magic.h" 40 #include <stdarg.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <ctype.h> 44 #if defined(HAVE_WCHAR_H) 45 #include <wchar.h> 46 #endif 47 #if defined(HAVE_WCTYPE_H) 48 #include <wctype.h> 49 #endif 50 #if defined(HAVE_LIMITS_H) 51 #include <limits.h> 52 #endif 53 54 #ifndef SIZE_MAX 55 #define SIZE_MAX ((size_t)~0) 56 #endif 57 58 /* 59 * Like printf, only we append to a buffer. 60 */ 61 protected int 62 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 63 { 64 int len; 65 char *buf, *newstr; 66 67 if (ms->event_flags & EVENT_HAD_ERR) 68 return 0; 69 len = vasprintf(&buf, fmt, ap); 70 if (len < 0) 71 goto out; 72 73 if (ms->o.buf != NULL) { 74 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 75 free(buf); 76 if (len < 0) 77 goto out; 78 free(ms->o.buf); 79 buf = newstr; 80 } 81 ms->o.buf = buf; 82 return 0; 83 out: 84 file_error(ms, errno, "vasprintf failed"); 85 return -1; 86 } 87 88 protected int 89 file_printf(struct magic_set *ms, const char *fmt, ...) 90 { 91 int rv; 92 va_list ap; 93 94 va_start(ap, fmt); 95 rv = file_vprintf(ms, fmt, ap); 96 va_end(ap); 97 return rv; 98 } 99 100 /* 101 * error - print best error message possible 102 */ 103 /*VARARGS*/ 104 __attribute__((__format__(__printf__, 3, 0))) 105 private void 106 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 107 size_t lineno) 108 { 109 /* Only the first error is ok */ 110 if (ms->event_flags & EVENT_HAD_ERR) 111 return; 112 if (lineno != 0) { 113 free(ms->o.buf); 114 ms->o.buf = NULL; 115 file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno); 116 } 117 file_vprintf(ms, f, va); 118 if (error > 0) 119 file_printf(ms, " (%s)", strerror(error)); 120 ms->event_flags |= EVENT_HAD_ERR; 121 ms->error = error; 122 } 123 124 /*VARARGS*/ 125 protected void 126 file_error(struct magic_set *ms, int error, const char *f, ...) 127 { 128 va_list va; 129 va_start(va, f); 130 file_error_core(ms, error, f, va, 0); 131 va_end(va); 132 } 133 134 /* 135 * Print an error with magic line number. 136 */ 137 /*VARARGS*/ 138 protected void 139 file_magerror(struct magic_set *ms, const char *f, ...) 140 { 141 va_list va; 142 va_start(va, f); 143 file_error_core(ms, 0, f, va, ms->line); 144 va_end(va); 145 } 146 147 protected void 148 file_oomem(struct magic_set *ms, size_t len) 149 { 150 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 151 len); 152 } 153 154 protected void 155 file_badseek(struct magic_set *ms) 156 { 157 file_error(ms, errno, "error seeking"); 158 } 159 160 protected void 161 file_badread(struct magic_set *ms) 162 { 163 file_error(ms, errno, "error reading"); 164 } 165 166 #ifndef COMPILE_ONLY 167 /*ARGSUSED*/ 168 protected int 169 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 170 const void *buf, size_t nb) 171 { 172 int m = 0, rv = 0, looks_text = 0; 173 int mime = ms->flags & MAGIC_MIME; 174 const unsigned char *ubuf = CAST(const unsigned char *, buf); 175 unichar *u8buf = NULL; 176 size_t ulen; 177 const char *code = NULL; 178 const char *code_mime = "binary"; 179 const char *type = "application/octet-stream"; 180 const char *def = "data"; 181 182 183 184 if (nb == 0) { 185 def = "empty"; 186 type = "application/x-empty"; 187 goto simple; 188 } else if (nb == 1) { 189 def = "very short file (no magic)"; 190 goto simple; 191 } 192 193 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 194 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 195 &code, &code_mime, &type); 196 } 197 198 #ifdef __EMX__ 199 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 200 switch (file_os2_apptype(ms, inname, buf, nb)) { 201 case -1: 202 return -1; 203 case 0: 204 break; 205 default: 206 return 1; 207 } 208 } 209 #endif 210 #if HAVE_FORK 211 /* try compression stuff */ 212 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 213 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 214 if ((ms->flags & MAGIC_DEBUG) != 0) 215 (void)fprintf(stderr, "zmagic %d\n", m); 216 goto done_encoding; 217 } 218 #endif 219 /* Check if we have a tar file */ 220 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 221 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 222 if ((ms->flags & MAGIC_DEBUG) != 0) 223 (void)fprintf(stderr, "tar %d\n", m); 224 goto done; 225 } 226 227 /* Check if we have a CDF file */ 228 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 229 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 230 if ((ms->flags & MAGIC_DEBUG) != 0) 231 (void)fprintf(stderr, "cdf %d\n", m); 232 goto done; 233 } 234 235 /* try soft magic tests */ 236 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 237 if ((m = file_softmagic(ms, ubuf, nb, BINTEST, 238 looks_text)) != 0) { 239 if ((ms->flags & MAGIC_DEBUG) != 0) 240 (void)fprintf(stderr, "softmagic %d\n", m); 241 #ifdef BUILTIN_ELF 242 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 243 nb > 5 && fd != -1) { 244 /* 245 * We matched something in the file, so this 246 * *might* be an ELF file, and the file is at 247 * least 5 bytes long, so if it's an ELF file 248 * it has at least one byte past the ELF magic 249 * number - try extracting information from the 250 * ELF headers that cannot easily * be 251 * extracted with rules in the magic file. 252 */ 253 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 254 if ((ms->flags & MAGIC_DEBUG) != 0) 255 (void)fprintf(stderr, 256 "elf %d\n", m); 257 } 258 #endif 259 goto done; 260 } 261 262 /* try text properties */ 263 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 264 265 if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) { 266 if ((ms->flags & MAGIC_DEBUG) != 0) 267 (void)fprintf(stderr, "ascmagic %d\n", m); 268 goto done; 269 } 270 271 /* try to discover text encoding */ 272 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 273 if (looks_text == 0) 274 if ((m = file_ascmagic_with_encoding( ms, ubuf, 275 nb, u8buf, ulen, code, type, looks_text)) 276 != 0) { 277 if ((ms->flags & MAGIC_DEBUG) != 0) 278 (void)fprintf(stderr, 279 "ascmagic/enc %d\n", m); 280 goto done; 281 } 282 } 283 } 284 285 simple: 286 /* give up */ 287 m = 1; 288 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 289 file_printf(ms, "%s", mime ? type : def) == -1) { 290 rv = -1; 291 } 292 done: 293 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 294 if (ms->flags & MAGIC_MIME_TYPE) 295 if (file_printf(ms, "; charset=") == -1) 296 rv = -1; 297 if (file_printf(ms, "%s", code_mime) == -1) 298 rv = -1; 299 } 300 done_encoding: 301 free(u8buf); 302 if (rv) 303 return rv; 304 305 return m; 306 } 307 #endif 308 309 protected int 310 file_reset(struct magic_set *ms) 311 { 312 if (ms->mlist[0] == NULL) { 313 file_error(ms, 0, "no magic files loaded"); 314 return -1; 315 } 316 if (ms->o.buf) { 317 free(ms->o.buf); 318 ms->o.buf = NULL; 319 } 320 if (ms->o.pbuf) { 321 free(ms->o.pbuf); 322 ms->o.pbuf = NULL; 323 } 324 ms->event_flags &= ~EVENT_HAD_ERR; 325 ms->error = -1; 326 return 0; 327 } 328 329 #define OCTALIFY(n, o) \ 330 /*LINTED*/ \ 331 (void)(*(n)++ = '\\', \ 332 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 333 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 334 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 335 (o)++) 336 337 protected const char * 338 file_getbuffer(struct magic_set *ms) 339 { 340 char *pbuf, *op, *np; 341 size_t psize, len; 342 343 if (ms->event_flags & EVENT_HAD_ERR) 344 return NULL; 345 346 if (ms->flags & MAGIC_RAW) 347 return ms->o.buf; 348 349 if (ms->o.buf == NULL) 350 return NULL; 351 352 /* * 4 is for octal representation, + 1 is for NUL */ 353 len = strlen(ms->o.buf); 354 if (len > (SIZE_MAX - 1) / 4) { 355 file_oomem(ms, len); 356 return NULL; 357 } 358 psize = len * 4 + 1; 359 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 360 file_oomem(ms, psize); 361 return NULL; 362 } 363 ms->o.pbuf = pbuf; 364 365 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 366 { 367 mbstate_t state; 368 wchar_t nextchar; 369 int mb_conv = 1; 370 size_t bytesconsumed; 371 char *eop; 372 (void)memset(&state, 0, sizeof(mbstate_t)); 373 374 np = ms->o.pbuf; 375 op = ms->o.buf; 376 eop = op + len; 377 378 while (op < eop) { 379 bytesconsumed = mbrtowc(&nextchar, op, 380 (size_t)(eop - op), &state); 381 if (bytesconsumed == (size_t)(-1) || 382 bytesconsumed == (size_t)(-2)) { 383 mb_conv = 0; 384 break; 385 } 386 387 if (iswprint(nextchar)) { 388 (void)memcpy(np, op, bytesconsumed); 389 op += bytesconsumed; 390 np += bytesconsumed; 391 } else { 392 while (bytesconsumed-- > 0) 393 OCTALIFY(np, op); 394 } 395 } 396 *np = '\0'; 397 398 /* Parsing succeeded as a multi-byte sequence */ 399 if (mb_conv != 0) 400 return ms->o.pbuf; 401 } 402 #endif 403 404 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 405 if (isprint((unsigned char)*op)) { 406 *np++ = *op++; 407 } else { 408 OCTALIFY(np, op); 409 } 410 } 411 *np = '\0'; 412 return ms->o.pbuf; 413 } 414 415 protected int 416 file_check_mem(struct magic_set *ms, unsigned int level) 417 { 418 size_t len; 419 420 if (level >= ms->c.len) { 421 len = (ms->c.len += 20) * sizeof(*ms->c.li); 422 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 423 malloc(len) : 424 realloc(ms->c.li, len)); 425 if (ms->c.li == NULL) { 426 file_oomem(ms, len); 427 return -1; 428 } 429 } 430 ms->c.li[level].got_match = 0; 431 #ifdef ENABLE_CONDITIONALS 432 ms->c.li[level].last_match = 0; 433 ms->c.li[level].last_cond = COND_NONE; 434 #endif /* ENABLE_CONDITIONALS */ 435 return 0; 436 } 437 438 protected size_t 439 file_printedlen(const struct magic_set *ms) 440 { 441 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 442 } 443 444 protected int 445 file_replace(struct magic_set *ms, const char *pat, const char *rep) 446 { 447 regex_t rx; 448 int rc; 449 450 rc = regcomp(&rx, pat, REG_EXTENDED); 451 if (rc) { 452 char errmsg[512]; 453 (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); 454 file_magerror(ms, "regex error %d, (%s)", rc, errmsg); 455 return -1; 456 } else { 457 regmatch_t rm; 458 int nm = 0; 459 while (regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 460 ms->o.buf[rm.rm_so] = '\0'; 461 if (file_printf(ms, "%s%s", rep, 462 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 463 return -1; 464 nm++; 465 } 466 regfree(&rx); 467 return nm; 468 } 469 } 470