1 /* $NetBSD: funcs.c,v 1.6 2013/12/01 19:32:15 christos Exp $ */ 2 3 /* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include "file.h" 30 31 #ifndef lint 32 #if 0 33 FILE_RCSID("@(#)$File: funcs.c,v 1.64 2013/11/19 23:49:44 christos Exp $") 34 #else 35 __RCSID("$NetBSD: funcs.c,v 1.6 2013/12/01 19:32:15 christos Exp $"); 36 #endif 37 #endif /* lint */ 38 39 #include "magic.h" 40 #include <stdarg.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <ctype.h> 44 #if defined(HAVE_WCHAR_H) 45 #include <wchar.h> 46 #endif 47 #if defined(HAVE_WCTYPE_H) 48 #include <wctype.h> 49 #endif 50 #if defined(HAVE_LIMITS_H) 51 #include <limits.h> 52 #endif 53 54 #ifndef SIZE_MAX 55 #define SIZE_MAX ((size_t)~0) 56 #endif 57 58 /* 59 * Like printf, only we append to a buffer. 60 */ 61 protected int 62 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 63 { 64 int len; 65 char *buf, *newstr; 66 67 if (ms->event_flags & EVENT_HAD_ERR) 68 return 0; 69 len = vasprintf(&buf, fmt, ap); 70 if (len < 0) 71 goto out; 72 73 if (ms->o.buf != NULL) { 74 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 75 free(buf); 76 if (len < 0) 77 goto out; 78 free(ms->o.buf); 79 buf = newstr; 80 } 81 ms->o.buf = buf; 82 return 0; 83 out: 84 file_error(ms, errno, "vasprintf failed"); 85 return -1; 86 } 87 88 protected int 89 file_printf(struct magic_set *ms, const char *fmt, ...) 90 { 91 int rv; 92 va_list ap; 93 94 va_start(ap, fmt); 95 rv = file_vprintf(ms, fmt, ap); 96 va_end(ap); 97 return rv; 98 } 99 100 /* 101 * error - print best error message possible 102 */ 103 /*VARARGS*/ 104 private void 105 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 106 size_t lineno) 107 { 108 /* Only the first error is ok */ 109 if (ms->event_flags & EVENT_HAD_ERR) 110 return; 111 if (lineno != 0) { 112 free(ms->o.buf); 113 ms->o.buf = NULL; 114 file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno); 115 } 116 file_vprintf(ms, f, va); 117 if (error > 0) 118 file_printf(ms, " (%s)", strerror(error)); 119 ms->event_flags |= EVENT_HAD_ERR; 120 ms->error = error; 121 } 122 123 /*VARARGS*/ 124 protected void 125 file_error(struct magic_set *ms, int error, const char *f, ...) 126 { 127 va_list va; 128 va_start(va, f); 129 file_error_core(ms, error, f, va, 0); 130 va_end(va); 131 } 132 133 /* 134 * Print an error with magic line number. 135 */ 136 /*VARARGS*/ 137 protected void 138 file_magerror(struct magic_set *ms, const char *f, ...) 139 { 140 va_list va; 141 va_start(va, f); 142 file_error_core(ms, 0, f, va, ms->line); 143 va_end(va); 144 } 145 146 protected void 147 file_oomem(struct magic_set *ms, size_t len) 148 { 149 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 150 len); 151 } 152 153 protected void 154 file_badseek(struct magic_set *ms) 155 { 156 file_error(ms, errno, "error seeking"); 157 } 158 159 protected void 160 file_badread(struct magic_set *ms) 161 { 162 file_error(ms, errno, "error reading"); 163 } 164 165 #ifndef COMPILE_ONLY 166 /*ARGSUSED*/ 167 protected int 168 file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 169 const void *buf, size_t nb) 170 { 171 int m = 0, rv = 0, looks_text = 0; 172 int mime = ms->flags & MAGIC_MIME; 173 const unsigned char *ubuf = CAST(const unsigned char *, buf); 174 unichar *u8buf = NULL; 175 size_t ulen; 176 const char *code = NULL; 177 const char *code_mime = "binary"; 178 const char *type = "application/octet-stream"; 179 const char *def = "data"; 180 181 182 183 if (nb == 0) { 184 def = "empty"; 185 type = "application/x-empty"; 186 goto simple; 187 } else if (nb == 1) { 188 def = "very short file (no magic)"; 189 goto simple; 190 } 191 192 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 193 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 194 &code, &code_mime, &type); 195 } 196 197 #ifdef __EMX__ 198 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 199 switch (file_os2_apptype(ms, inname, buf, nb)) { 200 case -1: 201 return -1; 202 case 0: 203 break; 204 default: 205 return 1; 206 } 207 } 208 #endif 209 #if HAVE_FORK 210 /* try compression stuff */ 211 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 212 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 213 if ((ms->flags & MAGIC_DEBUG) != 0) 214 (void)fprintf(stderr, "zmagic %d\n", m); 215 goto done_encoding; 216 } 217 #endif 218 /* Check if we have a tar file */ 219 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 220 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 221 if ((ms->flags & MAGIC_DEBUG) != 0) 222 (void)fprintf(stderr, "tar %d\n", m); 223 goto done; 224 } 225 226 /* Check if we have a CDF file */ 227 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 228 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 229 if ((ms->flags & MAGIC_DEBUG) != 0) 230 (void)fprintf(stderr, "cdf %d\n", m); 231 goto done; 232 } 233 234 /* try soft magic tests */ 235 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 236 if ((m = file_softmagic(ms, ubuf, nb, BINTEST, 237 looks_text)) != 0) { 238 if ((ms->flags & MAGIC_DEBUG) != 0) 239 (void)fprintf(stderr, "softmagic %d\n", m); 240 #ifdef BUILTIN_ELF 241 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 242 nb > 5 && fd != -1) { 243 /* 244 * We matched something in the file, so this 245 * *might* be an ELF file, and the file is at 246 * least 5 bytes long, so if it's an ELF file 247 * it has at least one byte past the ELF magic 248 * number - try extracting information from the 249 * ELF headers that cannot easily * be 250 * extracted with rules in the magic file. 251 */ 252 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 253 if ((ms->flags & MAGIC_DEBUG) != 0) 254 (void)fprintf(stderr, 255 "elf %d\n", m); 256 } 257 #endif 258 goto done; 259 } 260 261 /* try text properties */ 262 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 263 264 if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) { 265 if ((ms->flags & MAGIC_DEBUG) != 0) 266 (void)fprintf(stderr, "ascmagic %d\n", m); 267 goto done; 268 } 269 270 /* try to discover text encoding */ 271 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 272 if (looks_text == 0) 273 if ((m = file_ascmagic_with_encoding( ms, ubuf, 274 nb, u8buf, ulen, code, type, looks_text)) 275 != 0) { 276 if ((ms->flags & MAGIC_DEBUG) != 0) 277 (void)fprintf(stderr, 278 "ascmagic/enc %d\n", m); 279 goto done; 280 } 281 } 282 } 283 284 simple: 285 /* give up */ 286 m = 1; 287 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 288 file_printf(ms, "%s", mime ? type : def) == -1) { 289 rv = -1; 290 } 291 done: 292 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 293 if (ms->flags & MAGIC_MIME_TYPE) 294 if (file_printf(ms, "; charset=") == -1) 295 rv = -1; 296 if (file_printf(ms, "%s", code_mime) == -1) 297 rv = -1; 298 } 299 done_encoding: 300 free(u8buf); 301 if (rv) 302 return rv; 303 304 return m; 305 } 306 #endif 307 308 protected int 309 file_reset(struct magic_set *ms) 310 { 311 if (ms->mlist[0] == NULL) { 312 file_error(ms, 0, "no magic files loaded"); 313 return -1; 314 } 315 if (ms->o.buf) { 316 free(ms->o.buf); 317 ms->o.buf = NULL; 318 } 319 if (ms->o.pbuf) { 320 free(ms->o.pbuf); 321 ms->o.pbuf = NULL; 322 } 323 ms->event_flags &= ~EVENT_HAD_ERR; 324 ms->error = -1; 325 return 0; 326 } 327 328 #define OCTALIFY(n, o) \ 329 /*LINTED*/ \ 330 (void)(*(n)++ = '\\', \ 331 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 332 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 333 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 334 (o)++) 335 336 protected const char * 337 file_getbuffer(struct magic_set *ms) 338 { 339 char *pbuf, *op, *np; 340 size_t psize, len; 341 342 if (ms->event_flags & EVENT_HAD_ERR) 343 return NULL; 344 345 if (ms->flags & MAGIC_RAW) 346 return ms->o.buf; 347 348 if (ms->o.buf == NULL) 349 return NULL; 350 351 /* * 4 is for octal representation, + 1 is for NUL */ 352 len = strlen(ms->o.buf); 353 if (len > (SIZE_MAX - 1) / 4) { 354 file_oomem(ms, len); 355 return NULL; 356 } 357 psize = len * 4 + 1; 358 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 359 file_oomem(ms, psize); 360 return NULL; 361 } 362 ms->o.pbuf = pbuf; 363 364 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 365 { 366 mbstate_t state; 367 wchar_t nextchar; 368 int mb_conv = 1; 369 size_t bytesconsumed; 370 char *eop; 371 (void)memset(&state, 0, sizeof(mbstate_t)); 372 373 np = ms->o.pbuf; 374 op = ms->o.buf; 375 eop = op + len; 376 377 while (op < eop) { 378 bytesconsumed = mbrtowc(&nextchar, op, 379 (size_t)(eop - op), &state); 380 if (bytesconsumed == (size_t)(-1) || 381 bytesconsumed == (size_t)(-2)) { 382 mb_conv = 0; 383 break; 384 } 385 386 if (iswprint(nextchar)) { 387 (void)memcpy(np, op, bytesconsumed); 388 op += bytesconsumed; 389 np += bytesconsumed; 390 } else { 391 while (bytesconsumed-- > 0) 392 OCTALIFY(np, op); 393 } 394 } 395 *np = '\0'; 396 397 /* Parsing succeeded as a multi-byte sequence */ 398 if (mb_conv != 0) 399 return ms->o.pbuf; 400 } 401 #endif 402 403 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 404 if (isprint((unsigned char)*op)) { 405 *np++ = *op++; 406 } else { 407 OCTALIFY(np, op); 408 } 409 } 410 *np = '\0'; 411 return ms->o.pbuf; 412 } 413 414 protected int 415 file_check_mem(struct magic_set *ms, unsigned int level) 416 { 417 size_t len; 418 419 if (level >= ms->c.len) { 420 len = (ms->c.len += 20) * sizeof(*ms->c.li); 421 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 422 malloc(len) : 423 realloc(ms->c.li, len)); 424 if (ms->c.li == NULL) { 425 file_oomem(ms, len); 426 return -1; 427 } 428 } 429 ms->c.li[level].got_match = 0; 430 #ifdef ENABLE_CONDITIONALS 431 ms->c.li[level].last_match = 0; 432 ms->c.li[level].last_cond = COND_NONE; 433 #endif /* ENABLE_CONDITIONALS */ 434 return 0; 435 } 436 437 protected size_t 438 file_printedlen(const struct magic_set *ms) 439 { 440 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 441 } 442 443 protected int 444 file_replace(struct magic_set *ms, const char *pat, const char *rep) 445 { 446 regex_t rx; 447 int rc; 448 449 rc = regcomp(&rx, pat, REG_EXTENDED); 450 if (rc) { 451 char errmsg[512]; 452 (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); 453 file_magerror(ms, "regex error %d, (%s)", rc, errmsg); 454 return -1; 455 } else { 456 regmatch_t rm; 457 int nm = 0; 458 while (regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 459 ms->o.buf[rm.rm_so] = '\0'; 460 if (file_printf(ms, "%s%s", rep, 461 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 462 return -1; 463 nm++; 464 } 465 regfree(&rx); 466 return nm; 467 } 468 } 469