1 /* $NetBSD: funcs.c,v 1.2 2009/05/08 17:28:01 christos Exp $ */ 2 3 /* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include "file.h" 30 31 #ifndef lint 32 #if 0 33 FILE_RCSID("@(#)$File: funcs.c,v 1.53 2009/04/07 11:07:00 christos Exp $") 34 #else 35 __RCSID("$NetBSD: funcs.c,v 1.2 2009/05/08 17:28:01 christos Exp $"); 36 #endif 37 #endif /* lint */ 38 39 #include "magic.h" 40 #include <stdarg.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <ctype.h> 44 #if defined(HAVE_WCHAR_H) 45 #include <wchar.h> 46 #endif 47 #if defined(HAVE_WCTYPE_H) 48 #include <wctype.h> 49 #endif 50 #if defined(HAVE_LIMITS_H) 51 #include <limits.h> 52 #endif 53 54 #ifndef SIZE_MAX 55 #define SIZE_MAX ((size_t)~0) 56 #endif 57 58 /* 59 * Like printf, only we append to a buffer. 60 */ 61 protected int 62 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 63 { 64 int len; 65 char *buf, *newstr; 66 67 len = vasprintf(&buf, fmt, ap); 68 if (len < 0) 69 goto out; 70 71 if (ms->o.buf != NULL) { 72 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 73 free(buf); 74 if (len < 0) 75 goto out; 76 free(ms->o.buf); 77 buf = newstr; 78 } 79 ms->o.buf = buf; 80 return 0; 81 out: 82 file_error(ms, errno, "vasprintf failed"); 83 return -1; 84 } 85 86 protected int 87 file_printf(struct magic_set *ms, const char *fmt, ...) 88 { 89 int rv; 90 va_list ap; 91 92 va_start(ap, fmt); 93 rv = file_vprintf(ms, fmt, ap); 94 va_end(ap); 95 return rv; 96 } 97 98 /* 99 * error - print best error message possible 100 */ 101 /*VARARGS*/ 102 private void 103 file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 104 size_t lineno) 105 { 106 /* Only the first error is ok */ 107 if (ms->event_flags & EVENT_HAD_ERR) 108 return; 109 if (lineno != 0) { 110 free(ms->o.buf); 111 ms->o.buf = NULL; 112 file_printf(ms, "line %zu: ", lineno); 113 } 114 file_vprintf(ms, f, va); 115 if (error > 0) 116 file_printf(ms, " (%s)", strerror(error)); 117 ms->event_flags |= EVENT_HAD_ERR; 118 ms->error = error; 119 } 120 121 /*VARARGS*/ 122 protected void 123 file_error(struct magic_set *ms, int error, const char *f, ...) 124 { 125 va_list va; 126 va_start(va, f); 127 file_error_core(ms, error, f, va, 0); 128 va_end(va); 129 } 130 131 /* 132 * Print an error with magic line number. 133 */ 134 /*VARARGS*/ 135 protected void 136 file_magerror(struct magic_set *ms, const char *f, ...) 137 { 138 va_list va; 139 va_start(va, f); 140 file_error_core(ms, 0, f, va, ms->line); 141 va_end(va); 142 } 143 144 protected void 145 file_oomem(struct magic_set *ms, size_t len) 146 { 147 file_error(ms, errno, "cannot allocate %zu bytes", len); 148 } 149 150 protected void 151 file_badseek(struct magic_set *ms) 152 { 153 file_error(ms, errno, "error seeking"); 154 } 155 156 protected void 157 file_badread(struct magic_set *ms) 158 { 159 file_error(ms, errno, "error reading"); 160 } 161 162 #ifndef COMPILE_ONLY 163 protected int 164 file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf, 165 size_t nb) 166 { 167 int m = 0, rv = 0, looks_text = 0; 168 int mime = ms->flags & MAGIC_MIME; 169 const unsigned char *ubuf = CAST(const unsigned char *, buf); 170 unichar *u8buf = NULL; 171 size_t ulen; 172 const char *code = NULL; 173 const char *code_mime = "binary"; 174 const char *type = NULL; 175 176 177 178 if (nb == 0) { 179 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 180 file_printf(ms, mime ? "application/x-empty" : 181 "empty") == -1) 182 return -1; 183 return 1; 184 } else if (nb == 1) { 185 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 186 file_printf(ms, mime ? "application/octet-stream" : 187 "very short file (no magic)") == -1) 188 return -1; 189 return 1; 190 } 191 192 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 193 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 194 &code, &code_mime, &type); 195 } 196 197 #ifdef __EMX__ 198 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 199 switch (file_os2_apptype(ms, inname, buf, nb)) { 200 case -1: 201 return -1; 202 case 0: 203 break; 204 default: 205 return 1; 206 } 207 } 208 #endif 209 210 /* try compression stuff */ 211 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 212 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 213 if ((ms->flags & MAGIC_DEBUG) != 0) 214 (void)fprintf(stderr, "zmagic %d\n", m); 215 goto done; 216 } 217 218 /* Check if we have a tar file */ 219 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 220 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 221 if ((ms->flags & MAGIC_DEBUG) != 0) 222 (void)fprintf(stderr, "tar %d\n", m); 223 goto done; 224 } 225 226 /* Check if we have a CDF file */ 227 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 228 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 229 if ((ms->flags & MAGIC_DEBUG) != 0) 230 (void)fprintf(stderr, "cdf %d\n", m); 231 goto done; 232 } 233 234 /* try soft magic tests */ 235 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 236 if ((m = file_softmagic(ms, ubuf, nb, BINTEST)) != 0) { 237 if ((ms->flags & MAGIC_DEBUG) != 0) 238 (void)fprintf(stderr, "softmagic %d\n", m); 239 #ifdef BUILTIN_ELF 240 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 241 nb > 5 && fd != -1) { 242 /* 243 * We matched something in the file, so this 244 * *might* be an ELF file, and the file is at 245 * least 5 bytes long, so if it's an ELF file 246 * it has at least one byte past the ELF magic 247 * number - try extracting information from the 248 * ELF headers that cannot easily * be 249 * extracted with rules in the magic file. 250 */ 251 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 252 if ((ms->flags & MAGIC_DEBUG) != 0) 253 (void)fprintf(stderr, 254 "elf %d\n", m); 255 } 256 #endif 257 goto done; 258 } 259 260 /* try text properties (and possibly text tokens) */ 261 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 262 263 if ((m = file_ascmagic(ms, ubuf, nb)) != 0) { 264 if ((ms->flags & MAGIC_DEBUG) != 0) 265 (void)fprintf(stderr, "ascmagic %d\n", m); 266 goto done; 267 } 268 269 /* try to discover text encoding */ 270 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 271 if (looks_text == 0) 272 if ((m = file_ascmagic_with_encoding( ms, ubuf, 273 nb, u8buf, ulen, code, type)) != 0) { 274 if ((ms->flags & MAGIC_DEBUG) != 0) 275 (void)fprintf(stderr, 276 "ascmagic/enc %d\n", m); 277 goto done; 278 } 279 } 280 } 281 282 /* give up */ 283 m = 1; 284 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 285 file_printf(ms, mime ? "application/octet-stream" : "data") == -1) { 286 rv = -1; 287 } 288 done: 289 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 290 if (ms->flags & MAGIC_MIME_TYPE) 291 if (file_printf(ms, "; charset=") == -1) 292 rv = -1; 293 if (file_printf(ms, "%s", code_mime) == -1) 294 rv = -1; 295 } 296 if (u8buf) 297 free(u8buf); 298 if (rv) 299 return rv; 300 301 return m; 302 } 303 #endif 304 305 protected int 306 file_reset(struct magic_set *ms) 307 { 308 if (ms->mlist == NULL) { 309 file_error(ms, 0, "no magic files loaded"); 310 return -1; 311 } 312 if (ms->o.buf) { 313 free(ms->o.buf); 314 ms->o.buf = NULL; 315 } 316 if (ms->o.pbuf) { 317 free(ms->o.pbuf); 318 ms->o.pbuf = NULL; 319 } 320 ms->event_flags &= ~EVENT_HAD_ERR; 321 ms->error = -1; 322 return 0; 323 } 324 325 #define OCTALIFY(n, o) \ 326 /*LINTED*/ \ 327 (void)(*(n)++ = '\\', \ 328 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 329 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 330 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 331 (o)++) 332 333 protected const char * 334 file_getbuffer(struct magic_set *ms) 335 { 336 char *pbuf, *op, *np; 337 size_t psize, len; 338 339 if (ms->event_flags & EVENT_HAD_ERR) 340 return NULL; 341 342 if (ms->flags & MAGIC_RAW) 343 return ms->o.buf; 344 345 if (ms->o.buf == NULL) 346 return NULL; 347 348 /* * 4 is for octal representation, + 1 is for NUL */ 349 len = strlen(ms->o.buf); 350 if (len > (SIZE_MAX - 1) / 4) { 351 file_oomem(ms, len); 352 return NULL; 353 } 354 psize = len * 4 + 1; 355 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 356 file_oomem(ms, psize); 357 return NULL; 358 } 359 ms->o.pbuf = pbuf; 360 361 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 362 { 363 mbstate_t state; 364 wchar_t nextchar; 365 int mb_conv = 1; 366 size_t bytesconsumed; 367 char *eop; 368 (void)memset(&state, 0, sizeof(mbstate_t)); 369 370 np = ms->o.pbuf; 371 op = ms->o.buf; 372 eop = op + len; 373 374 while (op < eop) { 375 bytesconsumed = mbrtowc(&nextchar, op, 376 (size_t)(eop - op), &state); 377 if (bytesconsumed == (size_t)(-1) || 378 bytesconsumed == (size_t)(-2)) { 379 mb_conv = 0; 380 break; 381 } 382 383 if (iswprint(nextchar)) { 384 (void)memcpy(np, op, bytesconsumed); 385 op += bytesconsumed; 386 np += bytesconsumed; 387 } else { 388 while (bytesconsumed-- > 0) 389 OCTALIFY(np, op); 390 } 391 } 392 *np = '\0'; 393 394 /* Parsing succeeded as a multi-byte sequence */ 395 if (mb_conv != 0) 396 return ms->o.pbuf; 397 } 398 #endif 399 400 for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) { 401 if (isprint((unsigned char)*op)) { 402 *np++ = *op; 403 } else { 404 OCTALIFY(np, op); 405 } 406 } 407 *np = '\0'; 408 return ms->o.pbuf; 409 } 410 411 protected int 412 file_check_mem(struct magic_set *ms, unsigned int level) 413 { 414 size_t len; 415 416 if (level >= ms->c.len) { 417 len = (ms->c.len += 20) * sizeof(*ms->c.li); 418 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 419 malloc(len) : 420 realloc(ms->c.li, len)); 421 if (ms->c.li == NULL) { 422 file_oomem(ms, len); 423 return -1; 424 } 425 } 426 ms->c.li[level].got_match = 0; 427 #ifdef ENABLE_CONDITIONALS 428 ms->c.li[level].last_match = 0; 429 ms->c.li[level].last_cond = COND_NONE; 430 #endif /* ENABLE_CONDITIONALS */ 431 return 0; 432 } 433