1 /* @(#)apprentice.c 1.13 09/07/11 joerg */ 2 #ifndef lint 3 static const char sccsid[] = 4 "@(#)apprentice.c 1.13 09/07/11 joerg"; 5 #endif 6 /* 7 ** find file types by using a modified "magic" file 8 ** 9 ** based on file v3.22 by Ian F. Darwin (see below) 10 ** 11 ** Modified for mkhybrid James Pearson 19/5/98 12 */ 13 14 /* 15 * apprentice - make one pass through /etc/magic, learning its secrets. 16 * 17 * Copyright (c) Ian F. Darwin, 1987. 18 * Written by Ian F. Darwin. 19 * 20 * This software is not subject to any export provision of the United States 21 * Department of Commerce, and may be exported to any country or planet. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the above copyright 27 * notice immediately at the beginning of the file, without modification, 28 * this list of conditions, and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 33 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 34 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 35 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 36 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 37 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 38 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 39 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 41 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 42 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 43 * SUCH DAMAGE. 44 */ 45 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <ctype.h> 50 #include "proto.h" 51 #include "file.h" 52 53 #ifndef lint 54 static const char moduleid[] = 55 "@(#)Id: apprentice.c,v 1.25 1997/01/15 17:23:24 christos Exp"; 56 #endif /* lint */ 57 58 int __f_nmagic = 0; /* number of valid magic[]s */ 59 #if defined(IS_MACOS_X) 60 /* 61 * The MAC OS X linker does not grok "common" varaibles. 62 * Make __f_magic a "data" variable. 63 */ 64 struct magic *__f_magic = 0; /* array of magic entries */ 65 #else 66 struct magic *__f_magic; /* array of magic entries */ 67 #endif 68 69 #define EATAB {while (isascii((unsigned char) *l) && \ 70 isspace((unsigned char) *l)) ++l;} 71 #define LOWCASE(l) (isupper((unsigned char) (l)) ? 
\ 72 tolower((unsigned char) (l)) : (l)) 73 74 75 static int getvalue (struct magic *, char **); 76 static int hextoint (int); 77 static char *apgetstr (char *, char *, int, int *); 78 static int parse (char *, int *, int); 79 static void eatsize (char **); 80 81 static int maxmagic = 0; 82 83 static int apprentice_1 (char *, int); 84 85 /* 86 * init_magic - read magic file and set up mapping 87 * based on the original apprentice() 88 */ 89 int 90 init_magic( 91 char *fn /* list of magic files */ 92 ) 93 { 94 maxmagic = MAXMAGIS; 95 __f_magic = (struct magic *) calloc(sizeof(struct magic), maxmagic); 96 if (__f_magic == NULL) 97 return -1; 98 99 return(apprentice_1(fn, 0)); 100 } 101 102 static int 103 apprentice_1( 104 char *fn, /* name of magic file */ 105 int check /* non-zero? checking-only run. */ 106 ) 107 { 108 static const char hdr[] = 109 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 110 FILE *f; 111 char line[BUFSIZ+1]; 112 int errs = 0; 113 int lineno; 114 115 f = fopen(fn, "r"); 116 if (f==NULL) { 117 return -1; 118 } 119 120 /* parse it */ 121 if (check) /* print silly verbose header for USG compat. */ 122 (void) printf("%s\n", hdr); 123 124 for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) { 125 if (line[0]=='#') /* comment, do not parse */ 126 continue; 127 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */ 128 continue; 129 line[strlen(line)-1] = '\0'; /* delete newline */ 130 if (parse(line, &__f_nmagic, check) != 0) 131 errs = 1; 132 } 133 134 (void) fclose(f); 135 return errs; 136 } 137 138 /* 139 * extend the sign bit if the comparison is to be signed 140 * XXX is uint32 really a good idea XXX JS 141 */ 142 UInt32_t 143 signextend(struct magic *m, UInt32_t v) 144 { 145 if (!(m->flag & UNSIGNED)) 146 switch(m->type) { 147 /* 148 * Do not remove the casts below. They are 149 * vital. When later compared with the data, 150 * the sign extension must have happened. 
151 */ 152 case BYTE: 153 v = (char) v; 154 break; 155 case SHORT: 156 case BESHORT: 157 case LESHORT: 158 v = (short) v; 159 break; 160 case DATE: 161 case BEDATE: 162 case LEDATE: 163 case LONG: 164 case BELONG: 165 case LELONG: 166 v = (Int32_t) v; 167 break; 168 case STRING: 169 break; 170 default: 171 return -1; 172 } 173 return v; 174 } 175 176 /* 177 * parse one line from magic file, put into magic[index++] if valid 178 */ 179 static int 180 parse(char *l, int *ndx, int check) 181 { 182 int i = 0, nd = *ndx; 183 struct magic *m; 184 char *t, *s; 185 186 #define ALLOC_INCR 20 187 if (nd+1 >= maxmagic){ 188 maxmagic += ALLOC_INCR; 189 if ((__f_magic = (struct magic *) realloc(__f_magic, 190 sizeof(struct magic) * 191 maxmagic)) == NULL) { 192 #ifdef MAIN 193 (void) fprintf(stderr, "%s: Out of memory.\n", progname); 194 #else 195 (void) fprintf(stderr, "libfile: Out of memory.\n"); 196 #endif 197 if (check) 198 return -1; 199 else 200 exit(1); 201 } 202 memset(&__f_magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR); 203 } 204 m = &__f_magic[*ndx]; 205 m->flag = 0; 206 m->cont_level = 0; 207 208 while (*l == '>') { 209 ++l; /* step over */ 210 m->cont_level++; 211 } 212 213 if (m->cont_level != 0 && *l == '(') { 214 ++l; /* step over */ 215 m->flag |= INDIR; 216 } 217 if (m->cont_level != 0 && *l == '&') { 218 ++l; /* step over */ 219 m->flag |= ADD; 220 } 221 222 /* get offset, then skip over it */ 223 m->offset = (int) strtoul(l,&t,0); 224 /* 225 if (l == t) 226 magwarn("offset %s invalid", l); 227 */ 228 l = t; 229 230 if (m->flag & INDIR) { 231 m->in.type = LONG; 232 m->in.offset = 0; 233 /* 234 * read [.lbs][+-]nnnnn) 235 */ 236 if (*l == '.') { 237 l++; 238 switch (LOWCASE(*l)) { 239 case 'l': 240 m->in.type = LONG; 241 break; 242 case 'h': 243 case 's': 244 m->in.type = SHORT; 245 break; 246 case 'c': 247 case 'b': 248 m->in.type = BYTE; 249 break; 250 default: 251 break; 252 } 253 l++; 254 } 255 s = l; 256 if (*l == '+' || *l == '-') l++; 257 if 
(isdigit((unsigned char)*l)) { 258 m->in.offset = strtoul(l, &t, 0); 259 if (*s == '-') m->in.offset = - m->in.offset; 260 } 261 else 262 t = l; 263 /* 264 if (*t++ != ')') 265 magwarn("missing ')' in indirect offset"); 266 */ 267 l = t; 268 } 269 270 271 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l)) 272 ++l; 273 EATAB; 274 275 #define NBYTE 4 276 #define NSHORT 5 277 #define NLONG 4 278 #define NSTRING 6 279 #define NDATE 4 280 #define NBESHORT 7 281 #define NBELONG 6 282 #define NBEDATE 6 283 #define NLESHORT 7 284 #define NLELONG 6 285 #define NLEDATE 6 286 287 if (*l == 'u') { 288 ++l; 289 m->flag |= UNSIGNED; 290 } 291 292 /* get type, skip it */ 293 if (strncmp(l, "byte", NBYTE)==0) { 294 m->type = BYTE; 295 l += NBYTE; 296 } else if (strncmp(l, "short", NSHORT)==0) { 297 m->type = SHORT; 298 l += NSHORT; 299 } else if (strncmp(l, "long", NLONG)==0) { 300 m->type = LONG; 301 l += NLONG; 302 } else if (strncmp(l, "string", NSTRING)==0) { 303 m->type = STRING; 304 l += NSTRING; 305 } else if (strncmp(l, "date", NDATE)==0) { 306 m->type = DATE; 307 l += NDATE; 308 } else if (strncmp(l, "beshort", NBESHORT)==0) { 309 m->type = BESHORT; 310 l += NBESHORT; 311 } else if (strncmp(l, "belong", NBELONG)==0) { 312 m->type = BELONG; 313 l += NBELONG; 314 } else if (strncmp(l, "bedate", NBEDATE)==0) { 315 m->type = BEDATE; 316 l += NBEDATE; 317 } else if (strncmp(l, "leshort", NLESHORT)==0) { 318 m->type = LESHORT; 319 l += NLESHORT; 320 } else if (strncmp(l, "lelong", NLELONG)==0) { 321 m->type = LELONG; 322 l += NLELONG; 323 } else if (strncmp(l, "ledate", NLEDATE)==0) { 324 m->type = LEDATE; 325 l += NLEDATE; 326 } else { 327 return -1; 328 } 329 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 330 if (*l == '&') { 331 ++l; 332 m->mask = signextend(m, (UInt32_t)strtoul(l, &l, 0)); /* XXX JS uint32 cat may be wrong */ 333 eatsize(&l); 334 } else 335 m->mask = ~0L; 336 EATAB; 337 338 switch (*l) { 339 case '>': 340 case '<': 341 /* 
Old-style anding: "0 byte &0x80 dynamically linked" */ 342 case '&': 343 case '^': 344 case '=': 345 m->reln = *l; 346 ++l; 347 break; 348 case '!': 349 if (m->type != STRING) { 350 m->reln = *l; 351 ++l; 352 break; 353 } 354 /* FALL THROUGH */ 355 default: 356 if (*l == 'x' && isascii((unsigned char)l[1]) && 357 isspace((unsigned char)l[1])) { 358 m->reln = *l; 359 ++l; 360 goto GetDesc; /* Bill The Cat */ 361 } 362 m->reln = '='; 363 break; 364 } 365 EATAB; 366 367 if (getvalue(m, &l)) 368 return -1; 369 /* 370 * TODO finish this macro and start using it! 371 * #define offsetcheck {if (offset > HOWMANY-1) 372 * magwarn("offset too big"); } 373 */ 374 375 /* 376 * now get last part - the description 377 */ 378 GetDesc: 379 EATAB; 380 if (l[0] == '\b') { 381 ++l; 382 m->nospflag = 1; 383 } else if ((l[0] == '\\') && (l[1] == 'b')) { 384 ++l; 385 ++l; 386 m->nospflag = 1; 387 } else 388 m->nospflag = 0; 389 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC) 390 /* NULLBODY */; 391 392 ++(*ndx); /* make room for next */ 393 return 0; 394 } 395 396 /* 397 * Read a numeric value from a pointer, into the value union of a magic 398 * pointer, according to the magic type. Update the string pointer to point 399 * just after the number read. Return 0 for success, non-zero for failure. 400 */ 401 static int 402 getvalue(struct magic *m, char **p) 403 { 404 int slen; 405 406 if (m->type == STRING) { 407 *p = apgetstr(*p, m->value.s, sizeof(m->value.s), &slen); 408 m->vallen = slen; 409 } else 410 if (m->reln != 'x') { 411 m->value.l = signextend(m, (UInt32_t)strtoul(*p, p, 0)); /* XXX JS uint32 cat may be wrong */ 412 eatsize(p); 413 } 414 return 0; 415 } 416 417 /* 418 * Convert a string containing C character escapes. Stop at an unescaped 419 * space or tab. 420 * Copy the converted version to "p", returning its length in *slen. 421 * Return updated scan pointer as function result. 
/*
 * apgetstr() - decode one string value of a magic line.
 *
 * s	 source text; scanning stops at the first unescaped white space
 *	 character (which is consumed) or at the terminating NUL
 * p	 destination buffer, always NUL terminated on return
 * plen	 size of the destination buffer in bytes
 * slen	 out: number of bytes stored in p, not counting the final NUL
 *
 * Understood escapes: \n \r \b \t \f \v, \ plus up to three octal digits,
 * \x plus up to two hex digits; any other escaped character stands for
 * itself.  If the value does not fit, "String too long" is printed to
 * stderr and the value is cut.
 *
 * The returned pointer never goes past the terminating NUL of s, so the
 * caller can safely continue scanning from it.
 */
static int	hextoint(int c);

static char *
apgetstr(char *s, char *p, int plen, int *slen)
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;	/* room is kept for the final NUL */
	int	c;
	int	val;
	int	n;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* stay on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		/* escape sequence: look at the character after the '\' */
		c = *s;
		if (c == '\0')
			break;		/* lone trailing backslash is dropped */
		s++;

		switch (c) {
		case 'n':
			*p++ = '\n';
			break;
		case 'r':
			*p++ = '\r';
			break;
		case 'b':
			*p++ = '\b';
			break;
		case 't':
			*p++ = '\t';
			break;
		case 'f':
			*p++ = '\f';
			break;
		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			for (n = 0; n < 2 && *s >= '0' && *s <= '7'; n++)
				val = (val << 3) | (*s++ - '0');
			*p++ = (char)val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			n = hextoint(*s);
			if (n >= 0) {
				val = n;
				s++;
				n = hextoint(*s);
				if (n >= 0) {
					val = (val << 4) + n;
					s++;
				}
			}
			*p++ = (char)val;
			break;

		default:
			*p++ = (char) c;
			break;
		}
	}
	*p = '\0';
	*slen = p - origp;
	return s;
}


/*
 * Single hex char to int; -1 if not a hex char.
 *
 * Plain range checks on the int itself: no dependency on the non-ISO
 * isascii(), and values outside the char range are never mistaken for
 * digits.
 */
static int
hextoint(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}


/*
 * Print a string containing C character escapes.
 *
 * fp	stream to write to
 * s	bytes to print
 * len	number of bytes to print, or -1 to print up to the first NUL
 *
 * Characters in the range 040..0176 are written as they are.  Everything
 * else is written as \n \r \b \t \f \v or as a backslash followed by three
 * octal digits.
 */
void
showstr(FILE *fp, const char *s, int len)
{
	int	to_nul = (len == -1);
	char	c;

	for (;;) {
		/* test the count first so that s[len] is never read */
		if (!to_nul && len-- == 0)
			break;
		c = *s++;
		if (to_nul && c == '\0')
			break;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\n':
			(void) fputc('n', fp);
			break;
		case '\r':
			(void) fputc('r', fp);
			break;
		case '\b':
			(void) fputc('b', fp);
			break;
		case '\t':
			(void) fputc('t', fp);
			break;
		case '\f':
			(void) fputc('f', fp);
			break;
		case '\v':
			(void) fputc('v', fp);
			break;
		default:
			(void) fprintf(fp, "%.3o", c & 0377);
			break;
		}
	}
}

/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Skips an optional 'u' and then at most one of l, s, h, b, c (either
 * case) and stores the advanced pointer back through p.
 */
static void
eatsize(char **p)
{
	char	*l = *p;

	if (*l == 'u' || *l == 'U')
		l++;

	switch (*l) {
	case 'l': case 'L':	/* long */
	case 's': case 'S':	/* short */
	case 'h': case 'H':	/* short */
	case 'b': case 'B':	/* char/byte */
	case 'c': case 'C':	/* char/byte */
		l++;
		break;
	default:
		break;
	}

	*p = l;
}