1*4887Schin /*********************************************************************** 2*4887Schin * * 3*4887Schin * This software is part of the ast package * 4*4887Schin * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5*4887Schin * and is licensed under the * 6*4887Schin * Common Public License, Version 1.0 * 7*4887Schin * by AT&T Knowledge Ventures * 8*4887Schin * * 9*4887Schin * A copy of the License is available at * 10*4887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 11*4887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12*4887Schin * * 13*4887Schin * Information and Software Systems Research * 14*4887Schin * AT&T Research * 15*4887Schin * Florham Park NJ * 16*4887Schin * * 17*4887Schin * Glenn Fowler <gsf@research.att.com> * 18*4887Schin * David Korn <dgk@research.att.com> * 19*4887Schin * Phong Vo <kpv@research.att.com> * 20*4887Schin * * 21*4887Schin ***********************************************************************/ 22*4887Schin #pragma prototyped 23*4887Schin /* 24*4887Schin * Glenn Fowler 25*4887Schin * AT&T Research 26*4887Schin * 27*4887Schin * library interface to file 28*4887Schin * 29*4887Schin * the sum of the hacks {s5,v10,planix} is _____ than the parts 30*4887Schin */ 31*4887Schin 32*4887Schin static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2007-01-08 $\0\n"; 33*4887Schin 34*4887Schin static const char lib[] = "libast:magic"; 35*4887Schin 36*4887Schin #include <ast.h> 37*4887Schin #include <ctype.h> 38*4887Schin #include <ccode.h> 39*4887Schin #include <dt.h> 40*4887Schin #include <modex.h> 41*4887Schin #include <error.h> 42*4887Schin #include <regex.h> 43*4887Schin #include <swap.h> 44*4887Schin 45*4887Schin #define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) 46*4887Schin 47*4887Schin #define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) 48*4887Schin 49*4887Schin #define MAXNEST 10 /* { ... 
} nesting limit */ 50*4887Schin #define MINITEM 4 /* magic buffer rounding */ 51*4887Schin 52*4887Schin typedef struct /* identifier dictionary entry */ 53*4887Schin { 54*4887Schin const char name[16]; /* identifier name */ 55*4887Schin int value; /* identifier value */ 56*4887Schin Dtlink_t link; /* dictionary link */ 57*4887Schin } Info_t; 58*4887Schin 59*4887Schin typedef struct Edit /* edit substitution */ 60*4887Schin { 61*4887Schin struct Edit* next; /* next in list */ 62*4887Schin regex_t* from; /* from pattern */ 63*4887Schin } Edit_t; 64*4887Schin 65*4887Schin struct Entry; 66*4887Schin 67*4887Schin typedef struct /* loop info */ 68*4887Schin { 69*4887Schin struct Entry* lab; /* call this function */ 70*4887Schin int start; /* start here */ 71*4887Schin int size; /* increment by this amount */ 72*4887Schin int count; /* dynamic loop count */ 73*4887Schin int offset; /* dynamic offset */ 74*4887Schin } Loop_t; 75*4887Schin 76*4887Schin typedef struct Entry /* magic file entry */ 77*4887Schin { 78*4887Schin struct Entry* next; /* next in list */ 79*4887Schin char* expr; /* offset expression */ 80*4887Schin union 81*4887Schin { 82*4887Schin unsigned long num; 83*4887Schin char* str; 84*4887Schin struct Entry* lab; 85*4887Schin regex_t* sub; 86*4887Schin Loop_t* loop; 87*4887Schin } value; /* comparison value */ 88*4887Schin char* desc; /* file description */ 89*4887Schin char* mime; /* file mime type */ 90*4887Schin unsigned long offset; /* offset in bytes */ 91*4887Schin unsigned long mask; /* mask before compare */ 92*4887Schin char cont; /* continuation operation */ 93*4887Schin char type; /* datum type */ 94*4887Schin char op; /* comparison operation */ 95*4887Schin char nest; /* { or } nesting operation */ 96*4887Schin char swap; /* forced swap order */ 97*4887Schin } Entry_t; 98*4887Schin 99*4887Schin #define CC_BIT 5 100*4887Schin 101*4887Schin #if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) 102*4887Schin typedef unsigned short Cctype_t; 103*4887Schin #else 
104*4887Schin typedef unsigned long Cctype_t; 105*4887Schin #endif 106*4887Schin 107*4887Schin #define CC_text 0x01 108*4887Schin #define CC_control 0x02 109*4887Schin #define CC_latin 0x04 110*4887Schin #define CC_binary 0x08 111*4887Schin #define CC_utf_8 0x10 112*4887Schin 113*4887Schin #define CC_notext CC_text /* CC_text is flipped before checking */ 114*4887Schin 115*4887Schin #define CC_MASK (CC_binary|CC_latin|CC_control|CC_text) 116*4887Schin 117*4887Schin #define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) 118*4887Schin 119*4887Schin #define ID_NONE 0 120*4887Schin #define ID_ASM 1 121*4887Schin #define ID_C 2 122*4887Schin #define ID_COBOL 3 123*4887Schin #define ID_COPYBOOK 4 124*4887Schin #define ID_CPLUSPLUS 5 125*4887Schin #define ID_FORTRAN 6 126*4887Schin #define ID_HTML 7 127*4887Schin #define ID_INCL1 8 128*4887Schin #define ID_INCL2 9 129*4887Schin #define ID_INCL3 10 130*4887Schin #define ID_MAM1 11 131*4887Schin #define ID_MAM2 12 132*4887Schin #define ID_MAM3 13 133*4887Schin #define ID_NOTEXT 14 134*4887Schin #define ID_PL1 15 135*4887Schin #define ID_YACC 16 136*4887Schin 137*4887Schin #define ID_MAX ID_YACC 138*4887Schin 139*4887Schin #define INFO_atime 1 140*4887Schin #define INFO_blocks 2 141*4887Schin #define INFO_ctime 3 142*4887Schin #define INFO_fstype 4 143*4887Schin #define INFO_gid 5 144*4887Schin #define INFO_mode 6 145*4887Schin #define INFO_mtime 7 146*4887Schin #define INFO_name 8 147*4887Schin #define INFO_nlink 9 148*4887Schin #define INFO_size 10 149*4887Schin #define INFO_uid 11 150*4887Schin 151*4887Schin #define _MAGIC_PRIVATE_ \ 152*4887Schin Magicdisc_t* disc; /* discipline */ \ 153*4887Schin Vmalloc_t* vm; /* vmalloc region */ \ 154*4887Schin Entry_t* magic; /* parsed magic table */ \ 155*4887Schin Entry_t* magiclast; /* last entry in magic */ \ 156*4887Schin char* mime; /* MIME type */ \ 157*4887Schin unsigned char* x2n; /* 
CC_ALIEN=>CC_NATIVE */ \ 158*4887Schin char fbuf[SF_BUFSIZE + 1]; /* file data */ \ 159*4887Schin char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \ 160*4887Schin char nbuf[256]; /* !CC_NATIVE data */ \ 161*4887Schin char mbuf[64]; /* mime string */ \ 162*4887Schin char sbuf[64]; /* type suffix string */ \ 163*4887Schin char tbuf[2 * PATH_MAX]; /* type string */ \ 164*4887Schin Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \ 165*4887Schin unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \ 166*4887Schin unsigned int multi[UCHAR_MAX + 1]; /* muti char count */ \ 167*4887Schin int keep[MAXNEST]; /* ckmagic nest stack */ \ 168*4887Schin char* cap[MAXNEST]; /* ckmagic mime stack */ \ 169*4887Schin char* msg[MAXNEST]; /* ckmagic text stack */ \ 170*4887Schin Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \ 171*4887Schin int fbsz; /* fbuf size */ \ 172*4887Schin int fbmx; /* fbuf max size */ \ 173*4887Schin int xbsz; /* xbuf size */ \ 174*4887Schin int swap; /* swap() operation */ \ 175*4887Schin unsigned long flags; /* disc+open flags */ \ 176*4887Schin long xoff; /* xbuf offset */ \ 177*4887Schin int identifier[ID_MAX + 1]; /* Info_t identifier */ \ 178*4887Schin Sfio_t* fp; /* fbuf fp */ \ 179*4887Schin Sfio_t* tmp; /* tmp string */ \ 180*4887Schin regdisc_t redisc; /* regex discipline */ \ 181*4887Schin Dtdisc_t dtdisc; /* dict discipline */ \ 182*4887Schin Dt_t* idtab; /* identifier dict */ \ 183*4887Schin Dt_t* infotab; /* info keyword dict */ 184*4887Schin 185*4887Schin #include <magic.h> 186*4887Schin 187*4887Schin static Info_t dict[] = /* keyword dictionary */ 188*4887Schin { 189*4887Schin { "COMMON", ID_FORTRAN }, 190*4887Schin { "COMPUTE", ID_COBOL }, 191*4887Schin { "COMP", ID_COPYBOOK }, 192*4887Schin { "COMPUTATIONAL",ID_COPYBOOK }, 193*4887Schin { "DCL", ID_PL1 }, 194*4887Schin { "DEFINED", ID_PL1 }, 195*4887Schin { "DIMENSION", ID_FORTRAN }, 196*4887Schin { "DIVISION", ID_COBOL }, 197*4887Schin { "FILLER", ID_COPYBOOK }, 
198*4887Schin { "FIXED", ID_PL1 }, 199*4887Schin { "FUNCTION", ID_FORTRAN }, 200*4887Schin { "HTML", ID_HTML }, 201*4887Schin { "INTEGER", ID_FORTRAN }, 202*4887Schin { "MAIN", ID_PL1 }, 203*4887Schin { "OPTIONS", ID_PL1 }, 204*4887Schin { "PERFORM", ID_COBOL }, 205*4887Schin { "PIC", ID_COPYBOOK }, 206*4887Schin { "REAL", ID_FORTRAN }, 207*4887Schin { "REDEFINES", ID_COPYBOOK }, 208*4887Schin { "S9", ID_COPYBOOK }, 209*4887Schin { "SECTION", ID_COBOL }, 210*4887Schin { "SELECT", ID_COBOL }, 211*4887Schin { "SUBROUTINE", ID_FORTRAN }, 212*4887Schin { "TEXT", ID_ASM }, 213*4887Schin { "VALUE", ID_COPYBOOK }, 214*4887Schin { "attr", ID_MAM3 }, 215*4887Schin { "binary", ID_YACC }, 216*4887Schin { "block", ID_FORTRAN }, 217*4887Schin { "bss", ID_ASM }, 218*4887Schin { "byte", ID_ASM }, 219*4887Schin { "char", ID_C }, 220*4887Schin { "class", ID_CPLUSPLUS }, 221*4887Schin { "clr", ID_NOTEXT }, 222*4887Schin { "comm", ID_ASM }, 223*4887Schin { "common", ID_FORTRAN }, 224*4887Schin { "data", ID_ASM }, 225*4887Schin { "dimension", ID_FORTRAN }, 226*4887Schin { "done", ID_MAM2 }, 227*4887Schin { "double", ID_C }, 228*4887Schin { "even", ID_ASM }, 229*4887Schin { "exec", ID_MAM3 }, 230*4887Schin { "extern", ID_C }, 231*4887Schin { "float", ID_C }, 232*4887Schin { "function", ID_FORTRAN }, 233*4887Schin { "globl", ID_ASM }, 234*4887Schin { "h", ID_INCL3 }, 235*4887Schin { "html", ID_HTML }, 236*4887Schin { "include", ID_INCL1 }, 237*4887Schin { "int", ID_C }, 238*4887Schin { "integer", ID_FORTRAN }, 239*4887Schin { "jmp", ID_NOTEXT }, 240*4887Schin { "left", ID_YACC }, 241*4887Schin { "libc", ID_INCL2 }, 242*4887Schin { "long", ID_C }, 243*4887Schin { "make", ID_MAM1 }, 244*4887Schin { "mov", ID_NOTEXT }, 245*4887Schin { "private", ID_CPLUSPLUS }, 246*4887Schin { "public", ID_CPLUSPLUS }, 247*4887Schin { "real", ID_FORTRAN }, 248*4887Schin { "register", ID_C }, 249*4887Schin { "right", ID_YACC }, 250*4887Schin { "sfio", ID_INCL2 }, 251*4887Schin { "static", ID_C }, 
252*4887Schin { "stdio", ID_INCL2 }, 253*4887Schin { "struct", ID_C }, 254*4887Schin { "subroutine", ID_FORTRAN }, 255*4887Schin { "sys", ID_NOTEXT }, 256*4887Schin { "term", ID_YACC }, 257*4887Schin { "text", ID_ASM }, 258*4887Schin { "tst", ID_NOTEXT }, 259*4887Schin { "type", ID_YACC }, 260*4887Schin { "typedef", ID_C }, 261*4887Schin { "u", ID_INCL2 }, 262*4887Schin { "union", ID_YACC }, 263*4887Schin { "void", ID_C }, 264*4887Schin }; 265*4887Schin 266*4887Schin static Info_t info[] = 267*4887Schin { 268*4887Schin { "atime", INFO_atime }, 269*4887Schin { "blocks", INFO_blocks }, 270*4887Schin { "ctime", INFO_ctime }, 271*4887Schin { "fstype", INFO_fstype }, 272*4887Schin { "gid", INFO_gid }, 273*4887Schin { "mode", INFO_mode }, 274*4887Schin { "mtime", INFO_mtime }, 275*4887Schin { "name", INFO_name }, 276*4887Schin { "nlink", INFO_nlink }, 277*4887Schin { "size", INFO_size }, 278*4887Schin { "uid", INFO_uid }, 279*4887Schin }; 280*4887Schin 281*4887Schin /* 282*4887Schin * return pointer to data at offset off and size siz 283*4887Schin */ 284*4887Schin 285*4887Schin static char* 286*4887Schin getdata(register Magic_t* mp, register long off, register int siz) 287*4887Schin { 288*4887Schin register long n; 289*4887Schin 290*4887Schin if (off < 0) 291*4887Schin return 0; 292*4887Schin if (off + siz <= mp->fbsz) 293*4887Schin return mp->fbuf + off; 294*4887Schin if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz) 295*4887Schin { 296*4887Schin if (off + siz > mp->fbmx) 297*4887Schin return 0; 298*4887Schin n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2); 299*4887Schin if (sfseek(mp->fp, n, SEEK_SET) != n) 300*4887Schin return 0; 301*4887Schin if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0) 302*4887Schin { 303*4887Schin mp->xoff = 0; 304*4887Schin mp->xbsz = 0; 305*4887Schin return 0; 306*4887Schin } 307*4887Schin mp->xbuf[mp->xbsz] = 0; 308*4887Schin mp->xoff = n; 309*4887Schin if (off + siz > mp->xoff + mp->xbsz) 310*4887Schin return 0; 
311*4887Schin } 312*4887Schin return mp->xbuf + off - mp->xoff; 313*4887Schin } 314*4887Schin 315*4887Schin /* 316*4887Schin * @... evaluator for strexpr() 317*4887Schin */ 318*4887Schin 319*4887Schin static long 320*4887Schin indirect(const char* cs, char** e, void* handle) 321*4887Schin { 322*4887Schin register char* s = (char*)cs; 323*4887Schin register Magic_t* mp = (Magic_t*)handle; 324*4887Schin register long n = 0; 325*4887Schin register char* p; 326*4887Schin 327*4887Schin if (s) 328*4887Schin { 329*4887Schin if (*s == '@') 330*4887Schin { 331*4887Schin n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0); 332*4887Schin switch (*(s = *e)) 333*4887Schin { 334*4887Schin case 'b': 335*4887Schin case 'B': 336*4887Schin s++; 337*4887Schin if (p = getdata(mp, n, 1)) 338*4887Schin n = *(unsigned char*)p; 339*4887Schin else 340*4887Schin s = (char*)cs; 341*4887Schin break; 342*4887Schin case 'h': 343*4887Schin case 'H': 344*4887Schin s++; 345*4887Schin if (p = getdata(mp, n, 2)) 346*4887Schin n = swapget(mp->swap, p, 2); 347*4887Schin else 348*4887Schin s = (char*)cs; 349*4887Schin break; 350*4887Schin case 'q': 351*4887Schin case 'Q': 352*4887Schin s++; 353*4887Schin if (p = getdata(mp, n, 8)) 354*4887Schin n = swapget(mp->swap, p, 8); 355*4887Schin else 356*4887Schin s = (char*)cs; 357*4887Schin break; 358*4887Schin default: 359*4887Schin if (isalnum(*s)) 360*4887Schin s++; 361*4887Schin if (p = getdata(mp, n, 4)) 362*4887Schin n = swapget(mp->swap, p, 4); 363*4887Schin else 364*4887Schin s = (char*)cs; 365*4887Schin break; 366*4887Schin } 367*4887Schin } 368*4887Schin *e = s; 369*4887Schin } 370*4887Schin else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 371*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e); 372*4887Schin return n; 373*4887Schin } 374*4887Schin 375*4887Schin /* 376*4887Schin * emit regex error message 377*4887Schin */ 378*4887Schin 379*4887Schin static void 380*4887Schin regmessage(Magic_t* mp, 
regex_t* re, int code) 381*4887Schin { 382*4887Schin char buf[128]; 383*4887Schin 384*4887Schin if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 385*4887Schin { 386*4887Schin regerror(code, re, buf, sizeof(buf)); 387*4887Schin (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf); 388*4887Schin } 389*4887Schin } 390*4887Schin 391*4887Schin /* 392*4887Schin * decompose vcodex(3) method composition 393*4887Schin */ 394*4887Schin 395*4887Schin static char* 396*4887Schin vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x) 397*4887Schin { 398*4887Schin unsigned char* map; 399*4887Schin int c; 400*4887Schin int n; 401*4887Schin int i; 402*4887Schin 403*4887Schin map = CCMAP(CC_ASCII, CC_NATIVE); 404*4887Schin i = 1; 405*4887Schin for (;;) 406*4887Schin { 407*4887Schin if (i) 408*4887Schin i = 0; 409*4887Schin else 410*4887Schin *b++ = '^'; 411*4887Schin while (b < e && m < x && (c = *m++)) 412*4887Schin { 413*4887Schin if (map) 414*4887Schin c = map[c]; 415*4887Schin *b++ = c; 416*4887Schin } 417*4887Schin if (b >= e) 418*4887Schin break; 419*4887Schin n = 0; 420*4887Schin while (m < x) 421*4887Schin { 422*4887Schin n = (n<<7) | (*m & 0x7f); 423*4887Schin if (!(*m++ & 0x80)) 424*4887Schin break; 425*4887Schin } 426*4887Schin if (n >= (x - m)) 427*4887Schin break; 428*4887Schin m += n; 429*4887Schin } 430*4887Schin return b; 431*4887Schin } 432*4887Schin 433*4887Schin /* 434*4887Schin * check for magic table match in buf 435*4887Schin */ 436*4887Schin 437*4887Schin static char* 438*4887Schin ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off) 439*4887Schin { 440*4887Schin register Entry_t* ep; 441*4887Schin register char* p; 442*4887Schin register char* b; 443*4887Schin register int level = 0; 444*4887Schin int call = -1; 445*4887Schin int c; 446*4887Schin char* q; 447*4887Schin char* t; 448*4887Schin char* base = 0; 449*4887Schin unsigned long num; 450*4887Schin unsigned long mask; 451*4887Schin regmatch_t 
matches[10]; 452*4887Schin 453*4887Schin mp->swap = 0; 454*4887Schin b = mp->msg[0] = buf; 455*4887Schin mp->mime = mp->cap[0] = 0; 456*4887Schin mp->keep[0] = 0; 457*4887Schin for (ep = mp->magic; ep; ep = ep->next) 458*4887Schin { 459*4887Schin fun: 460*4887Schin if (ep->nest == '{') 461*4887Schin { 462*4887Schin if (++level >= MAXNEST) 463*4887Schin { 464*4887Schin call = -1; 465*4887Schin level = 0; 466*4887Schin mp->keep[0] = 0; 467*4887Schin b = mp->msg[0]; 468*4887Schin mp->mime = mp->cap[0]; 469*4887Schin continue; 470*4887Schin } 471*4887Schin mp->keep[level] = mp->keep[level - 1] != 0; 472*4887Schin mp->msg[level] = b; 473*4887Schin mp->cap[level] = mp->mime; 474*4887Schin } 475*4887Schin switch (ep->cont) 476*4887Schin { 477*4887Schin case '#': 478*4887Schin if (mp->keep[level] && b > buf) 479*4887Schin { 480*4887Schin *b = 0; 481*4887Schin return buf; 482*4887Schin } 483*4887Schin mp->swap = 0; 484*4887Schin b = mp->msg[0] = buf; 485*4887Schin mp->mime = mp->cap[0] = 0; 486*4887Schin if (ep->type == ' ') 487*4887Schin continue; 488*4887Schin break; 489*4887Schin case '$': 490*4887Schin if (mp->keep[level] && call < (MAXNEST - 1)) 491*4887Schin { 492*4887Schin mp->ret[++call] = ep; 493*4887Schin ep = ep->value.lab; 494*4887Schin goto fun; 495*4887Schin } 496*4887Schin continue; 497*4887Schin case ':': 498*4887Schin ep = mp->ret[call--]; 499*4887Schin if (ep->op == 'l') 500*4887Schin goto fun; 501*4887Schin continue; 502*4887Schin case '|': 503*4887Schin if (mp->keep[level] > 1) 504*4887Schin goto checknest; 505*4887Schin /*FALLTHROUGH*/ 506*4887Schin default: 507*4887Schin if (!mp->keep[level]) 508*4887Schin { 509*4887Schin b = mp->msg[level]; 510*4887Schin mp->mime = mp->cap[level]; 511*4887Schin goto checknest; 512*4887Schin } 513*4887Schin break; 514*4887Schin } 515*4887Schin if (!ep->expr) 516*4887Schin num = ep->offset + off; 517*4887Schin else 518*4887Schin switch (ep->offset) 519*4887Schin { 520*4887Schin case 0: 521*4887Schin num = 
strexpr(ep->expr, NiL, indirect, mp) + off; 522*4887Schin break; 523*4887Schin case INFO_atime: 524*4887Schin num = st->st_atime; 525*4887Schin ep->type = 'D'; 526*4887Schin break; 527*4887Schin case INFO_blocks: 528*4887Schin num = iblocks(st); 529*4887Schin ep->type = 'N'; 530*4887Schin break; 531*4887Schin case INFO_ctime: 532*4887Schin num = st->st_ctime; 533*4887Schin ep->type = 'D'; 534*4887Schin break; 535*4887Schin case INFO_fstype: 536*4887Schin p = fmtfs(st); 537*4887Schin ep->type = toupper(ep->type); 538*4887Schin break; 539*4887Schin case INFO_gid: 540*4887Schin if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 541*4887Schin { 542*4887Schin p = fmtgid(st->st_gid); 543*4887Schin ep->type = toupper(ep->type); 544*4887Schin } 545*4887Schin else 546*4887Schin { 547*4887Schin num = st->st_gid; 548*4887Schin ep->type = 'N'; 549*4887Schin } 550*4887Schin break; 551*4887Schin case INFO_mode: 552*4887Schin if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 553*4887Schin { 554*4887Schin p = fmtmode(st->st_mode, 0); 555*4887Schin ep->type = toupper(ep->type); 556*4887Schin } 557*4887Schin else 558*4887Schin { 559*4887Schin num = modex(st->st_mode); 560*4887Schin ep->type = 'N'; 561*4887Schin } 562*4887Schin break; 563*4887Schin case INFO_mtime: 564*4887Schin num = st->st_ctime; 565*4887Schin ep->type = 'D'; 566*4887Schin break; 567*4887Schin case INFO_name: 568*4887Schin if (!base) 569*4887Schin { 570*4887Schin if (base = strrchr(file, '/')) 571*4887Schin base++; 572*4887Schin else 573*4887Schin base = (char*)file; 574*4887Schin } 575*4887Schin p = base; 576*4887Schin ep->type = toupper(ep->type); 577*4887Schin break; 578*4887Schin case INFO_nlink: 579*4887Schin num = st->st_nlink; 580*4887Schin ep->type = 'N'; 581*4887Schin break; 582*4887Schin case INFO_size: 583*4887Schin num = st->st_size; 584*4887Schin ep->type = 'N'; 585*4887Schin break; 586*4887Schin case INFO_uid: 587*4887Schin if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 
588*4887Schin { 589*4887Schin p = fmtuid(st->st_uid); 590*4887Schin ep->type = toupper(ep->type); 591*4887Schin } 592*4887Schin else 593*4887Schin { 594*4887Schin num = st->st_uid; 595*4887Schin ep->type = 'N'; 596*4887Schin } 597*4887Schin break; 598*4887Schin } 599*4887Schin switch (ep->type) 600*4887Schin { 601*4887Schin 602*4887Schin case 'b': 603*4887Schin if (!(p = getdata(mp, num, 1))) 604*4887Schin goto next; 605*4887Schin num = *(unsigned char*)p; 606*4887Schin break; 607*4887Schin 608*4887Schin case 'h': 609*4887Schin if (!(p = getdata(mp, num, 2))) 610*4887Schin goto next; 611*4887Schin num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2); 612*4887Schin break; 613*4887Schin 614*4887Schin case 'd': 615*4887Schin case 'l': 616*4887Schin case 'v': 617*4887Schin if (!(p = getdata(mp, num, 4))) 618*4887Schin goto next; 619*4887Schin num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4); 620*4887Schin break; 621*4887Schin 622*4887Schin case 'q': 623*4887Schin if (!(p = getdata(mp, num, 8))) 624*4887Schin goto next; 625*4887Schin num = swapget(ep->swap ? 
(~ep->swap ^ mp->swap) : mp->swap, p, 8); 626*4887Schin break; 627*4887Schin 628*4887Schin case 'e': 629*4887Schin if (!(p = getdata(mp, num, 0))) 630*4887Schin goto next; 631*4887Schin /*FALLTHROUGH*/ 632*4887Schin case 'E': 633*4887Schin if (!ep->value.sub) 634*4887Schin goto next; 635*4887Schin if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 636*4887Schin { 637*4887Schin c = mp->fbsz; 638*4887Schin if (c >= sizeof(mp->nbuf)) 639*4887Schin c = sizeof(mp->nbuf) - 1; 640*4887Schin p = (char*)memcpy(mp->nbuf, p, c); 641*4887Schin p[c] = 0; 642*4887Schin ccmapstr(mp->x2n, p, c); 643*4887Schin if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 644*4887Schin { 645*4887Schin if (c != REG_NOMATCH) 646*4887Schin regmessage(mp, ep->value.sub, c); 647*4887Schin goto next; 648*4887Schin } 649*4887Schin } 650*4887Schin p = ep->value.sub->re_sub->re_buf; 651*4887Schin q = T(ep->desc); 652*4887Schin t = *q ? q : p; 653*4887Schin if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b') 654*4887Schin *b++ = ' '; 655*4887Schin b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b')); 656*4887Schin if (ep->mime) 657*4887Schin mp->mime = ep->mime; 658*4887Schin goto checknest; 659*4887Schin 660*4887Schin case 's': 661*4887Schin if (!(p = getdata(mp, num, ep->mask))) 662*4887Schin goto next; 663*4887Schin goto checkstr; 664*4887Schin case 'm': 665*4887Schin if (!(p = getdata(mp, num, 0))) 666*4887Schin goto next; 667*4887Schin /*FALLTHROUGH*/ 668*4887Schin case 'M': 669*4887Schin case 'S': 670*4887Schin checkstr: 671*4887Schin for (;;) 672*4887Schin { 673*4887Schin if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p)) 674*4887Schin break; 675*4887Schin if ((ep->type == 'm' || ep->type == 'M') ? 
strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask)) 676*4887Schin break; 677*4887Schin if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf)) 678*4887Schin goto next; 679*4887Schin p = (char*)memcpy(mp->nbuf, p, ep->mask); 680*4887Schin p[ep->mask] = 0; 681*4887Schin ccmapstr(mp->x2n, p, ep->mask); 682*4887Schin } 683*4887Schin q = T(ep->desc); 684*4887Schin if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') 685*4887Schin *b++ = ' '; 686*4887Schin for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++); 687*4887Schin *t = 0; 688*4887Schin b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p); 689*4887Schin *t = c; 690*4887Schin if (ep->mime) 691*4887Schin mp->mime = ep->mime; 692*4887Schin goto checknest; 693*4887Schin 694*4887Schin } 695*4887Schin if (mask = ep->mask) 696*4887Schin num &= mask; 697*4887Schin switch (ep->op) 698*4887Schin { 699*4887Schin 700*4887Schin case '=': 701*4887Schin case '@': 702*4887Schin if (num == ep->value.num) 703*4887Schin break; 704*4887Schin if (ep->cont != '#') 705*4887Schin goto next; 706*4887Schin if (!mask) 707*4887Schin mask = ~mask; 708*4887Schin if (ep->type == 'h') 709*4887Schin { 710*4887Schin if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num) 711*4887Schin { 712*4887Schin if (!(mp->swap & (mp->swap + 1))) 713*4887Schin mp->swap = 7; 714*4887Schin goto swapped; 715*4887Schin } 716*4887Schin } 717*4887Schin else if (ep->type == 'l') 718*4887Schin { 719*4887Schin for (c = 1; c < 4; c++) 720*4887Schin if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num) 721*4887Schin { 722*4887Schin if (!(mp->swap & (mp->swap + 1))) 723*4887Schin mp->swap = 7; 724*4887Schin goto swapped; 725*4887Schin } 726*4887Schin } 727*4887Schin else if (ep->type == 'q') 728*4887Schin { 729*4887Schin for (c = 1; c < 8; c++) 730*4887Schin if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num) 731*4887Schin goto swapped; 732*4887Schin } 
733*4887Schin goto next; 734*4887Schin 735*4887Schin case '!': 736*4887Schin if (num != ep->value.num) 737*4887Schin break; 738*4887Schin goto next; 739*4887Schin 740*4887Schin case '^': 741*4887Schin if (num ^ ep->value.num) 742*4887Schin break; 743*4887Schin goto next; 744*4887Schin 745*4887Schin case '>': 746*4887Schin if (num > ep->value.num) 747*4887Schin break; 748*4887Schin goto next; 749*4887Schin 750*4887Schin case '<': 751*4887Schin if (num < ep->value.num) 752*4887Schin break; 753*4887Schin goto next; 754*4887Schin 755*4887Schin case 'l': 756*4887Schin if (num > 0 && mp->keep[level] && call < (MAXNEST - 1)) 757*4887Schin { 758*4887Schin if (!ep->value.loop->count) 759*4887Schin { 760*4887Schin ep->value.loop->count = num; 761*4887Schin ep->value.loop->offset = off; 762*4887Schin off = ep->value.loop->start; 763*4887Schin } 764*4887Schin else if (!--ep->value.loop->count) 765*4887Schin { 766*4887Schin off = ep->value.loop->offset; 767*4887Schin goto next; 768*4887Schin } 769*4887Schin else 770*4887Schin off += ep->value.loop->size; 771*4887Schin mp->ret[++call] = ep; 772*4887Schin ep = ep->value.loop->lab; 773*4887Schin goto fun; 774*4887Schin } 775*4887Schin goto next; 776*4887Schin 777*4887Schin case 'm': 778*4887Schin c = mp->swap; 779*4887Schin t = ckmagic(mp, file, b + (b > buf), st, num); 780*4887Schin mp->swap = c; 781*4887Schin if (!t) 782*4887Schin goto next; 783*4887Schin if (b > buf) 784*4887Schin *b = ' '; 785*4887Schin b += strlen(b); 786*4887Schin break; 787*4887Schin 788*4887Schin case 'r': 789*4887Schin #if _UWIN 790*4887Schin { 791*4887Schin char* e; 792*4887Schin Sfio_t* rp; 793*4887Schin Sfio_t* gp; 794*4887Schin 795*4887Schin if (!(t = strrchr(file, '.'))) 796*4887Schin goto next; 797*4887Schin sfprintf(mp->tmp, "/reg/classes_root/%s", t); 798*4887Schin if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r"))) 799*4887Schin goto next; 800*4887Schin *ep->desc = 0; 801*4887Schin *ep->mime = 0; 802*4887Schin gp = 0; 803*4887Schin while 
(t = sfgetr(rp, '\n', 1)) 804*4887Schin { 805*4887Schin if (strneq(t, "Content Type=", 13)) 806*4887Schin { 807*4887Schin ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0); 808*4887Schin strcpy(ep->mime, t + 13); 809*4887Schin if (gp) 810*4887Schin break; 811*4887Schin } 812*4887Schin else 813*4887Schin { 814*4887Schin sfprintf(mp->tmp, "/reg/classes_root/%s", t); 815*4887Schin if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r"))) 816*4887Schin { 817*4887Schin ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1); 818*4887Schin strcpy(ep->desc, t); 819*4887Schin if (*ep->mime) 820*4887Schin break; 821*4887Schin } 822*4887Schin } 823*4887Schin } 824*4887Schin sfclose(rp); 825*4887Schin if (!gp) 826*4887Schin goto next; 827*4887Schin if (!*ep->mime) 828*4887Schin { 829*4887Schin t = T(ep->desc); 830*4887Schin if (!strncasecmp(t, "microsoft", 9)) 831*4887Schin t += 9; 832*4887Schin while (isspace(*t)) 833*4887Schin t++; 834*4887Schin e = "application/x-ms-"; 835*4887Schin ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e)); 836*4887Schin e = strcopy(ep->mime, e); 837*4887Schin while ((c = *t++) && c != '.' && c != ' ') 838*4887Schin *e++ = isupper(c) ? 
tolower(c) : c; 839*4887Schin *e = 0; 840*4887Schin } 841*4887Schin while (t = sfgetr(gp, '\n', 1)) 842*4887Schin if (*t && !streq(t, "\"\"")) 843*4887Schin { 844*4887Schin ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0); 845*4887Schin strcpy(ep->desc, t); 846*4887Schin break; 847*4887Schin } 848*4887Schin sfclose(gp); 849*4887Schin if (!*ep->desc) 850*4887Schin goto next; 851*4887Schin if (!t) 852*4887Schin for (t = T(ep->desc); *t; t++) 853*4887Schin if (*t == '.') 854*4887Schin *t = ' '; 855*4887Schin if (!mp->keep[level]) 856*4887Schin mp->keep[level] = 2; 857*4887Schin mp->mime = ep->mime; 858*4887Schin break; 859*4887Schin } 860*4887Schin #else 861*4887Schin if (ep->cont == '#' && !mp->keep[level]) 862*4887Schin mp->keep[level] = 1; 863*4887Schin goto next; 864*4887Schin #endif 865*4887Schin 866*4887Schin case 'v': 867*4887Schin if (!(p = getdata(mp, num, 4))) 868*4887Schin goto next; 869*4887Schin c = 0; 870*4887Schin do 871*4887Schin { 872*4887Schin num++; 873*4887Schin c = (c<<7) | (*p & 0x7f); 874*4887Schin } while (*p++ & 0x80); 875*4887Schin if (!(p = getdata(mp, num, c))) 876*4887Schin goto next; 877*4887Schin if (mp->keep[level]++ && b > buf && *(b - 1) != ' ') 878*4887Schin { 879*4887Schin *b++ = ','; 880*4887Schin *b++ = ' '; 881*4887Schin } 882*4887Schin b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c); 883*4887Schin goto checknest; 884*4887Schin 885*4887Schin } 886*4887Schin swapped: 887*4887Schin q = T(ep->desc); 888*4887Schin if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' 
&& *q != '\b') 889*4887Schin *b++ = ' '; 890*4887Schin if (ep->type == 'd' || ep->type == 'D') 891*4887Schin b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num)); 892*4887Schin else if (ep->type == 'v') 893*4887Schin b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num)); 894*4887Schin else 895*4887Schin b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num); 896*4887Schin if (ep->mime && *ep->mime) 897*4887Schin mp->mime = ep->mime; 898*4887Schin checknest: 899*4887Schin if (ep->nest == '}') 900*4887Schin { 901*4887Schin if (!mp->keep[level]) 902*4887Schin { 903*4887Schin b = mp->msg[level]; 904*4887Schin mp->mime = mp->cap[level]; 905*4887Schin } 906*4887Schin else if (level > 0) 907*4887Schin mp->keep[level - 1] = mp->keep[level]; 908*4887Schin if (--level < 0) 909*4887Schin { 910*4887Schin level = 0; 911*4887Schin mp->keep[0] = 0; 912*4887Schin } 913*4887Schin } 914*4887Schin continue; 915*4887Schin next: 916*4887Schin if (ep->cont == '&') 917*4887Schin mp->keep[level] = 0; 918*4887Schin goto checknest; 919*4887Schin } 920*4887Schin if (mp->keep[level] && b > buf) 921*4887Schin { 922*4887Schin *b = 0; 923*4887Schin return buf; 924*4887Schin } 925*4887Schin return 0; 926*4887Schin } 927*4887Schin 928*4887Schin /* 929*4887Schin * check english language stats 930*4887Schin */ 931*4887Schin 932*4887Schin static int 933*4887Schin ckenglish(register Magic_t* mp, int pun, int badpun) 934*4887Schin { 935*4887Schin register char* s; 936*4887Schin register int vowl = 0; 937*4887Schin register int freq = 0; 938*4887Schin register int rare = 0; 939*4887Schin 940*4887Schin if (5 * badpun > pun) 941*4887Schin return 0; 942*4887Schin if (2 * mp->count[';'] > mp->count['E'] + mp->count['e']) 943*4887Schin return 0; 944*4887Schin if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e']) 945*4887Schin return 0; 946*4887Schin for (s = "aeiou"; *s; s++) 947*4887Schin vowl += 
mp->count[toupper(*s)] + mp->count[*s]; 948*4887Schin for (s = "etaion"; *s; s++) 949*4887Schin freq += mp->count[toupper(*s)] + mp->count[*s]; 950*4887Schin for (s = "vjkqxz"; *s; s++) 951*4887Schin rare += mp->count[toupper(*s)] + mp->count[*s]; 952*4887Schin return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare; 953*4887Schin } 954*4887Schin 955*4887Schin /* 956*4887Schin * check programming language stats 957*4887Schin */ 958*4887Schin 959*4887Schin static char* 960*4887Schin cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st) 961*4887Schin { 962*4887Schin register int c; 963*4887Schin register unsigned char* b; 964*4887Schin register unsigned char* e; 965*4887Schin register int q; 966*4887Schin register char* s; 967*4887Schin char* t; 968*4887Schin char* base; 969*4887Schin char* suff; 970*4887Schin char* t1; 971*4887Schin char* t2; 972*4887Schin char* t3; 973*4887Schin int n; 974*4887Schin int badpun; 975*4887Schin int code; 976*4887Schin int pun; 977*4887Schin Cctype_t flags; 978*4887Schin Info_t* ip; 979*4887Schin 980*4887Schin b = (unsigned char*)mp->fbuf; 981*4887Schin e = b + mp->fbsz; 982*4887Schin memzero(mp->count, sizeof(mp->count)); 983*4887Schin memzero(mp->multi, sizeof(mp->multi)); 984*4887Schin memzero(mp->identifier, sizeof(mp->identifier)); 985*4887Schin 986*4887Schin /* 987*4887Schin * check character coding 988*4887Schin */ 989*4887Schin 990*4887Schin flags = 0; 991*4887Schin while (b < e) 992*4887Schin flags |= mp->cctype[*b++]; 993*4887Schin b = (unsigned char*)mp->fbuf; 994*4887Schin code = 0; 995*4887Schin q = CC_ASCII; 996*4887Schin n = CC_MASK; 997*4887Schin for (c = 0; c < CC_MAPS; c++) 998*4887Schin { 999*4887Schin flags ^= CC_text; 1000*4887Schin if ((flags & CC_MASK) < n) 1001*4887Schin { 1002*4887Schin n = flags & CC_MASK; 1003*4887Schin q = c; 1004*4887Schin } 1005*4887Schin flags >>= CC_BIT; 1006*4887Schin } 1007*4887Schin flags = n; 1008*4887Schin if (!(flags & (CC_binary|CC_notext))) 
{
		/*
		 * text in some code set: if it is not the native one then
		 * remember it in code (reported by the qualify: step) and
		 * hand fbuf to ccmaps() -- presumably an in place
		 * conversion from q to CC_NATIVE; TODO confirm
		 */

		if (q != CC_NATIVE)
		{
			code = q;
			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
		}

		/*
		 * #! interpreter line: s is the run of printable
		 * characters after "#!" and any space, temporarily
		 * terminated in place (the overwritten byte is saved in c)
		 */

		if (b[0] == '#' && b[1] == '!')
		{
			for (b += 2; b < e && isspace(*b); b++);
			for (s = (char*)b; b < e && isprint(*b); b++);
			c = *b;
			*b = 0;
			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
			{
				/*
				 * reduce s to the interpreter base name,
				 * cut at the first white space
				 */

				if (t = strrchr(s, '/'))
					s = t + 1;
				for (t = s; *t; t++)
					if (isspace(*t))
					{
						*t = 0;
						break;
					}
				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
				mp->mime = mp->mbuf;

				/*
				 * NOTE(review): b may equal e here, in which
				 * case the *b++ = ' '; *b = 0; pairs below
				 * store at fbuf[fbsz] and fbuf[fbsz+1] --
				 * confirm fbuf has room for the second byte
				 */

				if (match(s, "*sh"))
				{
					t1 = T("command");
					if (streq(s, "sh"))
						*s = 0;
					else
					{
						*b++ = ' ';
						*b = 0;
					}
				}
				else
				{
					t1 = T("interpreter");
					*b++ = ' ';
					*b = 0;
				}
				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
				s = mp->sbuf;
				goto qualify;
			}

			/*
			 * not accepted as a script: restore the byte and rescan
			 */

			*b = c;
			b = (unsigned char*)mp->fbuf;
		}

		/*
		 * one pass over the text
		 *
		 *	count[c]	occurrences of byte c
		 *	multi[]		occurrences of multi-character constructs
		 *	identifier[]	occurrences of dict identifiers by class
		 *	pun,badpun	punctuation tallies for ckenglish()
		 *
		 * q is the character that ends the current quote or
		 * comment (0 if none), s the start of the current
		 * identifier (0 if none) and t the position just after
		 * the most recent `{' seen right after an identifier
		 * that was preceded by newline or backslash
		 */

		badpun = 0;
		pun = 0;
		q = 0;
		s = 0;
		t = 0;
		while (b < e)
		{
			c = *b++;
			mp->count[c]++;
			if (c == q && (q != '*' || *b == '/' && b++))
			{
				/*
				 * end of quote/comment; a comment opened by
				 * slash-star only ends at star-slash
				 */

				mp->multi[q]++;
				q = 0;
			}
			else if (c == '\\')
			{
				/*
				 * backslash: drop any pending identifier and
				 * skip the next byte
				 */

				s = 0;
				b++;
			}
			else if (!q)
			{
				if (isalpha(c) || c == '_')
				{
					if (!s)
						s = (char*)b - 1;
				}
				else if (!isdigit(c))
				{
					if (s)
					{
						/*
						 * c ends the identifier at s: classify
						 * it by the character just before it
						 */

						if (s > mp->fbuf)
							switch (*(s - 1))
							{
							case ':':
								if (*b == ':')
									mp->multi[':']++;
								break;
							case '.':
								/*
								 * .XX at the start of a line
								 */

								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
									mp->multi['.']++;
								break;
							case '\n':
							case '\\':
								if (*b == '{')
									t = (char*)b + 1;
								break;
							case '{':
								if (s == t && *b == '}')
									mp->multi['X']++;
								break;
							}

						/*
						 * the identifier dictionary is created
						 * on first use from dict[]; q is
						 * borrowed as the loop index and then
						 * reset
						 */

						if (!mp->idtab)
						{
							if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
								for (q = 0; q < elementsof(dict); q++)
									dtinsert(mp->idtab, &dict[q]);
							else if (mp->disc->errorf)
								(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
							q = 0;
						}
						if (mp->idtab)
						{
							/*
							 * terminate the identifier in place
							 * for the lookup, then restore c
							 */

							*(b - 1) = 0;
							if (ip = (Info_t*)dtmatch(mp->idtab, s))
								mp->identifier[ip->value]++;
							*(b - 1) = c;
						}
						s = 0;
					}
					switch (c)
					{
					case '\t':
						/*
						 * tab at the start of a line
						 */

						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
							mp->multi['\t']++;
						break;
					case '"':
					case '\'':
						q = c;
						break;
					case '/':
						if (*b == '*')
							q = *b++;
						else if (*b == '/')
							q = '\n';
						break;
					case '$':
						if (*b == '(' && *(b + 1) != ' ')
							mp->multi['$']++;
						break;
					case '{':
					case '}':
					case '[':
					case ']':
					case '(':
						mp->multi[c]++;
						break;
					case ')':
						mp->multi[c]++;
						goto punctuation;
					case ':':
						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
							mp->multi[':']++;
						goto punctuation;
					case '.':
					case ',':
					case '%':
					case ';':
					case '?':
					punctuation:
						/*
						 * punctuation not followed by blank
						 * or newline counts as bad
						 */

						pun++;
						if (*b != ' ' && *b != '\n')
							badpun++;
						break;
					}
				}
			}
		}
	}
	else
		while (b < e)
			mp->count[*b++]++;

	/*
	 * base is the last component of file and suff the text after
	 * its last `.' ("" if there is none)
	 */

	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
	if (!flags)
	{
		/*
		 * file name checks come first and jump straight to the
		 * matching id_* label, bypassing the content heuristics
		 * that guard each label below
		 */

		if (match(suff, "*sh|bat|cmd"))
			goto id_sh;
		if (match(base, "*@(mkfile)"))
			goto id_mk;
		if (match(base, "*@(makefile|.mk)"))
			goto id_make;
		if (match(base, "*@(mamfile|.mam)"))
			goto id_mam;
		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
			goto id_c;
		if (match(suff, "f"))
			goto id_fortran;
		if (match(suff, "htm+(l)"))
			goto id_html;
		if (match(suff, "cpy"))
			goto id_copybook;
		if (match(suff, "cob|cbl|cb2"))
			goto id_cobol;
		if (match(suff, "pl[1i]"))
			goto id_pl1;
		if (match(suff, "tex"))
			goto id_tex;
		if (match(suff, "asm|s"))
			goto id_asm;

		/*
		 * content heuristics, first match wins
		 */

		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
		{
		id_sh:
			s = T("command script");
			mp->mime = "application/sh";
			goto qualify;
		}
		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
		{
			s = T("mail message");
			mp->mime = "message/rfc822";
			goto qualify;
		}
		if (match(base, "*@(mkfile)"))
		{
		id_mk:
			s = "mkfile";
			mp->mime = "application/mk";
			goto qualify;
		}
		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
		{
		id_make:
			s = "makefile";
			mp->mime = "application/make";
			goto qualify;
		}
		if (mp->multi['.'] >= 3)
		{
			s = T("nroff input");
			mp->mime = "application/x-troff";
			goto qualify;
		}
		if (mp->multi['X'] >= 3)
		{
			s = T("TeX input");
			mp->mime = "application/x-tex";
			goto qualify;
		}

		/*
		 * c family: brackets must balance exactly when fbsz is
		 * below SF_BUFSIZE; otherwise opens need only be at
		 * least as frequent as closes
		 */

		if (mp->fbsz < SF_BUFSIZE &&
		    (mp->multi['('] == mp->multi[')'] &&
		     mp->multi['{'] == mp->multi['}'] &&
		     mp->multi['['] == mp->multi[']']) ||
		    mp->fbsz >= SF_BUFSIZE &&
		    (mp->multi['('] >= mp->multi[')'] &&
		     mp->multi['{'] >= mp->multi['}'] &&
		     mp->multi['['] >= mp->multi[']']))
		{
			c = mp->identifier[ID_INCL1];
			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
			    mp->count['='] >= 20 && mp->count[';'] >= 20)
			{
			id_c:
				/*
				 * description is t1 t2 t3:
				 * [lex |yacc ](c |c++ )(program|header)
				 */

				t1 = "";
				t2 = "c ";
				t3 = T("program");
				switch (*suff)
				{
				case 'c':
				case 'C':
					mp->mime = "application/x-cc";
					break;
				case 'l':
				case 'L':
					t1 = "lex ";
					mp->mime = "application/x-lex";
					break;
				default:
					t3 = T("header");
					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
					{
						mp->mime = "application/x-cc";
						break;
					}
					/*FALLTHROUGH*/
				case 'y':
				case 'Y':
					t1 = "yacc ";
					mp->mime = "application/x-yacc";
					break;
				}
				if (mp->identifier[ID_CPLUSPLUS] >= 3)
				{
					t2 = "c++ ";
					mp->mime = "application/x-c++";
				}
				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
				s = mp->sbuf;
				goto qualify;
			}
		}
		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
		{
		id_mam:
			s = T("mam program");
			mp->mime = "application/x-mam";
			goto qualify;
		}
		if (mp->identifier[ID_FORTRAN] >= 8)
		{
		id_fortran:
			s = T("fortran program");
			mp->mime = "application/x-fortran";
			goto qualify;
		}
		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
		{
		id_html:
			s = T("html input");
			mp->mime = "text/html";
			goto qualify;
		}
		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_copybook:
			s = T("cobol copybook");
			mp->mime = "application/x-cobol";
			goto qualify;
		}
		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_cobol:
			s = T("cobol program");
			mp->mime = "application/x-cobol";
			goto qualify;
		}
		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_pl1:
			s = T("pl1 program");
			mp->mime = "application/x-pl1";
			goto qualify;
		}
		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
		{
		id_tex:
			s = T("TeX input");
			mp->mime = "text/tex";
			goto qualify;
		}
		if (mp->identifier[ID_ASM] >= 4)
		{
		id_asm:
			s = T("as program");
			mp->mime = "application/x-as";
			goto qualify;
		}
		if (ckenglish(mp, pun, badpun))
		{
			s = T("english text");
			mp->mime = "text/plain";
			goto qualify;
		}
	}
	else if (streq(base, "core"))
	{
		mp->mime = "x-system/core";
		return T("core dump");
	}

	/*
	 * binary/notext data may still be utf-8: q is the number of
	 * leading 1 bits of the lead byte and each continuation byte
	 * must look like 10xxxxxx; if the whole buffer scans cleanly
	 * and at least one multi-byte sequence was seen (n) then the
	 * binary bits are replaced by CC_utf_8
	 */

	if (flags & (CC_binary|CC_notext))
	{
		b = (unsigned char*)mp->fbuf;
		e = b + mp->fbsz;
		n = 0;
		for (;;)
		{
			c = *b++;
			q = 0;
			while (c & 0x80)
			{
				c <<= 1;
				q++;
			}
			switch (q)
			{
			case 4:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
				/*FALLTHROUGH*/
			case 3:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
				/*FALLTHROUGH*/
			case 2:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
				n = 1;
				/*FALLTHROUGH*/
			case 0:
				if (b >= e)
				{
					if (n)
					{
						flags &= ~(CC_binary|CC_notext);
						flags |= CC_utf_8;
					}
					break;
				}
				continue;
			}

			/*
			 * any other q, a bad continuation byte or the
			 * end of the buffer ends the scan
			 */

			break;
		}
	}
	if (flags & (CC_binary|CC_notext))
	{
		unsigned long	d = 0;

		/*
		 * q is the per-value count expected if the bytes were
		 * evenly spread; d stays 0 unless q is at least 2
		 */

		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
		{
			/*
			 * compression/encryption via standard deviation
			 *
			 * NOTE(review): the loop stops before byte value
			 * UCHAR_MAX, so count[UCHAR_MAX] is never included
			 * -- confirm that this is intended
			 */


			for (c = 0; c < UCHAR_MAX; c++)
			{
				pun = mp->count[c] - q;
				d += pun * pun;
			}
			d /= mp->fbsz;
		}
		if (d <= 0)
			s = T("binary");
		else if (d < 4)
			s = T("encrypted");
		else if (d < 16)
			s = T("packed");
		else if (d < 64)
			s = T("compressed");
		else if (d < 256)
			s = T("delta");
		else
			s = T("data");
		mp->mime = "application/octet-stream";
		return s;
	}

	/*
	 * unclassified text
	 */

	mp->mime = "text/plain";
	if (flags & CC_utf_8)
		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
	else if (flags & CC_latin)
		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
	else
		s = (flags & CC_control) ?
T("text with control characters") : T("text"); 1449*4887Schin qualify: 1450*4887Schin if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r']) 1451*4887Schin { 1452*4887Schin t = "dos "; 1453*4887Schin mp->mime = "text/dos"; 1454*4887Schin } 1455*4887Schin else 1456*4887Schin t = ""; 1457*4887Schin if (code) 1458*4887Schin { 1459*4887Schin if (code == CC_ASCII) 1460*4887Schin sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s); 1461*4887Schin else 1462*4887Schin { 1463*4887Schin sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s); 1464*4887Schin mp->mime = "text/ebcdic"; 1465*4887Schin } 1466*4887Schin s = buf; 1467*4887Schin } 1468*4887Schin else if (*t) 1469*4887Schin { 1470*4887Schin sfsprintf(buf, PATH_MAX, "%s%s", t, s); 1471*4887Schin s = buf; 1472*4887Schin } 1473*4887Schin return s; 1474*4887Schin } 1475*4887Schin 1476*4887Schin /* 1477*4887Schin * return the basic magic string for file,st in buf,size 1478*4887Schin */ 1479*4887Schin 1480*4887Schin static char* 1481*4887Schin type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size) 1482*4887Schin { 1483*4887Schin register char* s; 1484*4887Schin register char* t; 1485*4887Schin 1486*4887Schin mp->mime = 0; 1487*4887Schin if (!S_ISREG(st->st_mode)) 1488*4887Schin { 1489*4887Schin if (S_ISDIR(st->st_mode)) 1490*4887Schin { 1491*4887Schin mp->mime = "x-system/dir"; 1492*4887Schin return T("directory"); 1493*4887Schin } 1494*4887Schin if (S_ISLNK(st->st_mode)) 1495*4887Schin { 1496*4887Schin mp->mime = "x-system/lnk"; 1497*4887Schin s = buf; 1498*4887Schin s += sfsprintf(s, PATH_MAX, T("symbolic link to ")); 1499*4887Schin if (pathgetlink(file, s, size - (s - buf)) < 0) 1500*4887Schin return T("cannot read symbolic link text"); 1501*4887Schin return buf; 1502*4887Schin } 1503*4887Schin if (S_ISBLK(st->st_mode)) 1504*4887Schin { 1505*4887Schin mp->mime = "x-system/blk"; 1506*4887Schin sfsprintf(buf, PATH_MAX, T("block special 
(%s)"), fmtdev(st)); 1507*4887Schin return buf; 1508*4887Schin } 1509*4887Schin if (S_ISCHR(st->st_mode)) 1510*4887Schin { 1511*4887Schin mp->mime = "x-system/chr"; 1512*4887Schin sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st)); 1513*4887Schin return buf; 1514*4887Schin } 1515*4887Schin if (S_ISFIFO(st->st_mode)) 1516*4887Schin { 1517*4887Schin mp->mime = "x-system/fifo"; 1518*4887Schin return "fifo"; 1519*4887Schin } 1520*4887Schin #ifdef S_ISSOCK 1521*4887Schin if (S_ISSOCK(st->st_mode)) 1522*4887Schin { 1523*4887Schin mp->mime = "x-system/sock"; 1524*4887Schin return "socket"; 1525*4887Schin } 1526*4887Schin #endif 1527*4887Schin } 1528*4887Schin if (!(mp->fbmx = st->st_size)) 1529*4887Schin s = T("empty"); 1530*4887Schin else if (!mp->fp) 1531*4887Schin s = T("cannot read"); 1532*4887Schin else 1533*4887Schin { 1534*4887Schin mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1); 1535*4887Schin if (mp->fbsz < 0) 1536*4887Schin s = fmterror(errno); 1537*4887Schin else if (mp->fbsz == 0) 1538*4887Schin s = T("empty"); 1539*4887Schin else 1540*4887Schin { 1541*4887Schin mp->fbuf[mp->fbsz] = 0; 1542*4887Schin mp->xoff = 0; 1543*4887Schin mp->xbsz = 0; 1544*4887Schin if (!(s = ckmagic(mp, file, buf, st, 0))) 1545*4887Schin s = cklang(mp, file, buf, st); 1546*4887Schin } 1547*4887Schin } 1548*4887Schin if (!mp->mime) 1549*4887Schin mp->mime = "application/unknown"; 1550*4887Schin else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2)) 1551*4887Schin { 1552*4887Schin register char* b; 1553*4887Schin register char* be; 1554*4887Schin register char* m; 1555*4887Schin register char* me; 1556*4887Schin 1557*4887Schin b = mp->mime; 1558*4887Schin me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1; 1559*4887Schin while (m < me && b < t) 1560*4887Schin *m++ = *b++; 1561*4887Schin b = t = s; 1562*4887Schin for (;;) 1563*4887Schin { 1564*4887Schin if (!(be = strchr(t, ' '))) 1565*4887Schin { 1566*4887Schin be = b + strlen(b); 
1567*4887Schin break; 1568*4887Schin } 1569*4887Schin if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4)) 1570*4887Schin break; 1571*4887Schin b = t; 1572*4887Schin t = be + 1; 1573*4887Schin } 1574*4887Schin while (m < me && b < be) 1575*4887Schin if ((*m++ = *b++) == ' ') 1576*4887Schin *(m - 1) = '-'; 1577*4887Schin *m = 0; 1578*4887Schin } 1579*4887Schin return s; 1580*4887Schin } 1581*4887Schin 1582*4887Schin /* 1583*4887Schin * low level for magicload() 1584*4887Schin */ 1585*4887Schin 1586*4887Schin static int 1587*4887Schin load(register Magic_t* mp, char* file, register Sfio_t* fp) 1588*4887Schin { 1589*4887Schin register Entry_t* ep; 1590*4887Schin register char* p; 1591*4887Schin register char* p2; 1592*4887Schin char* p3; 1593*4887Schin char* next; 1594*4887Schin int n; 1595*4887Schin int lge; 1596*4887Schin int lev; 1597*4887Schin int ent; 1598*4887Schin int old; 1599*4887Schin int cont; 1600*4887Schin Info_t* ip; 1601*4887Schin Entry_t* ret; 1602*4887Schin Entry_t* first; 1603*4887Schin Entry_t* last = 0; 1604*4887Schin Entry_t* fun['z' - 'a' + 1]; 1605*4887Schin 1606*4887Schin memzero(fun, sizeof(fun)); 1607*4887Schin cont = '$'; 1608*4887Schin ent = 0; 1609*4887Schin lev = 0; 1610*4887Schin old = 0; 1611*4887Schin ret = 0; 1612*4887Schin error_info.file = file; 1613*4887Schin error_info.line = 0; 1614*4887Schin first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1615*4887Schin while (p = sfgetr(fp, '\n', 1)) 1616*4887Schin { 1617*4887Schin error_info.line++; 1618*4887Schin for (; isspace(*p); p++); 1619*4887Schin 1620*4887Schin /* 1621*4887Schin * nesting 1622*4887Schin */ 1623*4887Schin 1624*4887Schin switch (*p) 1625*4887Schin { 1626*4887Schin case 0: 1627*4887Schin case '#': 1628*4887Schin cont = '#'; 1629*4887Schin continue; 1630*4887Schin case '{': 1631*4887Schin if (++lev < MAXNEST) 1632*4887Schin ep->nest = *p; 1633*4887Schin else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1634*4887Schin 
(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST); 1635*4887Schin continue; 1636*4887Schin case '}': 1637*4887Schin if (!last || lev <= 0) 1638*4887Schin { 1639*4887Schin if (mp->disc->errorf) 1640*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p); 1641*4887Schin } 1642*4887Schin else if (lev-- == ent) 1643*4887Schin { 1644*4887Schin ent = 0; 1645*4887Schin ep->cont = ':'; 1646*4887Schin ep->offset = ret->offset; 1647*4887Schin ep->nest = ' '; 1648*4887Schin ep->type = ' '; 1649*4887Schin ep->op = ' '; 1650*4887Schin ep->desc = "[RETURN]"; 1651*4887Schin last = ep; 1652*4887Schin ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1653*4887Schin ret = 0; 1654*4887Schin } 1655*4887Schin else 1656*4887Schin last->nest = *p; 1657*4887Schin continue; 1658*4887Schin default: 1659*4887Schin if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|') 1660*4887Schin { 1661*4887Schin n = *p++; 1662*4887Schin if (n >= 'a' && n <= 'z') 1663*4887Schin n -= 'a'; 1664*4887Schin else 1665*4887Schin { 1666*4887Schin if (mp->disc->errorf) 1667*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); 1668*4887Schin n = 0; 1669*4887Schin } 1670*4887Schin if (ret && mp->disc->errorf) 1671*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 1672*4887Schin if (*p == '{') 1673*4887Schin { 1674*4887Schin ent = ++lev; 1675*4887Schin ret = ep; 1676*4887Schin ep->desc = "[FUNCTION]"; 1677*4887Schin } 1678*4887Schin else 1679*4887Schin { 1680*4887Schin if (*(p + 1) != ')' && mp->disc->errorf) 1681*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a'); 1682*4887Schin ep->desc = "[CALL]"; 1683*4887Schin } 1684*4887Schin ep->cont = cont; 1685*4887Schin ep->offset = n; 1686*4887Schin ep->nest = ' '; 1687*4887Schin ep->type = ' '; 1688*4887Schin ep->op = ' '; 1689*4887Schin last = ep; 
1690*4887Schin ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1691*4887Schin if (ret) 1692*4887Schin fun[n] = last->value.lab = ep; 1693*4887Schin else if (!(last->value.lab = fun[n]) && mp->disc->errorf) 1694*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); 1695*4887Schin continue; 1696*4887Schin } 1697*4887Schin if (!ep->nest) 1698*4887Schin ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' '; 1699*4887Schin break; 1700*4887Schin } 1701*4887Schin 1702*4887Schin /* 1703*4887Schin * continuation 1704*4887Schin */ 1705*4887Schin 1706*4887Schin cont = '$'; 1707*4887Schin switch (*p) 1708*4887Schin { 1709*4887Schin case '>': 1710*4887Schin old = 1; 1711*4887Schin if (*(p + 1) == *p) 1712*4887Schin { 1713*4887Schin /* 1714*4887Schin * old style nesting push 1715*4887Schin */ 1716*4887Schin 1717*4887Schin p++; 1718*4887Schin old = 2; 1719*4887Schin if (!lev && last) 1720*4887Schin { 1721*4887Schin lev = 1; 1722*4887Schin last->nest = '{'; 1723*4887Schin if (last->cont == '>') 1724*4887Schin last->cont = '&'; 1725*4887Schin ep->nest = '1'; 1726*4887Schin } 1727*4887Schin } 1728*4887Schin /*FALLTHROUGH*/ 1729*4887Schin case '+': 1730*4887Schin case '&': 1731*4887Schin case '|': 1732*4887Schin ep->cont = *p++; 1733*4887Schin break; 1734*4887Schin default: 1735*4887Schin if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf) 1736*4887Schin (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p); 1737*4887Schin /*FALLTHROUGH*/ 1738*4887Schin case '*': 1739*4887Schin case '0': case '1': case '2': case '3': case '4': 1740*4887Schin case '5': case '6': case '7': case '8': case '9': 1741*4887Schin ep->cont = (lev > 0) ? 
'&' : '#'; 1742*4887Schin break; 1743*4887Schin } 1744*4887Schin switch (old) 1745*4887Schin { 1746*4887Schin case 1: 1747*4887Schin old = 0; 1748*4887Schin if (lev) 1749*4887Schin { 1750*4887Schin /* 1751*4887Schin * old style nesting pop 1752*4887Schin */ 1753*4887Schin 1754*4887Schin lev = 0; 1755*4887Schin if (last) 1756*4887Schin last->nest = '}'; 1757*4887Schin ep->nest = ' '; 1758*4887Schin if (ep->cont == '&') 1759*4887Schin ep->cont = '#'; 1760*4887Schin } 1761*4887Schin break; 1762*4887Schin case 2: 1763*4887Schin old = 1; 1764*4887Schin break; 1765*4887Schin } 1766*4887Schin if (isdigit(*p)) 1767*4887Schin { 1768*4887Schin /* 1769*4887Schin * absolute offset 1770*4887Schin */ 1771*4887Schin 1772*4887Schin ep->offset = strton(p, &next, NiL, 0); 1773*4887Schin p2 = next; 1774*4887Schin } 1775*4887Schin else 1776*4887Schin { 1777*4887Schin for (p2 = p; *p2 && !isspace(*p2); p2++); 1778*4887Schin if (!*p2) 1779*4887Schin { 1780*4887Schin if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1781*4887Schin (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); 1782*4887Schin continue; 1783*4887Schin } 1784*4887Schin 1785*4887Schin /* 1786*4887Schin * offset expression 1787*4887Schin */ 1788*4887Schin 1789*4887Schin *p2++ = 0; 1790*4887Schin ep->expr = vmstrdup(mp->vm, p); 1791*4887Schin if (isalpha(*p)) 1792*4887Schin ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? 
ip->value : 0; 1793*4887Schin else if (*p == '(' && ep->cont == '>') 1794*4887Schin { 1795*4887Schin /* 1796*4887Schin * convert old style indirection to @ 1797*4887Schin */ 1798*4887Schin 1799*4887Schin p = ep->expr + 1; 1800*4887Schin for (;;) 1801*4887Schin { 1802*4887Schin switch (*p++) 1803*4887Schin { 1804*4887Schin case 0: 1805*4887Schin case '@': 1806*4887Schin case '(': 1807*4887Schin break; 1808*4887Schin case ')': 1809*4887Schin break; 1810*4887Schin default: 1811*4887Schin continue; 1812*4887Schin } 1813*4887Schin break; 1814*4887Schin } 1815*4887Schin if (*--p == ')') 1816*4887Schin { 1817*4887Schin *p = 0; 1818*4887Schin *ep->expr = '@'; 1819*4887Schin } 1820*4887Schin } 1821*4887Schin } 1822*4887Schin for (; isspace(*p2); p2++); 1823*4887Schin for (p = p2; *p2 && !isspace(*p2); p2++); 1824*4887Schin if (!*p2) 1825*4887Schin { 1826*4887Schin if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1827*4887Schin (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); 1828*4887Schin continue; 1829*4887Schin } 1830*4887Schin *p2++ = 0; 1831*4887Schin 1832*4887Schin /* 1833*4887Schin * type 1834*4887Schin */ 1835*4887Schin 1836*4887Schin if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e') 1837*4887Schin { 1838*4887Schin ep->swap = ~(*p == 'l' ? 
7 : 0); 1839*4887Schin p += 2; 1840*4887Schin } 1841*4887Schin if (*p == 's') 1842*4887Schin { 1843*4887Schin if (*(p + 1) == 'h') 1844*4887Schin ep->type = 'h'; 1845*4887Schin else 1846*4887Schin ep->type = 's'; 1847*4887Schin } 1848*4887Schin else if (*p == 'a') 1849*4887Schin ep->type = 's'; 1850*4887Schin else 1851*4887Schin ep->type = *p; 1852*4887Schin if (p = strchr(p, '&')) 1853*4887Schin { 1854*4887Schin /* 1855*4887Schin * old style mask 1856*4887Schin */ 1857*4887Schin 1858*4887Schin ep->mask = strton(++p, NiL, NiL, 0); 1859*4887Schin } 1860*4887Schin for (; isspace(*p2); p2++); 1861*4887Schin if (ep->mask) 1862*4887Schin *--p2 = '='; 1863*4887Schin 1864*4887Schin /* 1865*4887Schin * comparison operation 1866*4887Schin */ 1867*4887Schin 1868*4887Schin p = p2; 1869*4887Schin if (p2 = strchr(p, '\t')) 1870*4887Schin *p2++ = 0; 1871*4887Schin else 1872*4887Schin { 1873*4887Schin int qe = 0; 1874*4887Schin int qn = 0; 1875*4887Schin 1876*4887Schin /* 1877*4887Schin * assume balanced {}[]()\\""'' field 1878*4887Schin */ 1879*4887Schin 1880*4887Schin for (p2 = p;;) 1881*4887Schin { 1882*4887Schin switch (n = *p2++) 1883*4887Schin { 1884*4887Schin case 0: 1885*4887Schin break; 1886*4887Schin case '{': 1887*4887Schin if (!qe) 1888*4887Schin qe = '}'; 1889*4887Schin if (qe == '}') 1890*4887Schin qn++; 1891*4887Schin continue; 1892*4887Schin case '(': 1893*4887Schin if (!qe) 1894*4887Schin qe = ')'; 1895*4887Schin if (qe == ')') 1896*4887Schin qn++; 1897*4887Schin continue; 1898*4887Schin case '[': 1899*4887Schin if (!qe) 1900*4887Schin qe = ']'; 1901*4887Schin if (qe == ']') 1902*4887Schin qn++; 1903*4887Schin continue; 1904*4887Schin case '}': 1905*4887Schin case ')': 1906*4887Schin case ']': 1907*4887Schin if (qe == n && qn > 0) 1908*4887Schin qn--; 1909*4887Schin continue; 1910*4887Schin case '"': 1911*4887Schin case '\'': 1912*4887Schin if (!qe) 1913*4887Schin qe = n; 1914*4887Schin else if (qe == n) 1915*4887Schin qe = 0; 1916*4887Schin continue; 
1917*4887Schin case '\\': 1918*4887Schin if (*p2) 1919*4887Schin p2++; 1920*4887Schin continue; 1921*4887Schin default: 1922*4887Schin if (!qe && isspace(n)) 1923*4887Schin break; 1924*4887Schin continue; 1925*4887Schin } 1926*4887Schin if (n) 1927*4887Schin *(p2 - 1) = 0; 1928*4887Schin else 1929*4887Schin p2--; 1930*4887Schin break; 1931*4887Schin } 1932*4887Schin } 1933*4887Schin lge = 0; 1934*4887Schin if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 1935*4887Schin ep->op = '='; 1936*4887Schin else 1937*4887Schin { 1938*4887Schin if (*p == '&') 1939*4887Schin { 1940*4887Schin ep->mask = strton(++p, &next, NiL, 0); 1941*4887Schin p = next; 1942*4887Schin } 1943*4887Schin switch (*p) 1944*4887Schin { 1945*4887Schin case '=': 1946*4887Schin case '>': 1947*4887Schin case '<': 1948*4887Schin case '*': 1949*4887Schin ep->op = *p++; 1950*4887Schin if (*p == '=') 1951*4887Schin { 1952*4887Schin p++; 1953*4887Schin switch (ep->op) 1954*4887Schin { 1955*4887Schin case '>': 1956*4887Schin lge = -1; 1957*4887Schin break; 1958*4887Schin case '<': 1959*4887Schin lge = 1; 1960*4887Schin break; 1961*4887Schin } 1962*4887Schin } 1963*4887Schin break; 1964*4887Schin case '!': 1965*4887Schin case '@': 1966*4887Schin ep->op = *p++; 1967*4887Schin if (*p == '=') 1968*4887Schin p++; 1969*4887Schin break; 1970*4887Schin case 'x': 1971*4887Schin p++; 1972*4887Schin ep->op = '*'; 1973*4887Schin break; 1974*4887Schin default: 1975*4887Schin ep->op = '='; 1976*4887Schin if (ep->mask) 1977*4887Schin ep->value.num = ep->mask; 1978*4887Schin break; 1979*4887Schin } 1980*4887Schin } 1981*4887Schin if (ep->op != '*' && !ep->value.num) 1982*4887Schin { 1983*4887Schin if (ep->type == 'e') 1984*4887Schin { 1985*4887Schin if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0)) 1986*4887Schin { 1987*4887Schin ep->value.sub->re_disc = &mp->redisc; 1988*4887Schin if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE))) 1989*4887Schin { 1990*4887Schin p += 
ep->value.sub->re_npat; 1991*4887Schin if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0))) 1992*4887Schin p += ep->value.sub->re_npat; 1993*4887Schin } 1994*4887Schin if (n) 1995*4887Schin { 1996*4887Schin regmessage(mp, ep->value.sub, n); 1997*4887Schin ep->value.sub = 0; 1998*4887Schin } 1999*4887Schin else if (*p && mp->disc->errorf) 2000*4887Schin (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p); 2001*4887Schin } 2002*4887Schin } 2003*4887Schin else if (ep->type == 'm') 2004*4887Schin { 2005*4887Schin ep->mask = stresc(p) + 1; 2006*4887Schin ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0); 2007*4887Schin memcpy(ep->value.str, p, ep->mask); 2008*4887Schin if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)")) 2009*4887Schin ep->value.str[ep->mask - 1] = '*'; 2010*4887Schin } 2011*4887Schin else if (ep->type == 's') 2012*4887Schin { 2013*4887Schin ep->mask = stresc(p); 2014*4887Schin ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0); 2015*4887Schin memcpy(ep->value.str, p, ep->mask); 2016*4887Schin } 2017*4887Schin else if (*p == '\'') 2018*4887Schin { 2019*4887Schin stresc(p); 2020*4887Schin ep->value.num = *(unsigned char*)(p + 1) + lge; 2021*4887Schin } 2022*4887Schin else if (strmatch(p, "+([a-z])\\(*\\)")) 2023*4887Schin { 2024*4887Schin char* t; 2025*4887Schin 2026*4887Schin t = p; 2027*4887Schin ep->type = 'V'; 2028*4887Schin ep->op = *p; 2029*4887Schin while (*p && *p++ != '('); 2030*4887Schin switch (ep->op) 2031*4887Schin { 2032*4887Schin case 'l': 2033*4887Schin n = *p++; 2034*4887Schin if (n < 'a' || n > 'z') 2035*4887Schin { 2036*4887Schin if (mp->disc->errorf) 2037*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); 2038*4887Schin } 2039*4887Schin else if (!fun[n -= 'a']) 2040*4887Schin { 2041*4887Schin if (mp->disc->errorf) 2042*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); 2043*4887Schin } 2044*4887Schin else 
2045*4887Schin { 2046*4887Schin ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0); 2047*4887Schin ep->value.loop->lab = fun[n]; 2048*4887Schin while (*p && *p++ != ','); 2049*4887Schin ep->value.loop->start = strton(p, &t, NiL, 0); 2050*4887Schin while (*t && *t++ != ','); 2051*4887Schin ep->value.loop->size = strton(t, &t, NiL, 0); 2052*4887Schin } 2053*4887Schin break; 2054*4887Schin case 'm': 2055*4887Schin case 'r': 2056*4887Schin ep->desc = vmnewof(mp->vm, 0, char, 32, 0); 2057*4887Schin ep->mime = vmnewof(mp->vm, 0, char, 32, 0); 2058*4887Schin break; 2059*4887Schin case 'v': 2060*4887Schin break; 2061*4887Schin default: 2062*4887Schin if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2063*4887Schin (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t); 2064*4887Schin break; 2065*4887Schin } 2066*4887Schin } 2067*4887Schin else 2068*4887Schin { 2069*4887Schin ep->value.num = strton(p, NiL, NiL, 0) + lge; 2070*4887Schin if (ep->op == '@') 2071*4887Schin ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num)); 2072*4887Schin } 2073*4887Schin } 2074*4887Schin 2075*4887Schin /* 2076*4887Schin * file description 2077*4887Schin */ 2078*4887Schin 2079*4887Schin if (p2) 2080*4887Schin { 2081*4887Schin for (; isspace(*p2); p2++); 2082*4887Schin if (p = strchr(p2, '\t')) 2083*4887Schin { 2084*4887Schin /* 2085*4887Schin * check for message catalog index 2086*4887Schin */ 2087*4887Schin 2088*4887Schin *p++ = 0; 2089*4887Schin if (isalpha(*p2)) 2090*4887Schin { 2091*4887Schin for (p3 = p2; isalnum(*p3); p3++); 2092*4887Schin if (*p3++ == ':') 2093*4887Schin { 2094*4887Schin for (; isdigit(*p3); p3++); 2095*4887Schin if (!*p3) 2096*4887Schin { 2097*4887Schin for (p2 = p; isspace(*p2); p2++); 2098*4887Schin if (p = strchr(p2, '\t')) 2099*4887Schin *p++ = 0; 2100*4887Schin } 2101*4887Schin } 2102*4887Schin } 2103*4887Schin } 2104*4887Schin stresc(p2); 2105*4887Schin ep->desc = vmstrdup(mp->vm, p2); 2106*4887Schin if (p) 2107*4887Schin 
{ 2108*4887Schin for (; isspace(*p); p++); 2109*4887Schin if (*p) 2110*4887Schin ep->mime = vmstrdup(mp->vm, p); 2111*4887Schin } 2112*4887Schin } 2113*4887Schin else 2114*4887Schin ep->desc = ""; 2115*4887Schin 2116*4887Schin /* 2117*4887Schin * get next entry 2118*4887Schin */ 2119*4887Schin 2120*4887Schin last = ep; 2121*4887Schin ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 2122*4887Schin } 2123*4887Schin if (last) 2124*4887Schin { 2125*4887Schin last->next = 0; 2126*4887Schin if (mp->magiclast) 2127*4887Schin mp->magiclast->next = first; 2128*4887Schin else 2129*4887Schin mp->magic = first; 2130*4887Schin mp->magiclast = last; 2131*4887Schin } 2132*4887Schin vmfree(mp->vm, ep); 2133*4887Schin if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2134*4887Schin { 2135*4887Schin if (lev < 0) 2136*4887Schin (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators"); 2137*4887Schin else if (lev > 0) 2138*4887Schin (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators"); 2139*4887Schin if (ret) 2140*4887Schin (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 2141*4887Schin } 2142*4887Schin error_info.file = 0; 2143*4887Schin error_info.line = 0; 2144*4887Schin return 0; 2145*4887Schin } 2146*4887Schin 2147*4887Schin /* 2148*4887Schin * load a magic file into mp 2149*4887Schin */ 2150*4887Schin 2151*4887Schin int 2152*4887Schin magicload(register Magic_t* mp, const char* file, unsigned long flags) 2153*4887Schin { 2154*4887Schin register char* s; 2155*4887Schin register char* e; 2156*4887Schin register char* t; 2157*4887Schin int n; 2158*4887Schin int found; 2159*4887Schin int list; 2160*4887Schin Sfio_t* fp; 2161*4887Schin 2162*4887Schin mp->flags = mp->disc->flags | flags; 2163*4887Schin found = 0; 2164*4887Schin if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1)) 2165*4887Schin { 2166*4887Schin if (!(s = getenv(MAGIC_FILE_ENV)) || !*s) 2167*4887Schin s = MAGIC_FILE; 2168*4887Schin } 
2169*4887Schin for (;;) 2170*4887Schin { 2171*4887Schin if (!list) 2172*4887Schin e = 0; 2173*4887Schin else if (e = strchr(s, ':')) 2174*4887Schin { 2175*4887Schin /* 2176*4887Schin * ok, so ~ won't work for the last list element 2177*4887Schin * we do it for MAGIC_FILES_ENV anyway 2178*4887Schin */ 2179*4887Schin 2180*4887Schin if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME"))) 2181*4887Schin { 2182*4887Schin sfputr(mp->tmp, t, -1); 2183*4887Schin s += n - 1; 2184*4887Schin } 2185*4887Schin sfwrite(mp->tmp, s, e - s); 2186*4887Schin if (!(s = sfstruse(mp->tmp))) 2187*4887Schin goto nospace; 2188*4887Schin } 2189*4887Schin if (!*s || streq(s, "-")) 2190*4887Schin s = MAGIC_FILE; 2191*4887Schin if (!(fp = sfopen(NiL, s, "r"))) 2192*4887Schin { 2193*4887Schin if (list) 2194*4887Schin { 2195*4887Schin if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/')) 2196*4887Schin { 2197*4887Schin strcpy(mp->fbuf, s); 2198*4887Schin sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf); 2199*4887Schin if (!(s = sfstruse(mp->tmp))) 2200*4887Schin goto nospace; 2201*4887Schin if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ))) 2202*4887Schin goto next; 2203*4887Schin } 2204*4887Schin if (!(fp = sfopen(NiL, t, "r"))) 2205*4887Schin goto next; 2206*4887Schin } 2207*4887Schin else 2208*4887Schin { 2209*4887Schin if (mp->disc->errorf) 2210*4887Schin (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s); 2211*4887Schin return -1; 2212*4887Schin } 2213*4887Schin } 2214*4887Schin found = 1; 2215*4887Schin n = load(mp, s, fp); 2216*4887Schin sfclose(fp); 2217*4887Schin if (n && !list) 2218*4887Schin return -1; 2219*4887Schin next: 2220*4887Schin if (!e) 2221*4887Schin break; 2222*4887Schin s = e + 1; 2223*4887Schin } 2224*4887Schin if (!found) 2225*4887Schin { 2226*4887Schin if (mp->flags & MAGIC_VERBOSE) 2227*4887Schin { 2228*4887Schin if (mp->disc->errorf) 2229*4887Schin 
(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file"); 2230*4887Schin } 2231*4887Schin return -1; 2232*4887Schin } 2233*4887Schin return 0; 2234*4887Schin nospace: 2235*4887Schin if (mp->disc->errorf) 2236*4887Schin (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); 2237*4887Schin return -1; 2238*4887Schin } 2239*4887Schin 2240*4887Schin /* 2241*4887Schin * open a magic session 2242*4887Schin */ 2243*4887Schin 2244*4887Schin Magic_t* 2245*4887Schin magicopen(Magicdisc_t* disc) 2246*4887Schin { 2247*4887Schin register Magic_t* mp; 2248*4887Schin register int i; 2249*4887Schin register int n; 2250*4887Schin register int f; 2251*4887Schin register int c; 2252*4887Schin register Vmalloc_t* vm; 2253*4887Schin unsigned char* map[CC_MAPS + 1]; 2254*4887Schin 2255*4887Schin if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) 2256*4887Schin return 0; 2257*4887Schin if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0))) 2258*4887Schin { 2259*4887Schin vmclose(vm); 2260*4887Schin return 0; 2261*4887Schin } 2262*4887Schin mp->id = lib; 2263*4887Schin mp->disc = disc; 2264*4887Schin mp->vm = vm; 2265*4887Schin mp->flags = disc->flags; 2266*4887Schin mp->redisc.re_version = REG_VERSION; 2267*4887Schin mp->redisc.re_flags = REG_NOFREE; 2268*4887Schin mp->redisc.re_errorf = (regerror_t)disc->errorf; 2269*4887Schin mp->redisc.re_resizef = (regresize_t)vmgetmem; 2270*4887Schin mp->redisc.re_resizehandle = (void*)mp->vm; 2271*4887Schin mp->dtdisc.key = offsetof(Info_t, name); 2272*4887Schin mp->dtdisc.link = offsetof(Info_t, link); 2273*4887Schin if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash))) 2274*4887Schin goto bad; 2275*4887Schin for (n = 0; n < elementsof(info); n++) 2276*4887Schin dtinsert(mp->infotab, &info[n]); 2277*4887Schin for (i = 0; i < CC_MAPS; i++) 2278*4887Schin map[i] = ccmap(i, CC_ASCII); 2279*4887Schin mp->x2n = ccmap(CC_ALIEN, CC_NATIVE); 2280*4887Schin for (n = 0; n <= UCHAR_MAX; n++) 2281*4887Schin { 2282*4887Schin f = 0; 2283*4887Schin 
i = CC_MAPS; 2284*4887Schin while (--i >= 0) 2285*4887Schin { 2286*4887Schin c = ccmapchr(map[i], n); 2287*4887Schin f = (f << CC_BIT) | CCTYPE(c); 2288*4887Schin } 2289*4887Schin mp->cctype[n] = f; 2290*4887Schin } 2291*4887Schin return mp; 2292*4887Schin bad: 2293*4887Schin magicclose(mp); 2294*4887Schin return 0; 2295*4887Schin } 2296*4887Schin 2297*4887Schin /* 2298*4887Schin * close a magicopen() session 2299*4887Schin */ 2300*4887Schin 2301*4887Schin int 2302*4887Schin magicclose(register Magic_t* mp) 2303*4887Schin { 2304*4887Schin if (!mp) 2305*4887Schin return -1; 2306*4887Schin if (mp->tmp) 2307*4887Schin sfstrclose(mp->tmp); 2308*4887Schin if (mp->vm) 2309*4887Schin vmclose(mp->vm); 2310*4887Schin return 0; 2311*4887Schin } 2312*4887Schin 2313*4887Schin /* 2314*4887Schin * return the magic string for file with optional stat info st 2315*4887Schin */ 2316*4887Schin 2317*4887Schin char* 2318*4887Schin magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st) 2319*4887Schin { 2320*4887Schin off_t off; 2321*4887Schin char* s; 2322*4887Schin 2323*4887Schin mp->flags = mp->disc->flags; 2324*4887Schin mp->mime = 0; 2325*4887Schin if (!st) 2326*4887Schin s = T("cannot stat"); 2327*4887Schin else 2328*4887Schin { 2329*4887Schin if (mp->fp = fp) 2330*4887Schin off = sfseek(mp->fp, (off_t)0, SEEK_CUR); 2331*4887Schin s = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf)); 2332*4887Schin if (mp->fp) 2333*4887Schin sfseek(mp->fp, off, SEEK_SET); 2334*4887Schin if (!(mp->flags & MAGIC_MIME)) 2335*4887Schin { 2336*4887Schin if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128)) 2337*4887Schin sfprintf(mp->tmp, "%s ", T("short")); 2338*4887Schin sfprintf(mp->tmp, "%s", s); 2339*4887Schin if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) 2340*4887Schin sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? 
T("searchable") : T("executable")); 2341*4887Schin if (st->st_mode & S_ISUID) 2342*4887Schin sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid)); 2343*4887Schin if (st->st_mode & S_ISGID) 2344*4887Schin sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid)); 2345*4887Schin if (st->st_mode & S_ISVTX) 2346*4887Schin sfprintf(mp->tmp, ", sticky"); 2347*4887Schin if (!(s = sfstruse(mp->tmp))) 2348*4887Schin s = T("out of space"); 2349*4887Schin } 2350*4887Schin } 2351*4887Schin if (mp->flags & MAGIC_MIME) 2352*4887Schin s = mp->mime; 2353*4887Schin if (!s) 2354*4887Schin s = T("error"); 2355*4887Schin return s; 2356*4887Schin } 2357*4887Schin 2358*4887Schin /* 2359*4887Schin * list the magic table in mp on sp 2360*4887Schin */ 2361*4887Schin 2362*4887Schin int 2363*4887Schin magiclist(register Magic_t* mp, register Sfio_t* sp) 2364*4887Schin { 2365*4887Schin register Entry_t* ep = mp->magic; 2366*4887Schin register Entry_t* rp = 0; 2367*4887Schin 2368*4887Schin mp->flags = mp->disc->flags; 2369*4887Schin sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n"); 2370*4887Schin while (ep) 2371*4887Schin { 2372*4887Schin sfprintf(sp, "%c %c\t", ep->cont, ep->nest); 2373*4887Schin if (ep->expr) 2374*4887Schin sfprintf(sp, "%s", ep->expr); 2375*4887Schin else 2376*4887Schin sfprintf(sp, "%ld", ep->offset); 2377*4887Schin sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? 
"B" : "", ep->type, ep->op, ep->mask); 2378*4887Schin switch (ep->type) 2379*4887Schin { 2380*4887Schin case 'm': 2381*4887Schin case 's': 2382*4887Schin sfputr(sp, fmtesc(ep->value.str), -1); 2383*4887Schin break; 2384*4887Schin case 'V': 2385*4887Schin switch (ep->op) 2386*4887Schin { 2387*4887Schin case 'l': 2388*4887Schin sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset); 2389*4887Schin break; 2390*4887Schin case 'v': 2391*4887Schin sfprintf(sp, "vcodex()"); 2392*4887Schin break; 2393*4887Schin default: 2394*4887Schin sfprintf(sp, "%p", ep->value.str); 2395*4887Schin break; 2396*4887Schin } 2397*4887Schin break; 2398*4887Schin default: 2399*4887Schin sfprintf(sp, "%lo", ep->value.num); 2400*4887Schin break; 2401*4887Schin } 2402*4887Schin sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc)); 2403*4887Schin if (ep->cont == '$' && !ep->value.lab->mask) 2404*4887Schin { 2405*4887Schin rp = ep; 2406*4887Schin ep = ep->value.lab; 2407*4887Schin } 2408*4887Schin else 2409*4887Schin { 2410*4887Schin if (ep->cont == ':') 2411*4887Schin { 2412*4887Schin ep = rp; 2413*4887Schin ep->value.lab->mask = 1; 2414*4887Schin } 2415*4887Schin ep = ep->next; 2416*4887Schin } 2417*4887Schin } 2418*4887Schin return 0; 2419*4887Schin } 2420