1bd389b36SDavid du Colombier #include <u.h> 2bd389b36SDavid du Colombier #include <libc.h> 3bd389b36SDavid du Colombier #include <bio.h> 4bd389b36SDavid du Colombier #include <ctype.h> 5bd389b36SDavid du Colombier #include <mach.h> 63e12c5d1SDavid du Colombier 73e12c5d1SDavid du Colombier /* 83e12c5d1SDavid du Colombier * file - determine type of file 93e12c5d1SDavid du Colombier */ 103e12c5d1SDavid du Colombier #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) 113e12c5d1SDavid du Colombier 129a747e4fSDavid du Colombier uchar buf[6001]; 133e12c5d1SDavid du Colombier short cfreq[140]; 143e12c5d1SDavid du Colombier short wfreq[50]; 153e12c5d1SDavid du Colombier int nbuf; 169a747e4fSDavid du Colombier Dir* mbuf; 173e12c5d1SDavid du Colombier int fd; 183e12c5d1SDavid du Colombier char *fname; 193e12c5d1SDavid du Colombier char *slash; 203e12c5d1SDavid du Colombier 213e12c5d1SDavid du Colombier enum 223e12c5d1SDavid du Colombier { 233e12c5d1SDavid du Colombier Cword, 243e12c5d1SDavid du Colombier Fword, 253e12c5d1SDavid du Colombier Aword, 26219b2ee8SDavid du Colombier Alword, 277dd7cddfSDavid du Colombier Lword, 283e12c5d1SDavid du Colombier I1, 293e12c5d1SDavid du Colombier I2, 303e12c5d1SDavid du Colombier I3, 313e12c5d1SDavid du Colombier Clatin = 128, 323e12c5d1SDavid du Colombier Cbinary, 333e12c5d1SDavid du Colombier Cnull, 343e12c5d1SDavid du Colombier Ceascii, 353e12c5d1SDavid du Colombier Cutf, 363e12c5d1SDavid du Colombier }; 373e12c5d1SDavid du Colombier struct 383e12c5d1SDavid du Colombier { 393e12c5d1SDavid du Colombier char* word; 403e12c5d1SDavid du Colombier int class; 413e12c5d1SDavid du Colombier } dict[] = 423e12c5d1SDavid du Colombier { 437dd7cddfSDavid du Colombier "PATH", Lword, 44219b2ee8SDavid du Colombier "TEXT", Aword, 45219b2ee8SDavid du Colombier "adt", Alword, 46219b2ee8SDavid du Colombier "aggr", Alword, 47219b2ee8SDavid du Colombier "alef", Alword, 487dd7cddfSDavid du Colombier "array", Lword, 49219b2ee8SDavid du 
Colombier "block", Fword, 50219b2ee8SDavid du Colombier "char", Cword, 51219b2ee8SDavid du Colombier "common", Fword, 527dd7cddfSDavid du Colombier "con", Lword, 53219b2ee8SDavid du Colombier "data", Fword, 54219b2ee8SDavid du Colombier "dimension", Fword, 55219b2ee8SDavid du Colombier "double", Cword, 56219b2ee8SDavid du Colombier "extern", Cword, 57219b2ee8SDavid du Colombier "bio", I2, 58219b2ee8SDavid du Colombier "float", Cword, 597dd7cddfSDavid du Colombier "fn", Lword, 60219b2ee8SDavid du Colombier "function", Fword, 61219b2ee8SDavid du Colombier "h", I3, 627dd7cddfSDavid du Colombier "implement", Lword, 637dd7cddfSDavid du Colombier "import", Lword, 64219b2ee8SDavid du Colombier "include", I1, 65219b2ee8SDavid du Colombier "int", Cword, 66219b2ee8SDavid du Colombier "integer", Fword, 677dd7cddfSDavid du Colombier "iota", Lword, 68219b2ee8SDavid du Colombier "libc", I2, 69219b2ee8SDavid du Colombier "long", Cword, 707dd7cddfSDavid du Colombier "module", Lword, 71219b2ee8SDavid du Colombier "real", Fword, 727dd7cddfSDavid du Colombier "ref", Lword, 73219b2ee8SDavid du Colombier "register", Cword, 747dd7cddfSDavid du Colombier "self", Lword, 75219b2ee8SDavid du Colombier "short", Cword, 76219b2ee8SDavid du Colombier "static", Cword, 77219b2ee8SDavid du Colombier "stdio", I2, 78219b2ee8SDavid du Colombier "struct", Cword, 79219b2ee8SDavid du Colombier "subroutine", Fword, 80219b2ee8SDavid du Colombier "u", I2, 81219b2ee8SDavid du Colombier "void", Cword, 82219b2ee8SDavid du Colombier }; 83219b2ee8SDavid du Colombier 84219b2ee8SDavid du Colombier /* codes for 'mode' field in language structure */ 85219b2ee8SDavid du Colombier enum { 86219b2ee8SDavid du Colombier Normal = 0, 87219b2ee8SDavid du Colombier First, /* first entry for language spanning several ranges */ 88219b2ee8SDavid du Colombier Multi, /* later entries " " " ... 
*/ 89219b2ee8SDavid du Colombier Shared, /* codes used in several languages */ 903e12c5d1SDavid du Colombier }; 913e12c5d1SDavid du Colombier 923e12c5d1SDavid du Colombier struct 933e12c5d1SDavid du Colombier { 94219b2ee8SDavid du Colombier int mode; /* see enum above */ 953e12c5d1SDavid du Colombier int count; 963e12c5d1SDavid du Colombier int low; 973e12c5d1SDavid du Colombier int high; 983e12c5d1SDavid du Colombier char *name; 993e12c5d1SDavid du Colombier 1003e12c5d1SDavid du Colombier } language[] = 1013e12c5d1SDavid du Colombier { 102219b2ee8SDavid du Colombier Normal, 0, 0x0100, 0x01FF, "Extended Latin", 103219b2ee8SDavid du Colombier Normal, 0, 0x0370, 0x03FF, "Greek", 104219b2ee8SDavid du Colombier Normal, 0, 0x0400, 0x04FF, "Cyrillic", 105219b2ee8SDavid du Colombier Normal, 0, 0x0530, 0x058F, "Armenian", 106219b2ee8SDavid du Colombier Normal, 0, 0x0590, 0x05FF, "Hebrew", 107219b2ee8SDavid du Colombier Normal, 0, 0x0600, 0x06FF, "Arabic", 108219b2ee8SDavid du Colombier Normal, 0, 0x0900, 0x097F, "Devanagari", 109219b2ee8SDavid du Colombier Normal, 0, 0x0980, 0x09FF, "Bengali", 110219b2ee8SDavid du Colombier Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", 111219b2ee8SDavid du Colombier Normal, 0, 0x0A80, 0x0AFF, "Gujarati", 112219b2ee8SDavid du Colombier Normal, 0, 0x0B00, 0x0B7F, "Oriya", 113219b2ee8SDavid du Colombier Normal, 0, 0x0B80, 0x0BFF, "Tamil", 114219b2ee8SDavid du Colombier Normal, 0, 0x0C00, 0x0C7F, "Telugu", 115219b2ee8SDavid du Colombier Normal, 0, 0x0C80, 0x0CFF, "Kannada", 116219b2ee8SDavid du Colombier Normal, 0, 0x0D00, 0x0D7F, "Malayalam", 117219b2ee8SDavid du Colombier Normal, 0, 0x0E00, 0x0E7F, "Thai", 118219b2ee8SDavid du Colombier Normal, 0, 0x0E80, 0x0EFF, "Lao", 119219b2ee8SDavid du Colombier Normal, 0, 0x1000, 0x105F, "Tibetan", 120219b2ee8SDavid du Colombier Normal, 0, 0x10A0, 0x10FF, "Georgian", 121219b2ee8SDavid du Colombier Normal, 0, 0x3040, 0x30FF, "Japanese", 122219b2ee8SDavid du Colombier Normal, 0, 0x3100, 0x312F, "Chinese", 
123219b2ee8SDavid du Colombier First, 0, 0x3130, 0x318F, "Korean", 124219b2ee8SDavid du Colombier Multi, 0, 0x3400, 0x3D2F, "Korean", 125219b2ee8SDavid du Colombier Shared, 0, 0x4e00, 0x9fff, "CJK", 126219b2ee8SDavid du Colombier Normal, 0, 0, 0, 0, /* terminal entry */ 1273e12c5d1SDavid du Colombier }; 1283e12c5d1SDavid du Colombier 1293e12c5d1SDavid du Colombier 1303e12c5d1SDavid du Colombier enum 1313e12c5d1SDavid du Colombier { 1323e12c5d1SDavid du Colombier Fascii, /* printable ascii */ 1333e12c5d1SDavid du Colombier Flatin, /* latin 1*/ 1345e492409SDavid du Colombier Futf, /* UTF character set */ 1353e12c5d1SDavid du Colombier Fbinary, /* binary */ 1363e12c5d1SDavid du Colombier Feascii, /* ASCII with control chars */ 1373e12c5d1SDavid du Colombier Fnull, /* NULL in file */ 1383e12c5d1SDavid du Colombier } guess; 1393e12c5d1SDavid du Colombier 1403e12c5d1SDavid du Colombier void bump_utf_count(Rune); 1417dd7cddfSDavid du Colombier int cistrncmp(char*, char*, int); 1423e12c5d1SDavid du Colombier void filetype(int); 1433e12c5d1SDavid du Colombier int getfontnum(uchar*, uchar**); 1443e12c5d1SDavid du Colombier int isas(void); 1453e12c5d1SDavid du Colombier int isc(void); 1463e12c5d1SDavid du Colombier int iscint(void); 1473e12c5d1SDavid du Colombier int isenglish(void); 1487dd7cddfSDavid du Colombier int ishp(void); 1497dd7cddfSDavid du Colombier int ishtml(void); 1509a747e4fSDavid du Colombier int isrfc822(void); 151d9306527SDavid du Colombier int ismbox(void); 1527dd7cddfSDavid du Colombier int islimbo(void); 1533e12c5d1SDavid du Colombier int ismung(void); 1543e12c5d1SDavid du Colombier int isp9bit(void); 1553e12c5d1SDavid du Colombier int isp9font(void); 156fb7f0c93SDavid du Colombier int isrtf(void); 157f2e8132aSDavid du Colombier int ismsdos(void); 158b7327ca2SDavid du Colombier int iself(void); 1593e12c5d1SDavid du Colombier int istring(void); 1603306492aSDavid du Colombier int isoffstr(void); 161ddb951e3SDavid du Colombier int iff(void); 
1623e12c5d1SDavid du Colombier int long0(void); 1633306492aSDavid du Colombier int longoff(void); 1644b30ca09SDavid du Colombier int istar(void); 1650c547597SDavid du Colombier int isface(void); 1660c547597SDavid du Colombier int isexec(void); 1673e12c5d1SDavid du Colombier int p9bitnum(uchar*); 1683e12c5d1SDavid du Colombier int p9subfont(uchar*); 1693e12c5d1SDavid du Colombier void print_utf(void); 1703e12c5d1SDavid du Colombier void type(char*, int); 1713e12c5d1SDavid du Colombier int utf_count(void); 1723e12c5d1SDavid du Colombier void wordfreq(void); 1733e12c5d1SDavid du Colombier 1743e12c5d1SDavid du Colombier int (*call[])(void) = 1753e12c5d1SDavid du Colombier { 1763e12c5d1SDavid du Colombier long0, /* recognizable by first 4 bytes */ 1773e12c5d1SDavid du Colombier istring, /* recognizable by first string */ 1780c547597SDavid du Colombier iself, /* ELF (foreign) executable */ 1790c547597SDavid du Colombier isexec, /* native executables */ 180ddb951e3SDavid du Colombier iff, /* interchange file format (strings) */ 1813306492aSDavid du Colombier longoff, /* recognizable by 4 bytes at some offset */ 1823306492aSDavid du Colombier isoffstr, /* recognizable by string at some offset */ 1839a747e4fSDavid du Colombier isrfc822, /* email file */ 184d9306527SDavid du Colombier ismbox, /* mail box */ 1854b30ca09SDavid du Colombier istar, /* recognizable by tar checksum */ 186643074abSDavid du Colombier ishtml, /* html keywords */ 187219b2ee8SDavid du Colombier iscint, /* compiler/assembler intermediate */ 1887dd7cddfSDavid du Colombier islimbo, /* limbo source */ 189219b2ee8SDavid du Colombier isc, /* c & alef compiler key words */ 1903e12c5d1SDavid du Colombier isas, /* assembler key words */ 1913e12c5d1SDavid du Colombier ismung, /* entropy compressed/encrypted */ 1923e12c5d1SDavid du Colombier isp9font, /* plan 9 font */ 1937dd7cddfSDavid du Colombier isp9bit, /* plan 9 image (as from /dev/window) */ 1947dd7cddfSDavid du Colombier isenglish, /* char frequency 
English */ 195fb7f0c93SDavid du Colombier isrtf, /* rich text format */ 196f2e8132aSDavid du Colombier ismsdos, /* msdos exe (virus file attachement) */ 1970c547597SDavid du Colombier isface, /* ascii face file */ 1983e12c5d1SDavid du Colombier 0 1993e12c5d1SDavid du Colombier }; 2003e12c5d1SDavid du Colombier 2017dd7cddfSDavid du Colombier int mime; 2027dd7cddfSDavid du Colombier 2037dd7cddfSDavid du Colombier #define OCTET "application/octet-stream\n" 2047dd7cddfSDavid du Colombier #define PLAIN "text/plain\n" 2057dd7cddfSDavid du Colombier 2063e12c5d1SDavid du Colombier void 2073e12c5d1SDavid du Colombier main(int argc, char *argv[]) 2083e12c5d1SDavid du Colombier { 2093e12c5d1SDavid du Colombier int i, j, maxlen; 2103e12c5d1SDavid du Colombier char *cp; 2113e12c5d1SDavid du Colombier Rune r; 2123e12c5d1SDavid du Colombier 2137dd7cddfSDavid du Colombier ARGBEGIN{ 2147dd7cddfSDavid du Colombier case 'm': 2157dd7cddfSDavid du Colombier mime = 1; 2167dd7cddfSDavid du Colombier break; 2177dd7cddfSDavid du Colombier default: 2187dd7cddfSDavid du Colombier fprint(2, "usage: file [-m] [file...]\n"); 2197dd7cddfSDavid du Colombier exits("usage"); 2207dd7cddfSDavid du Colombier }ARGEND; 2217dd7cddfSDavid du Colombier 2223e12c5d1SDavid du Colombier maxlen = 0; 2237dd7cddfSDavid du Colombier if(mime == 0 || argc > 1){ 2247dd7cddfSDavid du Colombier for(i = 0; i < argc; i++) { 2253e12c5d1SDavid du Colombier for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) 2263e12c5d1SDavid du Colombier ; 2273e12c5d1SDavid du Colombier if(j > maxlen) 2283e12c5d1SDavid du Colombier maxlen = j; 2293e12c5d1SDavid du Colombier } 2307dd7cddfSDavid du Colombier } 2317dd7cddfSDavid du Colombier if (argc <= 0) { 2327dd7cddfSDavid du Colombier if(!mime) 2333e12c5d1SDavid du Colombier print ("stdin: "); 2343e12c5d1SDavid du Colombier filetype(0); 2353e12c5d1SDavid du Colombier } 2363e12c5d1SDavid du Colombier else { 2377dd7cddfSDavid du Colombier for(i = 0; i < argc; i++) 
2383e12c5d1SDavid du Colombier type(argv[i], maxlen); 2393e12c5d1SDavid du Colombier } 2403e12c5d1SDavid du Colombier exits(0); 2413e12c5d1SDavid du Colombier } 2423e12c5d1SDavid du Colombier 2433e12c5d1SDavid du Colombier void 2443e12c5d1SDavid du Colombier type(char *file, int nlen) 2453e12c5d1SDavid du Colombier { 2463e12c5d1SDavid du Colombier Rune r; 2473e12c5d1SDavid du Colombier int i; 2483e12c5d1SDavid du Colombier char *p; 2493e12c5d1SDavid du Colombier 2507dd7cddfSDavid du Colombier if(nlen > 0){ 2513e12c5d1SDavid du Colombier slash = 0; 2523e12c5d1SDavid du Colombier for (i = 0, p = file; *p; i++) { 2533e12c5d1SDavid du Colombier if (*p == '/') /* find rightmost slash */ 2543e12c5d1SDavid du Colombier slash = p; 2553e12c5d1SDavid du Colombier p += chartorune(&r, p); /* count runes */ 2563e12c5d1SDavid du Colombier } 2573e12c5d1SDavid du Colombier print("%s:%*s",file, nlen-i+1, ""); 2587dd7cddfSDavid du Colombier } 2593e12c5d1SDavid du Colombier fname = file; 2603e12c5d1SDavid du Colombier if ((fd = open(file, OREAD)) < 0) { 2613e12c5d1SDavid du Colombier print("cannot open\n"); 2623e12c5d1SDavid du Colombier return; 2633e12c5d1SDavid du Colombier } 2643e12c5d1SDavid du Colombier filetype(fd); 2653e12c5d1SDavid du Colombier close(fd); 2663e12c5d1SDavid du Colombier } 2673e12c5d1SDavid du Colombier 2685e492409SDavid du Colombier /* 2695e492409SDavid du Colombier * Unicode 4.0 4-byte runes. 
2705e492409SDavid du Colombier */ 2715e492409SDavid du Colombier typedef int Rune1; 2725e492409SDavid du Colombier 2735e492409SDavid du Colombier enum { 2745e492409SDavid du Colombier UTFmax1 = 4, 2755e492409SDavid du Colombier }; 2765e492409SDavid du Colombier 2775e492409SDavid du Colombier int 2785e492409SDavid du Colombier fullrune1(char *p, int n) 2795e492409SDavid du Colombier { 2805e492409SDavid du Colombier int c; 2815e492409SDavid du Colombier 2825e492409SDavid du Colombier if(n >= 1) { 2835e492409SDavid du Colombier c = *(uchar*)p; 2845e492409SDavid du Colombier if(c < 0x80) 2855e492409SDavid du Colombier return 1; 2865e492409SDavid du Colombier if(n >= 2 && c < 0xE0) 2875e492409SDavid du Colombier return 1; 2885e492409SDavid du Colombier if(n >= 3 && c < 0xF0) 2895e492409SDavid du Colombier return 1; 2905e492409SDavid du Colombier if(n >= 4) 2915e492409SDavid du Colombier return 1; 2925e492409SDavid du Colombier } 2935e492409SDavid du Colombier return 0; 2945e492409SDavid du Colombier } 2955e492409SDavid du Colombier 2965e492409SDavid du Colombier int 2975e492409SDavid du Colombier chartorune1(Rune1 *rune, char *str) 2985e492409SDavid du Colombier { 2995e492409SDavid du Colombier int c, c1, c2, c3, n; 3005e492409SDavid du Colombier Rune r; 3015e492409SDavid du Colombier 3025e492409SDavid du Colombier c = *(uchar*)str; 3035e492409SDavid du Colombier if(c < 0xF0){ 3045e492409SDavid du Colombier r = 0; 3055e492409SDavid du Colombier n = chartorune(&r, str); 3065e492409SDavid du Colombier *rune = r; 3075e492409SDavid du Colombier return n; 3085e492409SDavid du Colombier } 3095e492409SDavid du Colombier c &= ~0xF0; 3105e492409SDavid du Colombier c1 = *(uchar*)(str+1) & ~0x80; 3115e492409SDavid du Colombier c2 = *(uchar*)(str+2) & ~0x80; 3125e492409SDavid du Colombier c3 = *(uchar*)(str+3) & ~0x80; 3135e492409SDavid du Colombier n = (c<<18) | (c1<<12) | (c2<<6) | c3; 3145e492409SDavid du Colombier if(n < 0x10000 || n > 0x10FFFF){ 3155e492409SDavid du Colombier 
*rune = Runeerror; 3165e492409SDavid du Colombier return 1; 3175e492409SDavid du Colombier } 3185e492409SDavid du Colombier *rune = n; 3195e492409SDavid du Colombier return 4; 3205e492409SDavid du Colombier } 3215e492409SDavid du Colombier 3223e12c5d1SDavid du Colombier void 3233e12c5d1SDavid du Colombier filetype(int fd) 3243e12c5d1SDavid du Colombier { 3255e492409SDavid du Colombier Rune1 r; 326219b2ee8SDavid du Colombier int i, f, n; 327219b2ee8SDavid du Colombier char *p, *eob; 3283e12c5d1SDavid du Colombier 3299a747e4fSDavid du Colombier free(mbuf); 3309a747e4fSDavid du Colombier mbuf = dirfstat(fd); 3319a747e4fSDavid du Colombier if(mbuf == nil){ 3329a747e4fSDavid du Colombier print("cannot stat: %r\n"); 3333e12c5d1SDavid du Colombier return; 3343e12c5d1SDavid du Colombier } 3359a747e4fSDavid du Colombier if(mbuf->mode & DMDIR) { 3367dd7cddfSDavid du Colombier print(mime ? "text/directory\n" : "directory\n"); 3373e12c5d1SDavid du Colombier return; 3383e12c5d1SDavid du Colombier } 3399a747e4fSDavid du Colombier if(mbuf->type != 'M' && mbuf->type != '|') { 3407dd7cddfSDavid du Colombier print(mime ? OCTET : "special file #%c/%s\n", 3419a747e4fSDavid du Colombier mbuf->type, mbuf->name); 3423e12c5d1SDavid du Colombier return; 3433e12c5d1SDavid du Colombier } 3449a747e4fSDavid du Colombier nbuf = read(fd, buf, sizeof(buf)-1); 3453e12c5d1SDavid du Colombier 3463e12c5d1SDavid du Colombier if(nbuf < 0) { 3473e12c5d1SDavid du Colombier print("cannot read\n"); 3483e12c5d1SDavid du Colombier return; 3493e12c5d1SDavid du Colombier } 3503e12c5d1SDavid du Colombier if(nbuf == 0) { 3517dd7cddfSDavid du Colombier print(mime ? 
PLAIN : "empty file\n"); 3523e12c5d1SDavid du Colombier return; 3533e12c5d1SDavid du Colombier } 3549a747e4fSDavid du Colombier buf[nbuf] = 0; 3553e12c5d1SDavid du Colombier 3563e12c5d1SDavid du Colombier /* 3573e12c5d1SDavid du Colombier * build histogram table 3583e12c5d1SDavid du Colombier */ 3593e12c5d1SDavid du Colombier memset(cfreq, 0, sizeof(cfreq)); 3603e12c5d1SDavid du Colombier for (i = 0; language[i].name; i++) 3613e12c5d1SDavid du Colombier language[i].count = 0; 362219b2ee8SDavid du Colombier eob = (char *)buf+nbuf; 363219b2ee8SDavid du Colombier for(n = 0, p = (char *)buf; p < eob; n++) { 3645e492409SDavid du Colombier if (!fullrune1(p, eob-p) && eob-p < UTFmax1) 365219b2ee8SDavid du Colombier break; 3665e492409SDavid du Colombier p += chartorune1(&r, p); 3673e12c5d1SDavid du Colombier if (r == 0) 3683e12c5d1SDavid du Colombier f = Cnull; 3693e12c5d1SDavid du Colombier else if (r <= 0x7f) { 3703e12c5d1SDavid du Colombier if (!isprint(r) && !isspace(r)) 3713e12c5d1SDavid du Colombier f = Ceascii; /* ASCII control char */ 3723e12c5d1SDavid du Colombier else f = r; 3735e492409SDavid du Colombier } else if (r == 0x80) { 374219b2ee8SDavid du Colombier bump_utf_count(r); 375219b2ee8SDavid du Colombier f = Cutf; 3763e12c5d1SDavid du Colombier } else if (r < 0xA0) 3773e12c5d1SDavid du Colombier f = Cbinary; /* Invalid Runes */ 3783e12c5d1SDavid du Colombier else if (r <= 0xff) 3793e12c5d1SDavid du Colombier f = Clatin; /* Latin 1 */ 3803e12c5d1SDavid du Colombier else { 3813e12c5d1SDavid du Colombier bump_utf_count(r); 3823e12c5d1SDavid du Colombier f = Cutf; /* UTF extension */ 3833e12c5d1SDavid du Colombier } 3843e12c5d1SDavid du Colombier cfreq[f]++; /* ASCII chars peg directly */ 3853e12c5d1SDavid du Colombier } 3863e12c5d1SDavid du Colombier /* 3873e12c5d1SDavid du Colombier * gross classify 3883e12c5d1SDavid du Colombier */ 3893e12c5d1SDavid du Colombier if (cfreq[Cbinary]) 3903e12c5d1SDavid du Colombier guess = Fbinary; 3913e12c5d1SDavid du Colombier 
else if (cfreq[Cutf]) 3923e12c5d1SDavid du Colombier guess = Futf; 3933e12c5d1SDavid du Colombier else if (cfreq[Clatin]) 3943e12c5d1SDavid du Colombier guess = Flatin; 3953e12c5d1SDavid du Colombier else if (cfreq[Ceascii]) 3963e12c5d1SDavid du Colombier guess = Feascii; 3975e492409SDavid du Colombier else if (cfreq[Cnull]) 3983306492aSDavid du Colombier guess = Fbinary; 3995e492409SDavid du Colombier else 4005e492409SDavid du Colombier guess = Fascii; 4013e12c5d1SDavid du Colombier /* 4023e12c5d1SDavid du Colombier * lookup dictionary words 4033e12c5d1SDavid du Colombier */ 404219b2ee8SDavid du Colombier memset(wfreq, 0, sizeof(wfreq)); 4057dd7cddfSDavid du Colombier if(guess == Fascii || guess == Flatin || guess == Futf) 4063e12c5d1SDavid du Colombier wordfreq(); 4073e12c5d1SDavid du Colombier /* 4083e12c5d1SDavid du Colombier * call individual classify routines 4093e12c5d1SDavid du Colombier */ 4103e12c5d1SDavid du Colombier for(i=0; call[i]; i++) 4113e12c5d1SDavid du Colombier if((*call[i])()) 4123e12c5d1SDavid du Colombier return; 4133e12c5d1SDavid du Colombier 4143e12c5d1SDavid du Colombier /* 4153e12c5d1SDavid du Colombier * if all else fails, 4163e12c5d1SDavid du Colombier * print out gross classification 4173e12c5d1SDavid du Colombier */ 41880ee5cbfSDavid du Colombier if (nbuf < 100 && !mime) 4197dd7cddfSDavid du Colombier print(mime ? PLAIN : "short "); 4203e12c5d1SDavid du Colombier if (guess == Fascii) 4217dd7cddfSDavid du Colombier print(mime ? PLAIN : "Ascii\n"); 4223e12c5d1SDavid du Colombier else if (guess == Feascii) 4237dd7cddfSDavid du Colombier print(mime ? PLAIN : "extended ascii\n"); 4243e12c5d1SDavid du Colombier else if (guess == Flatin) 4257dd7cddfSDavid du Colombier print(mime ? PLAIN : "latin ascii\n"); 4263e12c5d1SDavid du Colombier else if (guess == Futf && utf_count() < 4) 4273e12c5d1SDavid du Colombier print_utf(); 4287dd7cddfSDavid du Colombier else print(mime ? 
OCTET : "binary\n"); 4293e12c5d1SDavid du Colombier } 4303e12c5d1SDavid du Colombier 4313e12c5d1SDavid du Colombier void 4323e12c5d1SDavid du Colombier bump_utf_count(Rune r) 4333e12c5d1SDavid du Colombier { 4343e12c5d1SDavid du Colombier int low, high, mid; 4353e12c5d1SDavid du Colombier 4363e12c5d1SDavid du Colombier high = sizeof(language)/sizeof(language[0])-1; 4373e12c5d1SDavid du Colombier for (low = 0; low < high;) { 4383e12c5d1SDavid du Colombier mid = (low+high)/2; 4393e12c5d1SDavid du Colombier if (r >= language[mid].low) { 4403e12c5d1SDavid du Colombier if (r <= language[mid].high) { 4413e12c5d1SDavid du Colombier language[mid].count++; 4423e12c5d1SDavid du Colombier break; 4433e12c5d1SDavid du Colombier } else low = mid+1; 4443e12c5d1SDavid du Colombier } else high = mid; 4453e12c5d1SDavid du Colombier } 4463e12c5d1SDavid du Colombier } 4473e12c5d1SDavid du Colombier 4483e12c5d1SDavid du Colombier int 4493e12c5d1SDavid du Colombier utf_count(void) 4503e12c5d1SDavid du Colombier { 4513e12c5d1SDavid du Colombier int i, count; 4523e12c5d1SDavid du Colombier 453219b2ee8SDavid du Colombier count = 0; 454219b2ee8SDavid du Colombier for (i = 0; language[i].name; i++) 4553e12c5d1SDavid du Colombier if (language[i].count > 0) 456219b2ee8SDavid du Colombier switch (language[i].mode) { 457219b2ee8SDavid du Colombier case Normal: 458219b2ee8SDavid du Colombier case First: 4593e12c5d1SDavid du Colombier count++; 460219b2ee8SDavid du Colombier break; 461219b2ee8SDavid du Colombier default: 462219b2ee8SDavid du Colombier break; 463219b2ee8SDavid du Colombier } 4643e12c5d1SDavid du Colombier return count; 4653e12c5d1SDavid du Colombier } 4663e12c5d1SDavid du Colombier 467219b2ee8SDavid du Colombier int 468219b2ee8SDavid du Colombier chkascii(void) 469219b2ee8SDavid du Colombier { 470219b2ee8SDavid du Colombier int i; 471219b2ee8SDavid du Colombier 472219b2ee8SDavid du Colombier for (i = 'a'; i < 'z'; i++) 473219b2ee8SDavid du Colombier if (cfreq[i]) 474219b2ee8SDavid 
du Colombier return 1; 475219b2ee8SDavid du Colombier for (i = 'A'; i < 'Z'; i++) 476219b2ee8SDavid du Colombier if (cfreq[i]) 477219b2ee8SDavid du Colombier return 1; 478219b2ee8SDavid du Colombier return 0; 479219b2ee8SDavid du Colombier } 480219b2ee8SDavid du Colombier 481219b2ee8SDavid du Colombier int 482219b2ee8SDavid du Colombier find_first(char *name) 483219b2ee8SDavid du Colombier { 484219b2ee8SDavid du Colombier int i; 485219b2ee8SDavid du Colombier 486219b2ee8SDavid du Colombier for (i = 0; language[i].name != 0; i++) 487219b2ee8SDavid du Colombier if (language[i].mode == First 488219b2ee8SDavid du Colombier && strcmp(language[i].name, name) == 0) 489219b2ee8SDavid du Colombier return i; 490219b2ee8SDavid du Colombier return -1; 491219b2ee8SDavid du Colombier } 492219b2ee8SDavid du Colombier 4933e12c5d1SDavid du Colombier void 4943e12c5d1SDavid du Colombier print_utf(void) 4953e12c5d1SDavid du Colombier { 496219b2ee8SDavid du Colombier int i, printed, j; 4973e12c5d1SDavid du Colombier 4987dd7cddfSDavid du Colombier if(mime){ 4997dd7cddfSDavid du Colombier print(PLAIN); 5007dd7cddfSDavid du Colombier return; 5017dd7cddfSDavid du Colombier } 502219b2ee8SDavid du Colombier if (chkascii()) { 503219b2ee8SDavid du Colombier printed = 1; 504219b2ee8SDavid du Colombier print("Ascii"); 505219b2ee8SDavid du Colombier } else 506219b2ee8SDavid du Colombier printed = 0; 507219b2ee8SDavid du Colombier for (i = 0; language[i].name; i++) 5083e12c5d1SDavid du Colombier if (language[i].count) { 509219b2ee8SDavid du Colombier switch(language[i].mode) { 510219b2ee8SDavid du Colombier case Multi: 511219b2ee8SDavid du Colombier j = find_first(language[i].name); 512219b2ee8SDavid du Colombier if (j < 0) 513219b2ee8SDavid du Colombier break; 514219b2ee8SDavid du Colombier if (language[j].count > 0) 515219b2ee8SDavid du Colombier break; 516219b2ee8SDavid du Colombier /* Fall through */ 517219b2ee8SDavid du Colombier case Normal: 518219b2ee8SDavid du Colombier case First: 
5193e12c5d1SDavid du Colombier if (printed) 5203e12c5d1SDavid du Colombier print(" & "); 5213e12c5d1SDavid du Colombier else printed = 1; 5223e12c5d1SDavid du Colombier print("%s", language[i].name); 523219b2ee8SDavid du Colombier break; 524219b2ee8SDavid du Colombier case Shared: 525219b2ee8SDavid du Colombier default: 526219b2ee8SDavid du Colombier break; 527219b2ee8SDavid du Colombier } 5283e12c5d1SDavid du Colombier } 5293e12c5d1SDavid du Colombier if(!printed) 5303e12c5d1SDavid du Colombier print("UTF"); 5313e12c5d1SDavid du Colombier print(" text\n"); 5323e12c5d1SDavid du Colombier } 5333e12c5d1SDavid du Colombier 5343e12c5d1SDavid du Colombier void 5353e12c5d1SDavid du Colombier wordfreq(void) 5363e12c5d1SDavid du Colombier { 537219b2ee8SDavid du Colombier int low, high, mid, r; 538219b2ee8SDavid du Colombier uchar *p, *p2, c; 5393e12c5d1SDavid du Colombier 540219b2ee8SDavid du Colombier p = buf; 541219b2ee8SDavid du Colombier for(;;) { 542219b2ee8SDavid du Colombier while (p < buf+nbuf && !isalpha(*p)) 543219b2ee8SDavid du Colombier p++; 544219b2ee8SDavid du Colombier if (p >= buf+nbuf) 545219b2ee8SDavid du Colombier return; 546219b2ee8SDavid du Colombier p2 = p; 547219b2ee8SDavid du Colombier while(p < buf+nbuf && isalpha(*p)) 548219b2ee8SDavid du Colombier p++; 549219b2ee8SDavid du Colombier c = *p; 550219b2ee8SDavid du Colombier *p = 0; 5513e12c5d1SDavid du Colombier high = sizeof(dict)/sizeof(dict[0]); 5523e12c5d1SDavid du Colombier for(low = 0;low < high;) { 5533e12c5d1SDavid du Colombier mid = (low+high)/2; 554219b2ee8SDavid du Colombier r = strcmp(dict[mid].word, (char*)p2); 555219b2ee8SDavid du Colombier if(r == 0) { 5563e12c5d1SDavid du Colombier wfreq[dict[mid].class]++; 5573e12c5d1SDavid du Colombier break; 5583e12c5d1SDavid du Colombier } 559219b2ee8SDavid du Colombier if(r < 0) 5603e12c5d1SDavid du Colombier low = mid+1; 5613e12c5d1SDavid du Colombier else 5623e12c5d1SDavid du Colombier high = mid; 5633e12c5d1SDavid du Colombier } 
564219b2ee8SDavid du Colombier *p++ = c; 5653e12c5d1SDavid du Colombier } 5663e12c5d1SDavid du Colombier } 5673e12c5d1SDavid du Colombier 5689a747e4fSDavid du Colombier typedef struct Filemagic Filemagic; 5699a747e4fSDavid du Colombier struct Filemagic { 5709a747e4fSDavid du Colombier ulong x; 5719a747e4fSDavid du Colombier ulong mask; 5729a747e4fSDavid du Colombier char *desc; 5739a747e4fSDavid du Colombier char *mime; 5749a747e4fSDavid du Colombier }; 5759a747e4fSDavid du Colombier 5763306492aSDavid du Colombier /* 5773306492aSDavid du Colombier * integers in this table must be as seen on a little-endian machine 5783306492aSDavid du Colombier * when read from a file. 5793306492aSDavid du Colombier */ 5809a747e4fSDavid du Colombier Filemagic long0tab[] = { 5819a747e4fSDavid du Colombier 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, 5823306492aSDavid du Colombier /* "pac1" */ 5839a747e4fSDavid du Colombier 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, 5843306492aSDavid du Colombier /* "pXc2 */ 5853306492aSDavid du Colombier 0x32630070, 0xFFFF00FF, "pac4 audio file\n", OCTET, 5869a747e4fSDavid du Colombier 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, 5879a747e4fSDavid du Colombier 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, 588fb7f0c93SDavid du Colombier 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", 5899a747e4fSDavid du Colombier 070707, 0xFFFF, "cpio archive\n", OCTET, 590fb7f0c93SDavid du Colombier 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", 5919552e201SDavid du Colombier 0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg", 592ee7057f8SDavid du Colombier 0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be", 593ee7057f8SDavid du Colombier 0xfffe, 0xffffffff, "utf-32le\n", "text/plain charset=utf-32le", 594ee7057f8SDavid du Colombier 0xfeff, 0xffff, "utf-16be\n", "text/plain charset=utf-16be", 595ee7057f8SDavid du Colombier 0xfffe, 0xffff, "utf-16le\n", "text/plain charset=utf-16le", 5963306492aSDavid du 
Colombier /* 5973306492aSDavid du Colombier * venti & fossil magic numbers are stored big-endian on disk, 5983306492aSDavid du Colombier * thus the numbers appear reversed in this table. 5993306492aSDavid du Colombier */ 6003306492aSDavid du Colombier 0xad4e5cd1, 0xFFFFFFFF, "venti arena\n", OCTET, 6019a747e4fSDavid du Colombier }; 6029a747e4fSDavid du Colombier 6039a747e4fSDavid du Colombier int 6049a747e4fSDavid du Colombier filemagic(Filemagic *tab, int ntab, ulong x) 6059a747e4fSDavid du Colombier { 6069a747e4fSDavid du Colombier int i; 6079a747e4fSDavid du Colombier 6089a747e4fSDavid du Colombier for(i=0; i<ntab; i++) 6099a747e4fSDavid du Colombier if((x&tab[i].mask) == tab[i].x){ 6109a747e4fSDavid du Colombier print(mime ? tab[i].mime : tab[i].desc); 6119a747e4fSDavid du Colombier return 1; 6129a747e4fSDavid du Colombier } 6139a747e4fSDavid du Colombier return 0; 6149a747e4fSDavid du Colombier } 6159a747e4fSDavid du Colombier 6163e12c5d1SDavid du Colombier int 6173e12c5d1SDavid du Colombier long0(void) 6183e12c5d1SDavid du Colombier { 6193306492aSDavid du Colombier return filemagic(long0tab, nelem(long0tab), LENDIAN(buf)); 6203306492aSDavid du Colombier } 6213e12c5d1SDavid du Colombier 6223306492aSDavid du Colombier typedef struct Fileoffmag Fileoffmag; 6233306492aSDavid du Colombier struct Fileoffmag { 6243306492aSDavid du Colombier ulong off; 6253306492aSDavid du Colombier Filemagic; 6263306492aSDavid du Colombier }; 6273306492aSDavid du Colombier 6283306492aSDavid du Colombier /* 6293306492aSDavid du Colombier * integers in this table must be as seen on a little-endian machine 6303306492aSDavid du Colombier * when read from a file. 6313306492aSDavid du Colombier */ 6323306492aSDavid du Colombier Fileoffmag longofftab[] = { 6333306492aSDavid du Colombier /* 6343306492aSDavid du Colombier * venti & fossil magic numbers are stored big-endian on disk, 6353306492aSDavid du Colombier * thus the numbers appear reversed in this table. 
6363306492aSDavid du Colombier */ 6373306492aSDavid du Colombier 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition\n", OCTET, 6383306492aSDavid du Colombier 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section\n", OCTET, 6393306492aSDavid du Colombier 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer\n", OCTET, 6403306492aSDavid du Colombier }; 6413306492aSDavid du Colombier 6423306492aSDavid du Colombier int 6433306492aSDavid du Colombier fileoffmagic(Fileoffmag *tab, int ntab) 6443306492aSDavid du Colombier { 6453306492aSDavid du Colombier int i; 6463306492aSDavid du Colombier ulong x; 6473306492aSDavid du Colombier Fileoffmag *tp; 6483306492aSDavid du Colombier uchar buf[sizeof(long)]; 6493306492aSDavid du Colombier 6503306492aSDavid du Colombier for(i=0; i<ntab; i++) { 6513306492aSDavid du Colombier tp = tab + i; 6523306492aSDavid du Colombier seek(fd, tp->off, 0); 6533306492aSDavid du Colombier if (read(fd, buf, sizeof buf) != sizeof buf) 6543306492aSDavid du Colombier continue; 6550c547597SDavid du Colombier x = LENDIAN(buf); 6563306492aSDavid du Colombier if((x&tp->mask) == tp->x){ 6573306492aSDavid du Colombier print(mime? 
tp->mime: tp->desc); 6580c547597SDavid du Colombier return 1; 6593306492aSDavid du Colombier } 6603306492aSDavid du Colombier } 6610c547597SDavid du Colombier return 0; 6620c547597SDavid du Colombier } 6630c547597SDavid du Colombier 6640c547597SDavid du Colombier int 6653306492aSDavid du Colombier longoff(void) 6663306492aSDavid du Colombier { 6673306492aSDavid du Colombier return fileoffmagic(longofftab, nelem(longofftab)); 6683306492aSDavid du Colombier } 6693306492aSDavid du Colombier 6703306492aSDavid du Colombier int 6710c547597SDavid du Colombier isexec(void) 6720c547597SDavid du Colombier { 6730c547597SDavid du Colombier Fhdr f; 6740c547597SDavid du Colombier 6753e12c5d1SDavid du Colombier seek(fd, 0, 0); /* reposition to start of file */ 6763e12c5d1SDavid du Colombier if(crackhdr(fd, &f)) { 6777dd7cddfSDavid du Colombier print(mime ? OCTET : "%s\n", f.name); 6783e12c5d1SDavid du Colombier return 1; 6793e12c5d1SDavid du Colombier } 6807dd7cddfSDavid du Colombier return 0; 6817dd7cddfSDavid du Colombier } 6823e12c5d1SDavid du Colombier 6830c547597SDavid du Colombier 6844b30ca09SDavid du Colombier /* from tar.c */ 6854b30ca09SDavid du Colombier enum { NAMSIZ = 100, TBLOCK = 512 }; 6864b30ca09SDavid du Colombier 6874b30ca09SDavid du Colombier union hblock 6884b30ca09SDavid du Colombier { 6894b30ca09SDavid du Colombier char dummy[TBLOCK]; 6904b30ca09SDavid du Colombier struct header 6914b30ca09SDavid du Colombier { 6924b30ca09SDavid du Colombier char name[NAMSIZ]; 6934b30ca09SDavid du Colombier char mode[8]; 6944b30ca09SDavid du Colombier char uid[8]; 6954b30ca09SDavid du Colombier char gid[8]; 6964b30ca09SDavid du Colombier char size[12]; 6974b30ca09SDavid du Colombier char mtime[12]; 6984b30ca09SDavid du Colombier char chksum[8]; 6994b30ca09SDavid du Colombier char linkflag; 7004b30ca09SDavid du Colombier char linkname[NAMSIZ]; 7014b30ca09SDavid du Colombier /* rest are defined by POSIX's ustar format; see p1003.2b */ 7024b30ca09SDavid du Colombier char 
magic[6]; /* "ustar" */ 7034b30ca09SDavid du Colombier char version[2]; 7044b30ca09SDavid du Colombier char uname[32]; 7054b30ca09SDavid du Colombier char gname[32]; 7064b30ca09SDavid du Colombier char devmajor[8]; 7074b30ca09SDavid du Colombier char devminor[8]; 7084b30ca09SDavid du Colombier char prefix[155]; /* if non-null, path = prefix "/" name */ 7094b30ca09SDavid du Colombier } dbuf; 7104b30ca09SDavid du Colombier }; 7114b30ca09SDavid du Colombier 7124b30ca09SDavid du Colombier int 7134b30ca09SDavid du Colombier checksum(union hblock *hp) 7144b30ca09SDavid du Colombier { 7154b30ca09SDavid du Colombier int i; 7164b30ca09SDavid du Colombier char *cp; 7174b30ca09SDavid du Colombier struct header *hdr = &hp->dbuf; 7184b30ca09SDavid du Colombier 7194b30ca09SDavid du Colombier for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++) 7204b30ca09SDavid du Colombier *cp = ' '; 7214b30ca09SDavid du Colombier i = 0; 7224b30ca09SDavid du Colombier for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++) 7234b30ca09SDavid du Colombier i += *cp & 0xff; 7244b30ca09SDavid du Colombier return i; 7254b30ca09SDavid du Colombier } 7264b30ca09SDavid du Colombier 7274b30ca09SDavid du Colombier int 7284b30ca09SDavid du Colombier istar(void) 7294b30ca09SDavid du Colombier { 7304b30ca09SDavid du Colombier int chksum; 7314b30ca09SDavid du Colombier char tblock[TBLOCK]; 7324b30ca09SDavid du Colombier union hblock *hp = (union hblock *)tblock; 7334b30ca09SDavid du Colombier struct header *hdr = &hp->dbuf; 7344b30ca09SDavid du Colombier 7354b30ca09SDavid du Colombier seek(fd, 0, 0); /* reposition to start of file */ 7364b30ca09SDavid du Colombier if (readn(fd, tblock, sizeof tblock) != sizeof tblock) 7374b30ca09SDavid du Colombier return 0; 7384b30ca09SDavid du Colombier chksum = strtol(hdr->chksum, 0, 8); 7394b30ca09SDavid du Colombier if (hdr->name[0] != '\0' && checksum(hp) == chksum) { 7404b30ca09SDavid du Colombier if (strcmp(hdr->magic, "ustar") == 0) 7414b30ca09SDavid du 
Colombier print(mime? "application/x-ustar\n": 7424b30ca09SDavid du Colombier "posix tar archive\n"); 7434b30ca09SDavid du Colombier else 7444b30ca09SDavid du Colombier print(mime? "application/x-tar\n": "tar archive\n"); 7454b30ca09SDavid du Colombier return 1; 7464b30ca09SDavid du Colombier } 7474b30ca09SDavid du Colombier return 0; 7484b30ca09SDavid du Colombier } 7494b30ca09SDavid du Colombier 7503e12c5d1SDavid du Colombier /* 7513e12c5d1SDavid du Colombier * initial words to classify file 7523e12c5d1SDavid du Colombier */ 753219b2ee8SDavid du Colombier struct FILE_STRING 754219b2ee8SDavid du Colombier { 7553e12c5d1SDavid du Colombier char *key; 7563e12c5d1SDavid du Colombier char *filetype; 7573e12c5d1SDavid du Colombier int length; 7587dd7cddfSDavid du Colombier char *mime; 7593e12c5d1SDavid du Colombier } file_string[] = 7603e12c5d1SDavid du Colombier { 7617dd7cddfSDavid du Colombier "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", 7627dd7cddfSDavid du Colombier "!<arch>\n", "archive", 8, "application/octet-stream", 7637dd7cddfSDavid du Colombier "070707", "cpio archive - ascii header", 6, "application/octet-stream", 7647dd7cddfSDavid du Colombier "#!/bin/rc", "rc executable file", 9, "text/plain", 7657dd7cddfSDavid du Colombier "#!/bin/sh", "sh executable file", 9, "text/plain", 7667dd7cddfSDavid du Colombier "%!", "postscript", 2, "application/postscript", 7677dd7cddfSDavid du Colombier "\004%!", "postscript", 3, "application/postscript", 7687dd7cddfSDavid du Colombier "x T post", "troff output for post", 8, "application/troff", 7697dd7cddfSDavid du Colombier "x T Latin1", "troff output for Latin1", 10, "application/troff", 7707dd7cddfSDavid du Colombier "x T utf", "troff output for UTF", 7, "application/troff", 7717dd7cddfSDavid du Colombier "x T 202", "troff output for 202", 7, "application/troff", 7727dd7cddfSDavid du Colombier "x T aps", "troff output for aps", 7, "application/troff", 7737dd7cddfSDavid du Colombier "GIF", 
"GIF image", 3, "image/gif", 7747dd7cddfSDavid du Colombier "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", 77559cc4ca5SDavid du Colombier "%PDF", "PDF", 4, "application/pdf", 7767dd7cddfSDavid du Colombier "<html>\n", "HTML file", 7, "text/html", 7777dd7cddfSDavid du Colombier "<HTML>\n", "HTML file", 7, "text/html", 7787dd7cddfSDavid du Colombier "compressed\n", "Compressed image or subfont", 11, "application/octet-stream", 7797dd7cddfSDavid du Colombier "\111\111\052\000", "tiff", 4, "image/tiff", 7807dd7cddfSDavid du Colombier "\115\115\000\052", "tiff", 4, "image/tiff", 7817dd7cddfSDavid du Colombier "\377\330\377\340", "jpeg", 4, "image/jpeg", 7827dd7cddfSDavid du Colombier "\377\330\377\341", "jpeg", 4, "image/jpeg", 7837dd7cddfSDavid du Colombier "\377\330\377\333", "jpeg", 4, "image/jpeg", 784da51d93aSDavid du Colombier "BM", "bmp", 2, "image/bmp", 7857dd7cddfSDavid du Colombier "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", 786fb7f0c93SDavid du Colombier "<MakerFile ", "FrameMaker file", 11, "application/framemaker", 787fb7f0c93SDavid du Colombier "\033%-12345X", "HPJCL file", 9, "application/hpjcl", 788ddb951e3SDavid du Colombier "ID3", "mp3 audio with id3", 3, "audio/mpeg", 7897989f6fbSDavid du Colombier "\211PNG", "PNG image", 4, "image/png", 7900c547597SDavid du Colombier "P3\n", "ppm", 3, "image/ppm", 7910c547597SDavid du Colombier "P6\n", "ppm", 3, "image/ppm", 7920c547597SDavid du Colombier "/* XPM */\n", "xbm", 10, "image/xbm", 793*7c70c028SDavid du Colombier ".HTML ", "troff -ms input", 6, "text/troff", 794*7c70c028SDavid du Colombier ".LP", "troff -ms input", 3, "text/troff", 795*7c70c028SDavid du Colombier ".ND", "troff -ms input", 3, "text/troff", 796*7c70c028SDavid du Colombier ".PP", "troff -ms input", 3, "text/troff", 797*7c70c028SDavid du Colombier ".TL", "troff -ms input", 3, "text/troff", 798*7c70c028SDavid du Colombier ".TR", "troff -ms input", 3, 
"text/troff", 799*7c70c028SDavid du Colombier ".TH", "manual page", 3, "text/troff", 800*7c70c028SDavid du Colombier ".\\\"", "troff input", 3, "text/troff", 801*7c70c028SDavid du Colombier ".de", "troff input", 3, "text/troff", 802*7c70c028SDavid du Colombier ".if", "troff input", 3, "text/troff", 803*7c70c028SDavid du Colombier ".nr", "troff input", 3, "text/troff", 804*7c70c028SDavid du Colombier ".tr", "troff input", 3, "text/troff", 80519a27a12SDavid du Colombier "vac:", "venti score", 4, "text/plain", 8067dd7cddfSDavid du Colombier 0,0,0,0 8073e12c5d1SDavid du Colombier }; 8083e12c5d1SDavid du Colombier 8093e12c5d1SDavid du Colombier int 8103e12c5d1SDavid du Colombier istring(void) 8113e12c5d1SDavid du Colombier { 8123e12c5d1SDavid du Colombier int i; 8133e12c5d1SDavid du Colombier struct FILE_STRING *p; 8143e12c5d1SDavid du Colombier 8153e12c5d1SDavid du Colombier for(p = file_string; p->key; p++) { 816219b2ee8SDavid du Colombier if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { 8177dd7cddfSDavid du Colombier if(mime) 8187dd7cddfSDavid du Colombier print("%s\n", p->mime); 8197dd7cddfSDavid du Colombier else 8203e12c5d1SDavid du Colombier print("%s\n", p->filetype); 8213e12c5d1SDavid du Colombier return 1; 8223e12c5d1SDavid du Colombier } 8233e12c5d1SDavid du Colombier } 8243e12c5d1SDavid du Colombier if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ 8253e12c5d1SDavid du Colombier for(i = 5; i < nbuf; i++) 8263e12c5d1SDavid du Colombier if(buf[i] == '\n') 8273e12c5d1SDavid du Colombier break; 8287dd7cddfSDavid du Colombier if(mime) 8297dd7cddfSDavid du Colombier print(OCTET); 8307dd7cddfSDavid du Colombier else 83159cc4ca5SDavid du Colombier print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); 8323e12c5d1SDavid du Colombier return 1; 8333e12c5d1SDavid du Colombier } 8343e12c5d1SDavid du Colombier return 0; 8353e12c5d1SDavid du Colombier } 8363e12c5d1SDavid du Colombier 8373306492aSDavid du Colombier struct offstr 8383306492aSDavid du 
Colombier { 8393306492aSDavid du Colombier ulong off; 8403306492aSDavid du Colombier struct FILE_STRING; 8413306492aSDavid du Colombier } offstrs[] = { 8423306492aSDavid du Colombier 32*1024, "\001CD001\001", "ISO9660 CD image", 7, OCTET, 8433306492aSDavid du Colombier 0, 0, 0, 0, 0 8443306492aSDavid du Colombier }; 8453306492aSDavid du Colombier 8463306492aSDavid du Colombier int 8473306492aSDavid du Colombier isoffstr(void) 8483306492aSDavid du Colombier { 8493306492aSDavid du Colombier int n; 8503306492aSDavid du Colombier char buf[256]; 8513306492aSDavid du Colombier struct offstr *p; 8523306492aSDavid du Colombier 8533306492aSDavid du Colombier for(p = offstrs; p->key; p++) { 8543306492aSDavid du Colombier seek(fd, p->off, 0); 8553306492aSDavid du Colombier n = p->length; 8563306492aSDavid du Colombier if (n > sizeof buf) 8573306492aSDavid du Colombier n = sizeof buf; 8583306492aSDavid du Colombier if (read(fd, buf, n) != n) 8593306492aSDavid du Colombier continue; 8603306492aSDavid du Colombier if(memcmp(buf, p->key, n) == 0) { 8613306492aSDavid du Colombier if(mime) 8623306492aSDavid du Colombier print("%s\n", p->mime); 8633306492aSDavid du Colombier else 8643306492aSDavid du Colombier print("%s\n", p->filetype); 8653306492aSDavid du Colombier return 1; 8663306492aSDavid du Colombier } 8673306492aSDavid du Colombier } 8683306492aSDavid du Colombier return 0; 8693306492aSDavid du Colombier } 8703306492aSDavid du Colombier 871ddb951e3SDavid du Colombier int 872ddb951e3SDavid du Colombier iff(void) 873ddb951e3SDavid du Colombier { 874ddb951e3SDavid du Colombier if (strncmp((char*)buf, "FORM", 4) == 0 && 875ddb951e3SDavid du Colombier strncmp((char*)buf+8, "AIFF", 4) == 0) { 876ddb951e3SDavid du Colombier print("%s\n", mime? 
"audio/x-aiff": "aiff audio"); 877ddb951e3SDavid du Colombier return 1; 878ddb951e3SDavid du Colombier } 879ddb951e3SDavid du Colombier return 0; 880ddb951e3SDavid du Colombier } 881ddb951e3SDavid du Colombier 8827dd7cddfSDavid du Colombier char* html_string[] = 8837dd7cddfSDavid du Colombier { 8847dd7cddfSDavid du Colombier "title", 8857dd7cddfSDavid du Colombier "body", 8867dd7cddfSDavid du Colombier "head", 8877dd7cddfSDavid du Colombier "strong", 8887dd7cddfSDavid du Colombier "h1", 8897dd7cddfSDavid du Colombier "h2", 8907dd7cddfSDavid du Colombier "h3", 8917dd7cddfSDavid du Colombier "h4", 8927dd7cddfSDavid du Colombier "h5", 8937dd7cddfSDavid du Colombier "h6", 8947dd7cddfSDavid du Colombier "ul", 8957dd7cddfSDavid du Colombier "li", 8967dd7cddfSDavid du Colombier "dl", 8977dd7cddfSDavid du Colombier "br", 8987dd7cddfSDavid du Colombier "em", 8997dd7cddfSDavid du Colombier 0, 9007dd7cddfSDavid du Colombier }; 9017dd7cddfSDavid du Colombier 9027dd7cddfSDavid du Colombier int 9037dd7cddfSDavid du Colombier ishtml(void) 9047dd7cddfSDavid du Colombier { 9057dd7cddfSDavid du Colombier uchar *p, *q; 9067dd7cddfSDavid du Colombier int i, count; 9077dd7cddfSDavid du Colombier 9087dd7cddfSDavid du Colombier /* compare strings between '<' and '>' to html table */ 9097dd7cddfSDavid du Colombier count = 0; 9107dd7cddfSDavid du Colombier p = buf; 9117dd7cddfSDavid du Colombier for(;;) { 9127dd7cddfSDavid du Colombier while (p < buf+nbuf && *p != '<') 9137dd7cddfSDavid du Colombier p++; 9147dd7cddfSDavid du Colombier p++; 9157dd7cddfSDavid du Colombier if (p >= buf+nbuf) 9167dd7cddfSDavid du Colombier break; 9177dd7cddfSDavid du Colombier if(*p == '/') 9187dd7cddfSDavid du Colombier p++; 9197dd7cddfSDavid du Colombier q = p; 9207dd7cddfSDavid du Colombier while(p < buf+nbuf && *p != '>') 9217dd7cddfSDavid du Colombier p++; 9227dd7cddfSDavid du Colombier if (p >= buf+nbuf) 9237dd7cddfSDavid du Colombier break; 9247dd7cddfSDavid du Colombier for(i = 0; html_string[i]; i++) 
{ 9257dd7cddfSDavid du Colombier if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { 9267dd7cddfSDavid du Colombier if(count++ > 4) { 9277dd7cddfSDavid du Colombier print(mime ? "text/html\n" : "HTML file\n"); 9287dd7cddfSDavid du Colombier return 1; 9297dd7cddfSDavid du Colombier } 9307dd7cddfSDavid du Colombier break; 9317dd7cddfSDavid du Colombier } 9327dd7cddfSDavid du Colombier } 9337dd7cddfSDavid du Colombier p++; 9347dd7cddfSDavid du Colombier } 9357dd7cddfSDavid du Colombier return 0; 9367dd7cddfSDavid du Colombier } 9377dd7cddfSDavid du Colombier 9389a747e4fSDavid du Colombier char* rfc822_string[] = 9397dd7cddfSDavid du Colombier { 9409a747e4fSDavid du Colombier "from:", 9419a747e4fSDavid du Colombier "date:", 9429a747e4fSDavid du Colombier "to:", 9439a747e4fSDavid du Colombier "subject:", 9449a747e4fSDavid du Colombier "received:", 945d9306527SDavid du Colombier "reply to:", 946d9306527SDavid du Colombier "sender:", 9479a747e4fSDavid du Colombier 0, 9489a747e4fSDavid du Colombier }; 9497dd7cddfSDavid du Colombier 9509a747e4fSDavid du Colombier int 9519a747e4fSDavid du Colombier isrfc822(void) 9529a747e4fSDavid du Colombier { 9539a747e4fSDavid du Colombier 9549a747e4fSDavid du Colombier char *p, *q, *r; 9559a747e4fSDavid du Colombier int i, count; 9569a747e4fSDavid du Colombier 9579a747e4fSDavid du Colombier count = 0; 9589a747e4fSDavid du Colombier p = (char*)buf; 9599a747e4fSDavid du Colombier for(;;) { 9609a747e4fSDavid du Colombier q = strchr(p, '\n'); 9619a747e4fSDavid du Colombier if(q == nil) 9627dd7cddfSDavid du Colombier break; 963d9306527SDavid du Colombier *q = 0; 964d9306527SDavid du Colombier if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ 965d9306527SDavid du Colombier count++; 966d9306527SDavid du Colombier *q = '\n'; 967d9306527SDavid du Colombier p = q+1; 968d9306527SDavid du Colombier continue; 969d9306527SDavid du Colombier } 970d9306527SDavid du Colombier *q = '\n'; 9719a747e4fSDavid du Colombier if(*p 
!= '\t' && *p != ' '){ 9729a747e4fSDavid du Colombier r = strchr(p, ':'); 9739a747e4fSDavid du Colombier if(r == 0 || r > q) 9749a747e4fSDavid du Colombier break; 9759a747e4fSDavid du Colombier for(i = 0; rfc822_string[i]; i++) { 9769a747e4fSDavid du Colombier if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ 9779a747e4fSDavid du Colombier count++; 9789a747e4fSDavid du Colombier break; 9797dd7cddfSDavid du Colombier } 9809a747e4fSDavid du Colombier } 9819a747e4fSDavid du Colombier } 9829a747e4fSDavid du Colombier p = q+1; 9839a747e4fSDavid du Colombier } 9849a747e4fSDavid du Colombier if(count >= 3){ 9859a747e4fSDavid du Colombier print(mime ? "message/rfc822\n" : "email file\n"); 9867dd7cddfSDavid du Colombier return 1; 9877dd7cddfSDavid du Colombier } 9889a747e4fSDavid du Colombier return 0; 9899a747e4fSDavid du Colombier } 9907dd7cddfSDavid du Colombier 9913e12c5d1SDavid du Colombier int 992d9306527SDavid du Colombier ismbox(void) 993d9306527SDavid du Colombier { 994d9306527SDavid du Colombier char *p, *q; 995d9306527SDavid du Colombier 996d9306527SDavid du Colombier p = (char*)buf; 997d9306527SDavid du Colombier q = strchr(p, '\n'); 998d9306527SDavid du Colombier if(q == nil) 999d9306527SDavid du Colombier return 0; 1000d9306527SDavid du Colombier *q = 0; 1001d9306527SDavid du Colombier if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ 1002d9306527SDavid du Colombier print(mime ? 
"text/plain\n" : "mail box\n"); 1003d9306527SDavid du Colombier return 1; 1004d9306527SDavid du Colombier } 1005d9306527SDavid du Colombier *q = '\n'; 1006d9306527SDavid du Colombier return 0; 1007d9306527SDavid du Colombier } 1008d9306527SDavid du Colombier 1009d9306527SDavid du Colombier int 10103e12c5d1SDavid du Colombier iscint(void) 10113e12c5d1SDavid du Colombier { 1012219b2ee8SDavid du Colombier int type; 1013219b2ee8SDavid du Colombier char *name; 1014219b2ee8SDavid du Colombier Biobuf b; 10153e12c5d1SDavid du Colombier 1016219b2ee8SDavid du Colombier if(Binit(&b, fd, OREAD) == Beof) 10173e12c5d1SDavid du Colombier return 0; 1018219b2ee8SDavid du Colombier seek(fd, 0, 0); 1019219b2ee8SDavid du Colombier type = objtype(&b, &name); 1020219b2ee8SDavid du Colombier if(type < 0) 1021219b2ee8SDavid du Colombier return 0; 10227dd7cddfSDavid du Colombier if(mime) 10237dd7cddfSDavid du Colombier print(OCTET); 10247dd7cddfSDavid du Colombier else 1025219b2ee8SDavid du Colombier print("%s intermediate\n", name); 1026219b2ee8SDavid du Colombier return 1; 10273e12c5d1SDavid du Colombier } 10283e12c5d1SDavid du Colombier 10293e12c5d1SDavid du Colombier int 10303e12c5d1SDavid du Colombier isc(void) 10313e12c5d1SDavid du Colombier { 10323e12c5d1SDavid du Colombier int n; 10333e12c5d1SDavid du Colombier 10343e12c5d1SDavid du Colombier n = wfreq[I1]; 10353e12c5d1SDavid du Colombier /* 10363e12c5d1SDavid du Colombier * includes 10373e12c5d1SDavid du Colombier */ 10383e12c5d1SDavid du Colombier if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 10393e12c5d1SDavid du Colombier goto yes; 1040219b2ee8SDavid du Colombier if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 1041219b2ee8SDavid du Colombier goto yes; 10423e12c5d1SDavid du Colombier /* 10433e12c5d1SDavid du Colombier * declarations 10443e12c5d1SDavid du Colombier */ 10453e12c5d1SDavid du Colombier if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) 10463e12c5d1SDavid du Colombier goto yes; 
10473e12c5d1SDavid du Colombier /* 10483e12c5d1SDavid du Colombier * assignments 10493e12c5d1SDavid du Colombier */ 10503e12c5d1SDavid du Colombier if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) 10513e12c5d1SDavid du Colombier goto yes; 10523e12c5d1SDavid du Colombier return 0; 10533e12c5d1SDavid du Colombier 10543e12c5d1SDavid du Colombier yes: 10557dd7cddfSDavid du Colombier if(mime){ 10567dd7cddfSDavid du Colombier print(PLAIN); 10577dd7cddfSDavid du Colombier return 1; 10587dd7cddfSDavid du Colombier } 1059219b2ee8SDavid du Colombier if(wfreq[Alword] > 0) 1060219b2ee8SDavid du Colombier print("alef program\n"); 1061219b2ee8SDavid du Colombier else 10623e12c5d1SDavid du Colombier print("c program\n"); 10633e12c5d1SDavid du Colombier return 1; 10643e12c5d1SDavid du Colombier } 10653e12c5d1SDavid du Colombier 10663e12c5d1SDavid du Colombier int 10677dd7cddfSDavid du Colombier islimbo(void) 10687dd7cddfSDavid du Colombier { 10697dd7cddfSDavid du Colombier 10707dd7cddfSDavid du Colombier /* 10717dd7cddfSDavid du Colombier * includes 10727dd7cddfSDavid du Colombier */ 10737dd7cddfSDavid du Colombier if(wfreq[Lword] < 4) 10747dd7cddfSDavid du Colombier return 0; 10757dd7cddfSDavid du Colombier print(mime ? PLAIN : "limbo program\n"); 10767dd7cddfSDavid du Colombier return 1; 10777dd7cddfSDavid du Colombier } 10787dd7cddfSDavid du Colombier 10797dd7cddfSDavid du Colombier int 10803e12c5d1SDavid du Colombier isas(void) 10813e12c5d1SDavid du Colombier { 10823e12c5d1SDavid du Colombier 10833e12c5d1SDavid du Colombier /* 10843e12c5d1SDavid du Colombier * includes 10853e12c5d1SDavid du Colombier */ 10863e12c5d1SDavid du Colombier if(wfreq[Aword] < 2) 10873e12c5d1SDavid du Colombier return 0; 10887dd7cddfSDavid du Colombier print(mime ? 
PLAIN : "as program\n"); 10893e12c5d1SDavid du Colombier return 1; 10903e12c5d1SDavid du Colombier } 10913e12c5d1SDavid du Colombier 10923e12c5d1SDavid du Colombier /* 10933e12c5d1SDavid du Colombier * low entropy means encrypted 10943e12c5d1SDavid du Colombier */ 10953e12c5d1SDavid du Colombier int 10963e12c5d1SDavid du Colombier ismung(void) 10973e12c5d1SDavid du Colombier { 10983e12c5d1SDavid du Colombier int i, bucket[8]; 10993e12c5d1SDavid du Colombier float cs; 11003e12c5d1SDavid du Colombier 11013e12c5d1SDavid du Colombier if(nbuf < 64) 11023e12c5d1SDavid du Colombier return 0; 11033e12c5d1SDavid du Colombier memset(bucket, 0, sizeof(bucket)); 110490630c3aSDavid du Colombier for(i=nbuf-64; i<nbuf; i++) 11053e12c5d1SDavid du Colombier bucket[(buf[i]>>5)&07] += 1; 11063e12c5d1SDavid du Colombier 11073e12c5d1SDavid du Colombier cs = 0.; 11083e12c5d1SDavid du Colombier for(i=0; i<8; i++) 11093e12c5d1SDavid du Colombier cs += (bucket[i]-8)*(bucket[i]-8); 11103e12c5d1SDavid du Colombier cs /= 8.; 11113e12c5d1SDavid du Colombier if(cs <= 24.322) { 111290630c3aSDavid du Colombier if(buf[0]==0x1f && buf[1]==0x9d) 11137dd7cddfSDavid du Colombier print(mime ? OCTET : "compressed\n"); 11143e12c5d1SDavid du Colombier else 111590630c3aSDavid du Colombier if(buf[0]==0x1f && buf[1]==0x8b) 111690630c3aSDavid du Colombier print(mime ? OCTET : "gzip compressed\n"); 111790630c3aSDavid du Colombier else 111890630c3aSDavid du Colombier if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h') 111990630c3aSDavid du Colombier print(mime ? OCTET : "bzip2 compressed\n"); 112090630c3aSDavid du Colombier else 11217dd7cddfSDavid du Colombier print(mime ? 
OCTET : "encrypted\n"); 11223e12c5d1SDavid du Colombier return 1; 11233e12c5d1SDavid du Colombier } 11243e12c5d1SDavid du Colombier return 0; 11253e12c5d1SDavid du Colombier } 11263e12c5d1SDavid du Colombier 11273e12c5d1SDavid du Colombier /* 11283e12c5d1SDavid du Colombier * english by punctuation and frequencies 11293e12c5d1SDavid du Colombier */ 11303e12c5d1SDavid du Colombier int 11313e12c5d1SDavid du Colombier isenglish(void) 11323e12c5d1SDavid du Colombier { 11333e12c5d1SDavid du Colombier int vow, comm, rare, badpun, punct; 11343e12c5d1SDavid du Colombier char *p; 11353e12c5d1SDavid du Colombier 11363e12c5d1SDavid du Colombier if(guess != Fascii && guess != Feascii) 11373e12c5d1SDavid du Colombier return 0; 11383e12c5d1SDavid du Colombier badpun = 0; 11393e12c5d1SDavid du Colombier punct = 0; 11403e12c5d1SDavid du Colombier for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) 11413e12c5d1SDavid du Colombier switch(*p) { 11423e12c5d1SDavid du Colombier case '.': 11433e12c5d1SDavid du Colombier case ',': 11443e12c5d1SDavid du Colombier case ')': 11453e12c5d1SDavid du Colombier case '%': 11463e12c5d1SDavid du Colombier case ';': 11473e12c5d1SDavid du Colombier case ':': 11483e12c5d1SDavid du Colombier case '?': 11493e12c5d1SDavid du Colombier punct++; 11503e12c5d1SDavid du Colombier if(p[1] != ' ' && p[1] != '\n') 11513e12c5d1SDavid du Colombier badpun++; 11523e12c5d1SDavid du Colombier } 11533e12c5d1SDavid du Colombier if(badpun*5 > punct) 11543e12c5d1SDavid du Colombier return 0; 11553e12c5d1SDavid du Colombier if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ 11563e12c5d1SDavid du Colombier return 0; 11573e12c5d1SDavid du Colombier if(2*cfreq[';'] > cfreq['e']) 11583e12c5d1SDavid du Colombier return 0; 11593e12c5d1SDavid du Colombier 11603e12c5d1SDavid du Colombier vow = 0; 11613e12c5d1SDavid du Colombier for(p="AEIOU"; *p; p++) { 11623e12c5d1SDavid du Colombier vow += cfreq[*p]; 11633e12c5d1SDavid du Colombier vow += cfreq[tolower(*p)]; 
11643e12c5d1SDavid du Colombier } 11653e12c5d1SDavid du Colombier comm = 0; 11663e12c5d1SDavid du Colombier for(p="ETAION"; *p; p++) { 11673e12c5d1SDavid du Colombier comm += cfreq[*p]; 11683e12c5d1SDavid du Colombier comm += cfreq[tolower(*p)]; 11693e12c5d1SDavid du Colombier } 11703e12c5d1SDavid du Colombier rare = 0; 11713e12c5d1SDavid du Colombier for(p="VJKQXZ"; *p; p++) { 11723e12c5d1SDavid du Colombier rare += cfreq[*p]; 11733e12c5d1SDavid du Colombier rare += cfreq[tolower(*p)]; 11743e12c5d1SDavid du Colombier } 11753e12c5d1SDavid du Colombier if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { 11767dd7cddfSDavid du Colombier print(mime ? PLAIN : "English text\n"); 11773e12c5d1SDavid du Colombier return 1; 11783e12c5d1SDavid du Colombier } 11793e12c5d1SDavid du Colombier return 0; 11803e12c5d1SDavid du Colombier } 11813e12c5d1SDavid du Colombier 11823e12c5d1SDavid du Colombier /* 11833e12c5d1SDavid du Colombier * pick up a number with 11843e12c5d1SDavid du Colombier * syntax _*[0-9]+_ 11853e12c5d1SDavid du Colombier */ 11863e12c5d1SDavid du Colombier #define P9BITLEN 12 11873e12c5d1SDavid du Colombier int 11883e12c5d1SDavid du Colombier p9bitnum(uchar *bp) 11893e12c5d1SDavid du Colombier { 11903e12c5d1SDavid du Colombier int n, c, len; 11913e12c5d1SDavid du Colombier 11923e12c5d1SDavid du Colombier len = P9BITLEN; 11933e12c5d1SDavid du Colombier while(*bp == ' ') { 11943e12c5d1SDavid du Colombier bp++; 11953e12c5d1SDavid du Colombier len--; 11963e12c5d1SDavid du Colombier if(len <= 0) 11973e12c5d1SDavid du Colombier return -1; 11983e12c5d1SDavid du Colombier } 11993e12c5d1SDavid du Colombier n = 0; 12003e12c5d1SDavid du Colombier while(len > 1) { 12013e12c5d1SDavid du Colombier c = *bp++; 12023e12c5d1SDavid du Colombier if(!isdigit(c)) 12033e12c5d1SDavid du Colombier return -1; 12043e12c5d1SDavid du Colombier n = n*10 + c-'0'; 12053e12c5d1SDavid du Colombier len--; 12063e12c5d1SDavid du Colombier } 12073e12c5d1SDavid du Colombier if(*bp != ' ') 
12083e12c5d1SDavid du Colombier return -1; 12093e12c5d1SDavid du Colombier return n; 12103e12c5d1SDavid du Colombier } 12113e12c5d1SDavid du Colombier 12123e12c5d1SDavid du Colombier int 12137dd7cddfSDavid du Colombier depthof(char *s, int *newp) 12147dd7cddfSDavid du Colombier { 12157dd7cddfSDavid du Colombier char *es; 12167dd7cddfSDavid du Colombier int d; 12177dd7cddfSDavid du Colombier 12187dd7cddfSDavid du Colombier *newp = 0; 12197dd7cddfSDavid du Colombier es = s+12; 12207dd7cddfSDavid du Colombier while(s<es && *s==' ') 12217dd7cddfSDavid du Colombier s++; 12227dd7cddfSDavid du Colombier if(s == es) 12237dd7cddfSDavid du Colombier return -1; 12247dd7cddfSDavid du Colombier if('0'<=*s && *s<='9') 122516941224SDavid du Colombier return 1<<strtol(s, 0, 0); 12267dd7cddfSDavid du Colombier 12277dd7cddfSDavid du Colombier *newp = 1; 12287dd7cddfSDavid du Colombier d = 0; 12297dd7cddfSDavid du Colombier while(s<es && *s!=' '){ 12307dd7cddfSDavid du Colombier s++; /* skip letter */ 12317dd7cddfSDavid du Colombier d += strtoul(s, &s, 10); 12327dd7cddfSDavid du Colombier } 12337dd7cddfSDavid du Colombier 12347dd7cddfSDavid du Colombier switch(d){ 12357dd7cddfSDavid du Colombier case 32: 12367dd7cddfSDavid du Colombier case 24: 12377dd7cddfSDavid du Colombier case 16: 12387dd7cddfSDavid du Colombier case 8: 12397dd7cddfSDavid du Colombier return d; 12407dd7cddfSDavid du Colombier } 12417dd7cddfSDavid du Colombier return -1; 12427dd7cddfSDavid du Colombier } 12437dd7cddfSDavid du Colombier 12447dd7cddfSDavid du Colombier int 12453e12c5d1SDavid du Colombier isp9bit(void) 12463e12c5d1SDavid du Colombier { 12477dd7cddfSDavid du Colombier int dep, lox, loy, hix, hiy, px, new; 1248219b2ee8SDavid du Colombier ulong t; 12493e12c5d1SDavid du Colombier long len; 12507dd7cddfSDavid du Colombier char *newlabel; 12513e12c5d1SDavid du Colombier 12527dd7cddfSDavid du Colombier newlabel = "old "; 12537dd7cddfSDavid du Colombier 12547dd7cddfSDavid du Colombier dep = 
depthof((char*)buf + 0*P9BITLEN, &new); 12557dd7cddfSDavid du Colombier if(new) 12567dd7cddfSDavid du Colombier newlabel = ""; 12573e12c5d1SDavid du Colombier lox = p9bitnum(buf + 1*P9BITLEN); 12583e12c5d1SDavid du Colombier loy = p9bitnum(buf + 2*P9BITLEN); 12593e12c5d1SDavid du Colombier hix = p9bitnum(buf + 3*P9BITLEN); 12603e12c5d1SDavid du Colombier hiy = p9bitnum(buf + 4*P9BITLEN); 12617dd7cddfSDavid du Colombier if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) 12623e12c5d1SDavid du Colombier return 0; 12633e12c5d1SDavid du Colombier 12647dd7cddfSDavid du Colombier if(dep < 8){ 12657dd7cddfSDavid du Colombier px = 8/dep; /* pixels per byte */ 1266219b2ee8SDavid du Colombier /* set l to number of bytes of data per scan line */ 1267219b2ee8SDavid du Colombier if(lox >= 0) 1268219b2ee8SDavid du Colombier len = (hix+px-1)/px - lox/px; 1269219b2ee8SDavid du Colombier else{ /* make positive before divide */ 1270219b2ee8SDavid du Colombier t = (-lox)+px-1; 1271219b2ee8SDavid du Colombier t = (t/px)*px; 1272219b2ee8SDavid du Colombier len = (t+hix+px-1)/px; 1273219b2ee8SDavid du Colombier } 12747dd7cddfSDavid du Colombier }else 12757dd7cddfSDavid du Colombier len = (hix-lox)*dep/8; 12763e12c5d1SDavid du Colombier len *= (hiy-loy); /* col length */ 12773e12c5d1SDavid du Colombier len += 5*P9BITLEN; /* size of initial ascii */ 12783e12c5d1SDavid du Colombier 12793e12c5d1SDavid du Colombier /* 12807dd7cddfSDavid du Colombier * for image file, length is non-zero and must match calculation above 12813e12c5d1SDavid du Colombier * for /dev/window and /dev/screen the length is always zero 12823e12c5d1SDavid du Colombier * for subfont, the subfont header should follow immediately. 
12833e12c5d1SDavid du Colombier */ 12849a747e4fSDavid du Colombier if (len != 0 && mbuf->length == 0) { 12857dd7cddfSDavid du Colombier print("%splan 9 image\n", newlabel); 12867dd7cddfSDavid du Colombier return 1; 12877dd7cddfSDavid du Colombier } 12889a747e4fSDavid du Colombier if (mbuf->length == len) { 12897dd7cddfSDavid du Colombier print("%splan 9 image\n", newlabel); 12907dd7cddfSDavid du Colombier return 1; 12917dd7cddfSDavid du Colombier } 12927dd7cddfSDavid du Colombier /* Ghostscript sometimes produces a little extra on the end */ 12939a747e4fSDavid du Colombier if (mbuf->length < len+P9BITLEN) { 12947dd7cddfSDavid du Colombier print("%splan 9 image\n", newlabel); 12953e12c5d1SDavid du Colombier return 1; 12963e12c5d1SDavid du Colombier } 12973e12c5d1SDavid du Colombier if (p9subfont(buf+len)) { 12987dd7cddfSDavid du Colombier print("%ssubfont file\n", newlabel); 12993e12c5d1SDavid du Colombier return 1; 13003e12c5d1SDavid du Colombier } 13013e12c5d1SDavid du Colombier return 0; 13023e12c5d1SDavid du Colombier }

/*
 * p9subfont - does the data at p look like a subfont header?
 *
 * The header is read as three consecutive P9BITLEN-wide fields
 * (char count, height, ascent), each decoded with p9bitnum().
 * Returns 1 if all three decode to non-negative values, or if the
 * header would extend past the end of buf (see below); 0 otherwise.
 */
int
p9subfont(uchar *p)
{
	int field;

	/* if image too big, assume it's a subfont */
	if (p+3*P9BITLEN > buf+sizeof(buf))
		return 1;

	/* fields in order: char count, height, ascent */
	for (field = 0; field < 3; field++)
		if (p9bitnum(p + field*P9BITLEN) < 0)
			return 0;
	return 1;
}

#define	WHITESPACE(c)		((c) == ' ' || (c) == '\t' || (c) == '\n')

/*
 * isp9font - recognise a font description file held in buf.
 *
 * Expected layout, as parsed here: two leading numbers (height and
 * ascent), then one line per range of the form
 *	min max [offset] filename
 * Each filename must exist, either as written or with ".0" appended.
 * A name not starting with '/' is looked up relative to the first
 * slash-fname+1 bytes of fname (presumably the directory part of the
 * file being examined; slash is set outside this function).
 *
 * Prints the verdict and returns 1 when at least one range line was
 * accepted; returns 0 otherwise.
 */
int
isp9font(void)
{
	uchar *cp, *p;
	int i, n;
	char pathname[1024];

	cp = buf;
	if (!getfontnum(cp, &cp))	/* height */
		return 0;
	if (!getfontnum(cp, &cp))	/* ascent */
		return 0;
	for (i = 0; (cp = (uchar*)strchr((char*)cp, '\n')) != 0; i++) {
		if (!getfontnum(cp, &cp))	/* min */
			break;		/* no more range lines */
		if (!getfontnum(cp, &cp))	/* max */
			return 0;
		getfontnum(cp, &cp);	/* optional offset */
		while (WHITESPACE(*cp))
			cp++;
		/* [p, cp) delimits the subfont file name */
		for (p = cp; *cp && !WHITESPACE(*cp); cp++)
			;
		/* construct a path name, if needed */
		n = 0;
		if (*p != '/' && slash) {
			n = slash-fname+1;
			if (n < sizeof(pathname))
				memcpy(pathname, fname, n);
			else
				n = 0;	/* prefix too long: fall back to the bare name */
		}
		/* the +4 leaves room for the ".0" suffix and the terminating NUL */
		if (n+cp-p+4 < sizeof(pathname)) {
			memcpy(pathname+n, p, cp-p);
			n += cp-p;
			pathname[n] = 0;
			if (access(pathname, AEXIST) < 0) {
				strcpy(pathname+n, ".0");
				if (access(pathname, AEXIST) < 0)
					return 0;
			}
		}
		/* names too long for pathname are not checked at all */
	}
	if (i) {
		print(mime ? "text/plain\n" : "font file\n");
		return 1;
	}
	return 0;
}

/*
 * getfontnum - parse one whitespace-delimited unsigned number.
 *
 * Skips leading whitespace at cp, requires a decimal digit, and
 * converts with strtoul() base 0.  The number must be followed by
 * whitespace.
 *
 * Returns 1 on success with *rp pointing just past the number.
 * Returns 0 on failure: *rp is left untouched when no digit was
 * found, and is set to the first non-whitespace position when the
 * number is not followed by whitespace.
 */
int
getfontnum(uchar *cp, uchar **rp)
{
	while (WHITESPACE(*cp))		/* extract ulong delimited by whitespace */
		cp++;
	if (*cp < '0' || *cp > '9')
		return 0;
	strtoul((char *)cp, (char **)rp, 0);
	if (!WHITESPACE(**rp)) {
		*rp = cp;
		return 0;
	}
	return 1;
}

/*
 * isrtf - report rich text format if "\rtf1" occurs anywhere in buf
 * (buf is searched as a NUL-terminated string).
 * Prints the verdict and returns 1 on a match, 0 otherwise.
 */
int
isrtf(void)
{
	if(strstr((char *)buf, "\\rtf1") == 0)
		return 0;
	print(mime ? "application/rtf\n" : "rich text format\n");
	return 1;
}

/*
 * ismsdos - report an MS-DOS executable if buf starts with the
 * bytes 0x4d 0x5a ("MZ").
 * Prints the verdict and returns 1 on a match, 0 otherwise.
 */
int
ismsdos(void)
{
	if (buf[0] != 0x4d || buf[1] != 0x5a)
		return 0;
	print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
	return 1;
}

/*
 * iself - report an ELF file if buf starts with the ELF magic.
 *
 * In mime mode a fixed MIME type is printed.  Otherwise the machine
 * (e_machine, bytes 18-19) and file type (e_type, bytes 16-17) are
 * looked up in the tables below and printed, with "unknown" used for
 * values that have no table entry.
 *
 * Returns 1 if the magic matched, 0 otherwise.
 */
int
iself(void)
{
	static char *cpu[] = {		/* NB: incomplete and arbitrary list */
	[1]	"WE32100",
	[2]	"SPARC",
	[3]	"i386",
	[4]	"M68000",
	[5]	"M88000",
	[6]	"i486",
	[7]	"i860",
	[8]	"R3000",
	[9]	"S370",
	[10]	"R4000",
	[15]	"HP-PA",
	[18]	"sparc v8+",
	[19]	"i960",
	[20]	"PPC-32",
	[21]	"PPC-64",
	[40]	"ARM",
	[41]	"Alpha",
	[43]	"sparc v9",
	[50]	"IA-64",
	[62]	"AMD64",
	[75]	"VAX",
	};
	static char *type[] = {
	[1]	"relocatable object",
	[2]	"executable",
	[3]	"shared library",
	[4]	"core dump",
	};
	char *p, *t;
	int n;

	/*
	 * The magic is written as two adjacent literals: in "\x7fELF"
	 * the 'E' is a hex digit and would be absorbed into the escape
	 * by compilers that do not limit the length of \x escapes.
	 */
	if (memcmp(buf, "\x7f" "ELF", 4) != 0)
		return 0;

	if (mime){
		print("application/x-elf-executable\n");
		return 1;
	}

	p = "unknown";
	t = "unknown";

	/* machine: try little-endian first, then the other byte order */
	n = (buf[19] << 8) | buf[18];
	if (n > 0 && n < nelem(cpu) && cpu[n])
		p = cpu[n];
	else {
		n = (buf[18] << 8) | buf[19];
		if (n > 0 && n < nelem(cpu) && cpu[n])
			p = cpu[n];
	}

	/*
	 * file type: a 16-bit field, so honour the byte order the file
	 * declares in e_ident[EI_DATA] (buf[5]; 2 means big-endian).
	 * Anything else is read as little-endian.
	 */
	if (buf[5] == 2)
		n = (buf[16] << 8) | buf[17];
	else
		n = (buf[17] << 8) | buf[16];
	if (n > 0 && n < nelem(type) && type[n])
		t = type[n];

	print("%s ELF %s\n", p, t);
	return 1;
}

/*
 * isface - recognise a textual face image held in buf.
 *
 * The first three lines are examined.  Each must consist of three
 * words of the form 0xHHHH, or 0xHHHHHHHH, (hex digits followed by
 * a comma), optionally separated by blanks or tabs, and end in a
 * newline.  Four-digit words give depth 1, eight-digit words give
 * depth 2; every word must have the same width.
 *
 * Prints the verdict and returns 1 on a match, 0 otherwise.
 */
int
isface(void)
{
	int i, j, ldepth, l, ndig;
	char *p, *e;

	ldepth = -1;
	p = (char*)buf;		/* advances across all three lines */
	for(j = 0; j < 3; j++){
		for(i = 0; i < 3; i++){
			if(p[0] != '0' || p[1] != 'x')
				return 0;
			strtoul(p, &e, 16);
			if(*e != ',')
				return 0;
			/* width is taken from the digits actually present at p */
			ndig = e - (p+2);
			if(ndig == 8)
				l = 2;
			else if(ndig == 4)
				l = 1;
			else
				return 0;
			if(ldepth == -1)
				ldepth = l;
			if(l != ldepth)
				return 0;
			p = e+1;	/* step over the comma */
			while(*p == ' ' || *p == '\t')
				p++;
		}
		if (*p++ != '\n')
			return 0;
	}

	if(mime)
		print("application/x-face\n");
	else
		print("face image depth %d\n", ldepth);
	return 1;
}
