1bd389b36SDavid du Colombier #include <u.h> 2bd389b36SDavid du Colombier #include <libc.h> 3bd389b36SDavid du Colombier #include <bio.h> 4bd389b36SDavid du Colombier #include <ctype.h> 5bd389b36SDavid du Colombier #include <mach.h> 63e12c5d1SDavid du Colombier 73e12c5d1SDavid du Colombier /* 83e12c5d1SDavid du Colombier * file - determine type of file 93e12c5d1SDavid du Colombier */ 103e12c5d1SDavid du Colombier #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) 113e12c5d1SDavid du Colombier 129a747e4fSDavid du Colombier uchar buf[6001]; 133e12c5d1SDavid du Colombier short cfreq[140]; 143e12c5d1SDavid du Colombier short wfreq[50]; 153e12c5d1SDavid du Colombier int nbuf; 169a747e4fSDavid du Colombier Dir* mbuf; 173e12c5d1SDavid du Colombier int fd; 183e12c5d1SDavid du Colombier char *fname; 193e12c5d1SDavid du Colombier char *slash; 203e12c5d1SDavid du Colombier 213e12c5d1SDavid du Colombier enum 223e12c5d1SDavid du Colombier { 233e12c5d1SDavid du Colombier Cword, 243e12c5d1SDavid du Colombier Fword, 253e12c5d1SDavid du Colombier Aword, 26219b2ee8SDavid du Colombier Alword, 277dd7cddfSDavid du Colombier Lword, 283e12c5d1SDavid du Colombier I1, 293e12c5d1SDavid du Colombier I2, 303e12c5d1SDavid du Colombier I3, 313e12c5d1SDavid du Colombier Clatin = 128, 323e12c5d1SDavid du Colombier Cbinary, 333e12c5d1SDavid du Colombier Cnull, 343e12c5d1SDavid du Colombier Ceascii, 353e12c5d1SDavid du Colombier Cutf, 363e12c5d1SDavid du Colombier }; 373e12c5d1SDavid du Colombier struct 383e12c5d1SDavid du Colombier { 393e12c5d1SDavid du Colombier char* word; 403e12c5d1SDavid du Colombier int class; 413e12c5d1SDavid du Colombier } dict[] = 423e12c5d1SDavid du Colombier { 437dd7cddfSDavid du Colombier "PATH", Lword, 44219b2ee8SDavid du Colombier "TEXT", Aword, 45219b2ee8SDavid du Colombier "adt", Alword, 46219b2ee8SDavid du Colombier "aggr", Alword, 47219b2ee8SDavid du Colombier "alef", Alword, 487dd7cddfSDavid du Colombier "array", Lword, 49219b2ee8SDavid du Colombier "block", Fword, 50219b2ee8SDavid du Colombier "char", Cword, 51219b2ee8SDavid du Colombier "common", Fword, 527dd7cddfSDavid du Colombier "con", Lword, 53219b2ee8SDavid du Colombier "data", Fword, 54219b2ee8SDavid du Colombier "dimension", Fword, 55219b2ee8SDavid du Colombier "double", Cword, 56219b2ee8SDavid du Colombier "extern", Cword, 57219b2ee8SDavid du Colombier "bio", I2, 58219b2ee8SDavid du Colombier "float", Cword, 597dd7cddfSDavid du Colombier "fn", Lword, 60219b2ee8SDavid du Colombier "function", Fword, 61219b2ee8SDavid du Colombier "h", I3, 627dd7cddfSDavid du Colombier "implement", Lword, 637dd7cddfSDavid du Colombier "import", Lword, 64219b2ee8SDavid du Colombier "include", I1, 65219b2ee8SDavid du Colombier "int", Cword, 66219b2ee8SDavid du Colombier "integer", Fword, 677dd7cddfSDavid du Colombier "iota", Lword, 68219b2ee8SDavid du Colombier "libc", I2, 69219b2ee8SDavid du Colombier "long", Cword, 707dd7cddfSDavid du Colombier "module", Lword, 71219b2ee8SDavid du Colombier "real", Fword, 727dd7cddfSDavid du Colombier "ref", Lword, 73219b2ee8SDavid du Colombier "register", Cword, 747dd7cddfSDavid du Colombier "self", Lword, 75219b2ee8SDavid du Colombier "short", Cword, 76219b2ee8SDavid du Colombier "static", Cword, 77219b2ee8SDavid du Colombier "stdio", I2, 78219b2ee8SDavid du Colombier "struct", Cword, 79219b2ee8SDavid du Colombier "subroutine", Fword, 80219b2ee8SDavid du Colombier "u", I2, 81219b2ee8SDavid du Colombier "void", Cword, 82219b2ee8SDavid du Colombier }; 83219b2ee8SDavid du Colombier 84219b2ee8SDavid du Colombier /* codes for 'mode' field in language structure */ 85219b2ee8SDavid du Colombier enum { 86219b2ee8SDavid du Colombier Normal = 0, 87219b2ee8SDavid du Colombier First, /* first entry for language spanning several ranges */ 88219b2ee8SDavid du Colombier Multi, /* later entries " " " ... */ 89219b2ee8SDavid du Colombier Shared, /* codes used in several languages */ 903e12c5d1SDavid du Colombier }; 913e12c5d1SDavid du Colombier 923e12c5d1SDavid du Colombier struct 933e12c5d1SDavid du Colombier { 94219b2ee8SDavid du Colombier int mode; /* see enum above */ 953e12c5d1SDavid du Colombier int count; 963e12c5d1SDavid du Colombier int low; 973e12c5d1SDavid du Colombier int high; 983e12c5d1SDavid du Colombier char *name; 993e12c5d1SDavid du Colombier 1003e12c5d1SDavid du Colombier } language[] = 1013e12c5d1SDavid du Colombier { 102219b2ee8SDavid du Colombier Normal, 0, 0x0100, 0x01FF, "Extended Latin", 103219b2ee8SDavid du Colombier Normal, 0, 0x0370, 0x03FF, "Greek", 104219b2ee8SDavid du Colombier Normal, 0, 0x0400, 0x04FF, "Cyrillic", 105219b2ee8SDavid du Colombier Normal, 0, 0x0530, 0x058F, "Armenian", 106219b2ee8SDavid du Colombier Normal, 0, 0x0590, 0x05FF, "Hebrew", 107219b2ee8SDavid du Colombier Normal, 0, 0x0600, 0x06FF, "Arabic", 108219b2ee8SDavid du Colombier Normal, 0, 0x0900, 0x097F, "Devanagari", 109219b2ee8SDavid du Colombier Normal, 0, 0x0980, 0x09FF, "Bengali", 110219b2ee8SDavid du Colombier Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", 111219b2ee8SDavid du Colombier Normal, 0, 0x0A80, 0x0AFF, "Gujarati", 112219b2ee8SDavid du Colombier Normal, 0, 0x0B00, 0x0B7F, "Oriya", 113219b2ee8SDavid du Colombier Normal, 0, 0x0B80, 0x0BFF, "Tamil", 114219b2ee8SDavid du Colombier Normal, 0, 0x0C00, 0x0C7F, "Telugu", 115219b2ee8SDavid du Colombier Normal, 0, 0x0C80, 0x0CFF, "Kannada", 116219b2ee8SDavid du Colombier Normal, 0, 0x0D00, 0x0D7F, "Malayalam", 117219b2ee8SDavid du Colombier Normal, 0, 0x0E00, 0x0E7F, "Thai", 118219b2ee8SDavid du Colombier Normal, 0, 0x0E80, 0x0EFF, "Lao", 119219b2ee8SDavid du Colombier Normal, 0, 0x1000, 0x105F, "Tibetan", 120219b2ee8SDavid du Colombier Normal, 0, 0x10A0, 0x10FF, "Georgian", 121219b2ee8SDavid du Colombier Normal, 0, 0x3040, 0x30FF, "Japanese", 122219b2ee8SDavid du Colombier Normal, 0, 0x3100, 0x312F, "Chinese", 123219b2ee8SDavid du Colombier First, 0, 0x3130, 0x318F, "Korean", 124219b2ee8SDavid du Colombier Multi, 0, 0x3400, 0x3D2F, "Korean", 125219b2ee8SDavid du Colombier Shared, 0, 0x4e00, 0x9fff, "CJK", 126219b2ee8SDavid du Colombier Normal, 0, 0, 0, 0, /* terminal entry */ 1273e12c5d1SDavid du Colombier }; 1283e12c5d1SDavid du Colombier 1293e12c5d1SDavid du Colombier 1303e12c5d1SDavid du Colombier enum 1313e12c5d1SDavid du Colombier { 1323e12c5d1SDavid du Colombier Fascii, /* printable ascii */ 1333e12c5d1SDavid du Colombier Flatin, /* latin 1*/ 1345e492409SDavid du Colombier Futf, /* UTF character set */ 1353e12c5d1SDavid du Colombier Fbinary, /* binary */ 1363e12c5d1SDavid du Colombier Feascii, /* ASCII with control chars */ 1373e12c5d1SDavid du Colombier Fnull, /* NULL in file */ 1383e12c5d1SDavid du Colombier } guess; 1393e12c5d1SDavid du Colombier 1403e12c5d1SDavid du Colombier void bump_utf_count(Rune); 1417dd7cddfSDavid du Colombier int cistrncmp(char*, char*, int); 1423e12c5d1SDavid du Colombier void filetype(int); 1433e12c5d1SDavid du Colombier int getfontnum(uchar*, uchar**); 1443e12c5d1SDavid du Colombier int isas(void); 1453e12c5d1SDavid du Colombier int isc(void); 1463e12c5d1SDavid du Colombier int iscint(void); 1473e12c5d1SDavid du Colombier int isenglish(void); 1487dd7cddfSDavid du Colombier int ishp(void); 1497dd7cddfSDavid du Colombier int ishtml(void); 1509a747e4fSDavid du Colombier int isrfc822(void); 151d9306527SDavid du Colombier int ismbox(void); 1527dd7cddfSDavid du Colombier int islimbo(void); 1533e12c5d1SDavid du Colombier int ismung(void); 1543e12c5d1SDavid du Colombier int isp9bit(void); 1553e12c5d1SDavid du Colombier int isp9font(void); 156fb7f0c93SDavid du Colombier int isrtf(void); 157f2e8132aSDavid du Colombier int ismsdos(void); 158b7327ca2SDavid du Colombier int iself(void); 1593e12c5d1SDavid du Colombier int istring(void); 1603306492aSDavid du Colombier int isoffstr(void); 161ddb951e3SDavid du Colombier int iff(void); 1623e12c5d1SDavid du Colombier int long0(void); 1633306492aSDavid du Colombier int longoff(void); 1644b30ca09SDavid du Colombier int istar(void); 1650c547597SDavid du Colombier int isface(void); 1660c547597SDavid du Colombier int isexec(void); 1673e12c5d1SDavid du Colombier int p9bitnum(uchar*); 1683e12c5d1SDavid du Colombier int p9subfont(uchar*); 1693e12c5d1SDavid du Colombier void print_utf(void); 1703e12c5d1SDavid du Colombier void type(char*, int); 1713e12c5d1SDavid du Colombier int utf_count(void); 1723e12c5d1SDavid du Colombier void wordfreq(void); 1733e12c5d1SDavid du Colombier 1743e12c5d1SDavid du Colombier int (*call[])(void) = 1753e12c5d1SDavid du Colombier { 1763e12c5d1SDavid du Colombier long0, /* recognizable by first 4 bytes */ 1773e12c5d1SDavid du Colombier istring, /* recognizable by first string */ 1780c547597SDavid du Colombier iself, /* ELF (foreign) executable */ 1790c547597SDavid du Colombier isexec, /* native executables */ 180ddb951e3SDavid du Colombier iff, /* interchange file format (strings) */ 1813306492aSDavid du Colombier longoff, /* recognizable by 4 bytes at some offset */ 1823306492aSDavid du Colombier isoffstr, /* recognizable by string at some offset */ 1839a747e4fSDavid du Colombier isrfc822, /* email file */ 184d9306527SDavid du Colombier ismbox, /* mail box */ 1854b30ca09SDavid du Colombier istar, /* recognizable by tar checksum */ 186643074abSDavid du Colombier ishtml, /* html keywords */ 187219b2ee8SDavid du Colombier iscint, /* compiler/assembler intermediate */ 1887dd7cddfSDavid du Colombier islimbo, /* limbo source */ 189219b2ee8SDavid du Colombier isc, /* c & alef compiler key words */ 1903e12c5d1SDavid du Colombier isas, /* assembler key words */ 1913e12c5d1SDavid du Colombier isp9font, /* plan 9 font */ 1927dd7cddfSDavid du Colombier isp9bit, /* plan 9 image (as from /dev/window) */ 193883a8c51SDavid du Colombier ismung, /* entropy compressed/encrypted */ 1947dd7cddfSDavid du Colombier isenglish, /* char frequency English */ 195fb7f0c93SDavid du Colombier isrtf, /* rich text format */ 196f2e8132aSDavid du Colombier ismsdos, /* msdos exe (virus file attachement) */ 1970c547597SDavid du Colombier isface, /* ascii face file */ 1983e12c5d1SDavid du Colombier 0 1993e12c5d1SDavid du Colombier }; 2003e12c5d1SDavid du Colombier 2017dd7cddfSDavid du Colombier int mime; 2027dd7cddfSDavid du Colombier 2037dd7cddfSDavid du Colombier #define OCTET "application/octet-stream\n" 2047dd7cddfSDavid du Colombier #define PLAIN "text/plain\n" 2057dd7cddfSDavid du Colombier 2063e12c5d1SDavid du Colombier void 2073e12c5d1SDavid du Colombier main(int argc, char *argv[]) 2083e12c5d1SDavid du Colombier { 2093e12c5d1SDavid du Colombier int i, j, maxlen; 2103e12c5d1SDavid du Colombier char *cp; 2113e12c5d1SDavid du Colombier Rune r; 2123e12c5d1SDavid du Colombier 2137dd7cddfSDavid du Colombier ARGBEGIN{ 2147dd7cddfSDavid du Colombier case 'm': 2157dd7cddfSDavid du Colombier mime = 1; 2167dd7cddfSDavid du Colombier break; 2177dd7cddfSDavid du Colombier default: 2187dd7cddfSDavid du Colombier fprint(2, "usage: file [-m] [file...]\n"); 2197dd7cddfSDavid du Colombier exits("usage"); 2207dd7cddfSDavid du Colombier }ARGEND; 2217dd7cddfSDavid du Colombier 2223e12c5d1SDavid du Colombier maxlen = 0; 2237dd7cddfSDavid du Colombier if(mime == 0 || argc > 1){ 2247dd7cddfSDavid du Colombier for(i = 0; i < argc; i++) { 2253e12c5d1SDavid du Colombier for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) 2263e12c5d1SDavid du Colombier ; 2273e12c5d1SDavid du Colombier if(j > maxlen) 2283e12c5d1SDavid du Colombier maxlen = j; 2293e12c5d1SDavid du Colombier } 2307dd7cddfSDavid du Colombier } 2317dd7cddfSDavid du Colombier if (argc <= 0) { 2327dd7cddfSDavid du Colombier if(!mime) 2333e12c5d1SDavid du Colombier print ("stdin: "); 2343e12c5d1SDavid du Colombier filetype(0); 2353e12c5d1SDavid du Colombier } 2363e12c5d1SDavid du Colombier else { 2377dd7cddfSDavid du Colombier for(i = 0; i < argc; i++) 2383e12c5d1SDavid du Colombier type(argv[i], maxlen); 2393e12c5d1SDavid du Colombier } 2403e12c5d1SDavid du Colombier exits(0); 2413e12c5d1SDavid du Colombier } 2423e12c5d1SDavid du Colombier 2433e12c5d1SDavid du Colombier void 2443e12c5d1SDavid du Colombier type(char *file, int nlen) 2453e12c5d1SDavid du Colombier { 2463e12c5d1SDavid du Colombier Rune r; 2473e12c5d1SDavid du Colombier int i; 2483e12c5d1SDavid du Colombier char *p; 2493e12c5d1SDavid du Colombier 2507dd7cddfSDavid du Colombier if(nlen > 0){ 2513e12c5d1SDavid du Colombier slash = 0; 2523e12c5d1SDavid du Colombier for (i = 0, p = file; *p; i++) { 2533e12c5d1SDavid du Colombier if (*p == '/') /* find rightmost slash */ 2543e12c5d1SDavid du Colombier slash = p; 2553e12c5d1SDavid du Colombier p += chartorune(&r, p); /* count runes */ 2563e12c5d1SDavid du Colombier } 2573e12c5d1SDavid du Colombier print("%s:%*s",file, nlen-i+1, ""); 2587dd7cddfSDavid du Colombier } 2593e12c5d1SDavid du Colombier fname = file; 2603e12c5d1SDavid du Colombier if ((fd = open(file, OREAD)) < 0) { 2613e12c5d1SDavid du Colombier print("cannot open\n"); 2623e12c5d1SDavid du Colombier return; 2633e12c5d1SDavid du Colombier } 2643e12c5d1SDavid du Colombier filetype(fd); 2653e12c5d1SDavid du Colombier close(fd); 2663e12c5d1SDavid du Colombier } 2673e12c5d1SDavid du Colombier 2685e492409SDavid du Colombier /* 2695e492409SDavid du Colombier * Unicode 4.0 4-byte runes. 2705e492409SDavid du Colombier */ 2715e492409SDavid du Colombier typedef int Rune1; 2725e492409SDavid du Colombier 2735e492409SDavid du Colombier enum { 2745e492409SDavid du Colombier UTFmax1 = 4, 2755e492409SDavid du Colombier }; 2765e492409SDavid du Colombier 2775e492409SDavid du Colombier int 2785e492409SDavid du Colombier fullrune1(char *p, int n) 2795e492409SDavid du Colombier { 2805e492409SDavid du Colombier int c; 2815e492409SDavid du Colombier 2825e492409SDavid du Colombier if(n >= 1) { 2835e492409SDavid du Colombier c = *(uchar*)p; 2845e492409SDavid du Colombier if(c < 0x80) 2855e492409SDavid du Colombier return 1; 2865e492409SDavid du Colombier if(n >= 2 && c < 0xE0) 2875e492409SDavid du Colombier return 1; 2885e492409SDavid du Colombier if(n >= 3 && c < 0xF0) 2895e492409SDavid du Colombier return 1; 2905e492409SDavid du Colombier if(n >= 4) 2915e492409SDavid du Colombier return 1; 2925e492409SDavid du Colombier } 2935e492409SDavid du Colombier return 0; 2945e492409SDavid du Colombier } 2955e492409SDavid du Colombier 2965e492409SDavid du Colombier int 2975e492409SDavid du Colombier chartorune1(Rune1 *rune, char *str) 2985e492409SDavid du Colombier { 2995e492409SDavid du Colombier int c, c1, c2, c3, n; 3005e492409SDavid du Colombier Rune r; 3015e492409SDavid du Colombier 3025e492409SDavid du Colombier c = *(uchar*)str; 3035e492409SDavid du Colombier if(c < 0xF0){ 3045e492409SDavid du Colombier r = 0; 3055e492409SDavid du Colombier n = chartorune(&r, str); 3065e492409SDavid du Colombier *rune = r; 3075e492409SDavid du Colombier return n; 3085e492409SDavid du Colombier } 3095e492409SDavid du Colombier c &= ~0xF0; 3105e492409SDavid du Colombier c1 = *(uchar*)(str+1) & ~0x80; 3115e492409SDavid du Colombier c2 = *(uchar*)(str+2) & ~0x80; 3125e492409SDavid du Colombier c3 = *(uchar*)(str+3) & ~0x80; 3135e492409SDavid du Colombier n = (c<<18) | (c1<<12) | (c2<<6) | c3; 3145e492409SDavid du Colombier if(n < 0x10000 || n > 0x10FFFF){ 3155e492409SDavid du Colombier *rune = Runeerror; 3165e492409SDavid du Colombier return 1; 3175e492409SDavid du Colombier } 3185e492409SDavid du Colombier *rune = n; 3195e492409SDavid du Colombier return 4; 3205e492409SDavid du Colombier } 3215e492409SDavid du Colombier 3223e12c5d1SDavid du Colombier void 3233e12c5d1SDavid du Colombier filetype(int fd) 3243e12c5d1SDavid du Colombier { 3255e492409SDavid du Colombier Rune1 r; 326219b2ee8SDavid du Colombier int i, f, n; 327219b2ee8SDavid du Colombier char *p, *eob; 3283e12c5d1SDavid du Colombier 3299a747e4fSDavid du Colombier free(mbuf); 3309a747e4fSDavid du Colombier mbuf = dirfstat(fd); 3319a747e4fSDavid du Colombier if(mbuf == nil){ 3329a747e4fSDavid du Colombier print("cannot stat: %r\n"); 3333e12c5d1SDavid du Colombier return; 3343e12c5d1SDavid du Colombier } 3359a747e4fSDavid du Colombier if(mbuf->mode & DMDIR) { 3367dd7cddfSDavid du Colombier print(mime ? "text/directory\n" : "directory\n"); 3373e12c5d1SDavid du Colombier return; 3383e12c5d1SDavid du Colombier } 3399a747e4fSDavid du Colombier if(mbuf->type != 'M' && mbuf->type != '|') { 3407dd7cddfSDavid du Colombier print(mime ? OCTET : "special file #%c/%s\n", 3419a747e4fSDavid du Colombier mbuf->type, mbuf->name); 3423e12c5d1SDavid du Colombier return; 3433e12c5d1SDavid du Colombier } 344*0dc12738SDavid du Colombier /* may be reading a pipe on standard input */ 345*0dc12738SDavid du Colombier nbuf = readn(fd, buf, sizeof(buf)-1); 3463e12c5d1SDavid du Colombier if(nbuf < 0) { 3473e12c5d1SDavid du Colombier print("cannot read\n"); 3483e12c5d1SDavid du Colombier return; 3493e12c5d1SDavid du Colombier } 3503e12c5d1SDavid du Colombier if(nbuf == 0) { 3517dd7cddfSDavid du Colombier print(mime ? PLAIN : "empty file\n"); 3523e12c5d1SDavid du Colombier return; 3533e12c5d1SDavid du Colombier } 3549a747e4fSDavid du Colombier buf[nbuf] = 0; 3553e12c5d1SDavid du Colombier 3563e12c5d1SDavid du Colombier /* 3573e12c5d1SDavid du Colombier * build histogram table 3583e12c5d1SDavid du Colombier */ 3593e12c5d1SDavid du Colombier memset(cfreq, 0, sizeof(cfreq)); 3603e12c5d1SDavid du Colombier for (i = 0; language[i].name; i++) 3613e12c5d1SDavid du Colombier language[i].count = 0; 362219b2ee8SDavid du Colombier eob = (char *)buf+nbuf; 363219b2ee8SDavid du Colombier for(n = 0, p = (char *)buf; p < eob; n++) { 3645e492409SDavid du Colombier if (!fullrune1(p, eob-p) && eob-p < UTFmax1) 365219b2ee8SDavid du Colombier break; 3665e492409SDavid du Colombier p += chartorune1(&r, p); 3673e12c5d1SDavid du Colombier if (r == 0) 3683e12c5d1SDavid du Colombier f = Cnull; 3693e12c5d1SDavid du Colombier else if (r <= 0x7f) { 3703e12c5d1SDavid du Colombier if (!isprint(r) && !isspace(r)) 3713e12c5d1SDavid du Colombier f = Ceascii; /* ASCII control char */ 3723e12c5d1SDavid du Colombier else f = r; 3735e492409SDavid du Colombier } else if (r == 0x80) { 374219b2ee8SDavid du Colombier bump_utf_count(r); 375219b2ee8SDavid du Colombier f = Cutf; 3763e12c5d1SDavid du Colombier } else if (r < 0xA0) 3773e12c5d1SDavid du Colombier f = Cbinary; /* Invalid Runes */ 3783e12c5d1SDavid du Colombier else if (r <= 0xff) 3793e12c5d1SDavid du Colombier f = Clatin; /* Latin 1 */ 3803e12c5d1SDavid du Colombier else { 3813e12c5d1SDavid du Colombier bump_utf_count(r); 3823e12c5d1SDavid du Colombier f = Cutf; /* UTF extension */ 3833e12c5d1SDavid du Colombier } 3843e12c5d1SDavid du Colombier cfreq[f]++; /* ASCII chars peg directly */ 3853e12c5d1SDavid du Colombier } 3863e12c5d1SDavid du Colombier /* 3873e12c5d1SDavid du Colombier * gross classify 3883e12c5d1SDavid du Colombier */ 3893e12c5d1SDavid du Colombier if (cfreq[Cbinary]) 3903e12c5d1SDavid du Colombier guess = Fbinary; 3913e12c5d1SDavid du Colombier else if (cfreq[Cutf]) 3923e12c5d1SDavid du Colombier guess = Futf; 3933e12c5d1SDavid du Colombier else if (cfreq[Clatin]) 3943e12c5d1SDavid du Colombier guess = Flatin; 3953e12c5d1SDavid du Colombier else if (cfreq[Ceascii]) 3963e12c5d1SDavid du Colombier guess = Feascii; 3975e492409SDavid du Colombier else if (cfreq[Cnull]) 3983306492aSDavid du Colombier guess = Fbinary; 3995e492409SDavid du Colombier else 4005e492409SDavid du Colombier guess = Fascii; 4013e12c5d1SDavid du Colombier /* 4023e12c5d1SDavid du Colombier * lookup dictionary words 4033e12c5d1SDavid du Colombier */ 404219b2ee8SDavid du Colombier memset(wfreq, 0, sizeof(wfreq)); 4057dd7cddfSDavid du Colombier if(guess == Fascii || guess == Flatin || guess == Futf) 4063e12c5d1SDavid du Colombier wordfreq(); 4073e12c5d1SDavid du Colombier /* 4083e12c5d1SDavid du Colombier * call individual classify routines 4093e12c5d1SDavid du Colombier */ 4103e12c5d1SDavid du Colombier for(i=0; call[i]; i++) 4113e12c5d1SDavid du Colombier if((*call[i])()) 4123e12c5d1SDavid du Colombier return; 4133e12c5d1SDavid du Colombier 4143e12c5d1SDavid du Colombier /* 4153e12c5d1SDavid du Colombier * if all else fails, 4163e12c5d1SDavid du Colombier * print out gross classification 4173e12c5d1SDavid du Colombier */ 41880ee5cbfSDavid du Colombier if (nbuf < 100 && !mime) 4197dd7cddfSDavid du Colombier print(mime ? PLAIN : "short "); 4203e12c5d1SDavid du Colombier if (guess == Fascii) 4217dd7cddfSDavid du Colombier print(mime ? PLAIN : "Ascii\n"); 4223e12c5d1SDavid du Colombier else if (guess == Feascii) 4237dd7cddfSDavid du Colombier print(mime ? PLAIN : "extended ascii\n"); 4243e12c5d1SDavid du Colombier else if (guess == Flatin) 4257dd7cddfSDavid du Colombier print(mime ? PLAIN : "latin ascii\n"); 4263e12c5d1SDavid du Colombier else if (guess == Futf && utf_count() < 4) 4273e12c5d1SDavid du Colombier print_utf(); 4287dd7cddfSDavid du Colombier else print(mime ? OCTET : "binary\n"); 4293e12c5d1SDavid du Colombier } 4303e12c5d1SDavid du Colombier 4313e12c5d1SDavid du Colombier void 4323e12c5d1SDavid du Colombier bump_utf_count(Rune r) 4333e12c5d1SDavid du Colombier { 4343e12c5d1SDavid du Colombier int low, high, mid; 4353e12c5d1SDavid du Colombier 4363e12c5d1SDavid du Colombier high = sizeof(language)/sizeof(language[0])-1; 4373e12c5d1SDavid du Colombier for (low = 0; low < high;) { 4383e12c5d1SDavid du Colombier mid = (low+high)/2; 4393e12c5d1SDavid du Colombier if (r >= language[mid].low) { 4403e12c5d1SDavid du Colombier if (r <= language[mid].high) { 4413e12c5d1SDavid du Colombier language[mid].count++; 4423e12c5d1SDavid du Colombier break; 4433e12c5d1SDavid du Colombier } else low = mid+1; 4443e12c5d1SDavid du Colombier } else high = mid; 4453e12c5d1SDavid du Colombier } 4463e12c5d1SDavid du Colombier } 4473e12c5d1SDavid du Colombier 4483e12c5d1SDavid du Colombier int 4493e12c5d1SDavid du Colombier utf_count(void) 4503e12c5d1SDavid du Colombier { 4513e12c5d1SDavid du Colombier int i, count; 4523e12c5d1SDavid du Colombier 453219b2ee8SDavid du Colombier count = 0; 454219b2ee8SDavid du Colombier for (i = 0; language[i].name; i++) 4553e12c5d1SDavid du Colombier if (language[i].count > 0) 456219b2ee8SDavid du Colombier switch (language[i].mode) { 457219b2ee8SDavid du Colombier case Normal: 458219b2ee8SDavid du Colombier case First: 4593e12c5d1SDavid du Colombier count++; 460219b2ee8SDavid du Colombier break; 461219b2ee8SDavid du Colombier default: 462219b2ee8SDavid du Colombier break; 463219b2ee8SDavid du Colombier } 4643e12c5d1SDavid du Colombier return count; 4653e12c5d1SDavid du Colombier } 4663e12c5d1SDavid du Colombier 467219b2ee8SDavid du Colombier int 468219b2ee8SDavid du Colombier chkascii(void) 469219b2ee8SDavid du Colombier { 470219b2ee8SDavid du Colombier int i; 471219b2ee8SDavid du Colombier 472219b2ee8SDavid du Colombier for (i = 'a'; i < 'z'; i++) 473219b2ee8SDavid du Colombier if (cfreq[i]) 474219b2ee8SDavid du Colombier return 1; 475219b2ee8SDavid du Colombier for (i = 'A'; i < 'Z'; i++) 476219b2ee8SDavid du Colombier if (cfreq[i]) 477219b2ee8SDavid du Colombier return 1; 478219b2ee8SDavid du Colombier return 0; 479219b2ee8SDavid du Colombier } 480219b2ee8SDavid du Colombier 481219b2ee8SDavid du Colombier int 482219b2ee8SDavid du Colombier find_first(char *name) 483219b2ee8SDavid du Colombier { 484219b2ee8SDavid du Colombier int i; 485219b2ee8SDavid du Colombier 486219b2ee8SDavid du Colombier for (i = 0; language[i].name != 0; i++) 487219b2ee8SDavid du Colombier if (language[i].mode == First 488219b2ee8SDavid du Colombier && strcmp(language[i].name, name) == 0) 489219b2ee8SDavid du Colombier return i; 490219b2ee8SDavid du Colombier return -1; 491219b2ee8SDavid du Colombier } 492219b2ee8SDavid du Colombier 4933e12c5d1SDavid du Colombier void 4943e12c5d1SDavid du Colombier print_utf(void) 4953e12c5d1SDavid du Colombier { 496219b2ee8SDavid du Colombier int i, printed, j; 4973e12c5d1SDavid du Colombier 4987dd7cddfSDavid du Colombier if(mime){ 4997dd7cddfSDavid du Colombier print(PLAIN); 5007dd7cddfSDavid du Colombier return; 5017dd7cddfSDavid du Colombier } 502219b2ee8SDavid du Colombier if (chkascii()) { 503219b2ee8SDavid du Colombier printed = 1; 504219b2ee8SDavid du Colombier print("Ascii"); 505219b2ee8SDavid du Colombier } else 506219b2ee8SDavid du Colombier printed = 0; 507219b2ee8SDavid du Colombier for (i = 0; language[i].name; i++) 5083e12c5d1SDavid du Colombier if (language[i].count) { 509219b2ee8SDavid du Colombier switch(language[i].mode) { 510219b2ee8SDavid du Colombier case Multi: 511219b2ee8SDavid du Colombier j = find_first(language[i].name); 512219b2ee8SDavid du Colombier if (j < 0) 513219b2ee8SDavid du Colombier break; 514219b2ee8SDavid du Colombier if (language[j].count > 0) 515219b2ee8SDavid du Colombier break; 516219b2ee8SDavid du Colombier /* Fall through */ 517219b2ee8SDavid du Colombier case Normal: 518219b2ee8SDavid du Colombier case First: 5193e12c5d1SDavid du Colombier if (printed) 5203e12c5d1SDavid du Colombier print(" & "); 5213e12c5d1SDavid du Colombier else printed = 1; 5223e12c5d1SDavid du Colombier print("%s", language[i].name); 523219b2ee8SDavid du Colombier break; 524219b2ee8SDavid du Colombier case Shared: 525219b2ee8SDavid du Colombier default: 526219b2ee8SDavid du Colombier break; 527219b2ee8SDavid du Colombier } 5283e12c5d1SDavid du Colombier } 5293e12c5d1SDavid du Colombier if(!printed) 5303e12c5d1SDavid du Colombier print("UTF"); 5313e12c5d1SDavid du Colombier print(" text\n"); 5323e12c5d1SDavid du Colombier } 5333e12c5d1SDavid du Colombier 5343e12c5d1SDavid du Colombier void 5353e12c5d1SDavid du Colombier wordfreq(void) 5363e12c5d1SDavid du Colombier { 537219b2ee8SDavid du Colombier int low, high, mid, r; 538219b2ee8SDavid du Colombier uchar *p, *p2, c; 5393e12c5d1SDavid du Colombier 540219b2ee8SDavid du Colombier p = buf; 541219b2ee8SDavid du Colombier for(;;) { 542219b2ee8SDavid du Colombier while (p < buf+nbuf && !isalpha(*p)) 543219b2ee8SDavid du Colombier p++; 544219b2ee8SDavid du Colombier if (p >= buf+nbuf) 545219b2ee8SDavid du Colombier return; 546219b2ee8SDavid du Colombier p2 = p; 547219b2ee8SDavid du Colombier while(p < buf+nbuf && isalpha(*p)) 548219b2ee8SDavid du Colombier p++; 549219b2ee8SDavid du Colombier c = *p; 550219b2ee8SDavid du Colombier *p = 0; 5513e12c5d1SDavid du Colombier high = sizeof(dict)/sizeof(dict[0]); 5523e12c5d1SDavid du Colombier for(low = 0;low < high;) { 5533e12c5d1SDavid du Colombier mid = (low+high)/2; 554219b2ee8SDavid du Colombier r = strcmp(dict[mid].word, (char*)p2); 555219b2ee8SDavid du Colombier if(r == 0) { 5563e12c5d1SDavid du Colombier wfreq[dict[mid].class]++; 5573e12c5d1SDavid du Colombier break; 5583e12c5d1SDavid du Colombier } 559219b2ee8SDavid du Colombier if(r < 0) 5603e12c5d1SDavid du Colombier low = mid+1; 5613e12c5d1SDavid du Colombier else 5623e12c5d1SDavid du Colombier high = mid; 5633e12c5d1SDavid du Colombier } 564219b2ee8SDavid du Colombier *p++ = c; 5653e12c5d1SDavid du Colombier } 5663e12c5d1SDavid du Colombier } 5673e12c5d1SDavid du Colombier 5689a747e4fSDavid du Colombier typedef struct Filemagic Filemagic; 5699a747e4fSDavid du Colombier struct Filemagic { 5709a747e4fSDavid du Colombier ulong x; 5719a747e4fSDavid du Colombier ulong mask; 5729a747e4fSDavid du Colombier char *desc; 5739a747e4fSDavid du Colombier char *mime; 5749a747e4fSDavid du Colombier }; 5759a747e4fSDavid du Colombier 5763306492aSDavid du Colombier /* 5773306492aSDavid du Colombier * integers in this table must be as seen on a little-endian machine 5783306492aSDavid du Colombier * when read from a file. 5793306492aSDavid du Colombier */ 5809a747e4fSDavid du Colombier Filemagic long0tab[] = { 5819a747e4fSDavid du Colombier 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, 5823306492aSDavid du Colombier /* "pac1" */ 5839a747e4fSDavid du Colombier 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, 5843306492aSDavid du Colombier /* "pXc2 */ 5853306492aSDavid du Colombier 0x32630070, 0xFFFF00FF, "pac4 audio file\n", OCTET, 5869a747e4fSDavid du Colombier 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, 5879a747e4fSDavid du Colombier 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, 588fb7f0c93SDavid du Colombier 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", 5899a747e4fSDavid du Colombier 070707, 0xFFFF, "cpio archive\n", OCTET, 590fb7f0c93SDavid du Colombier 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", 5919552e201SDavid du Colombier 0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg", 592ee7057f8SDavid du Colombier 0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be", 593ee7057f8SDavid du Colombier 0xfffe, 0xffffffff, "utf-32le\n", "text/plain charset=utf-32le", 594ee7057f8SDavid du Colombier 0xfeff, 0xffff, "utf-16be\n", "text/plain charset=utf-16be", 595ee7057f8SDavid du Colombier 0xfffe, 0xffff, "utf-16le\n", "text/plain charset=utf-16le", 5963306492aSDavid du Colombier /* 5973306492aSDavid du Colombier * venti & fossil magic numbers are stored big-endian on disk, 5983306492aSDavid du Colombier * thus the numbers appear reversed in this table. 5993306492aSDavid du Colombier */ 6003306492aSDavid du Colombier 0xad4e5cd1, 0xFFFFFFFF, "venti arena\n", OCTET, 6019a747e4fSDavid du Colombier }; 6029a747e4fSDavid du Colombier 6039a747e4fSDavid du Colombier int 6049a747e4fSDavid du Colombier filemagic(Filemagic *tab, int ntab, ulong x) 6059a747e4fSDavid du Colombier { 6069a747e4fSDavid du Colombier int i; 6079a747e4fSDavid du Colombier 6089a747e4fSDavid du Colombier for(i=0; i<ntab; i++) 6099a747e4fSDavid du Colombier if((x&tab[i].mask) == tab[i].x){ 6109a747e4fSDavid du Colombier print(mime ? tab[i].mime : tab[i].desc); 6119a747e4fSDavid du Colombier return 1; 6129a747e4fSDavid du Colombier } 6139a747e4fSDavid du Colombier return 0; 6149a747e4fSDavid du Colombier } 6159a747e4fSDavid du Colombier 6163e12c5d1SDavid du Colombier int 6173e12c5d1SDavid du Colombier long0(void) 6183e12c5d1SDavid du Colombier { 6193306492aSDavid du Colombier return filemagic(long0tab, nelem(long0tab), LENDIAN(buf)); 6203306492aSDavid du Colombier } 6213e12c5d1SDavid du Colombier 6223306492aSDavid du Colombier typedef struct Fileoffmag Fileoffmag; 6233306492aSDavid du Colombier struct Fileoffmag { 6243306492aSDavid du Colombier ulong off; 6253306492aSDavid du Colombier Filemagic; 6263306492aSDavid du Colombier }; 6273306492aSDavid du Colombier 6283306492aSDavid du Colombier /* 6293306492aSDavid du Colombier * integers in this table must be as seen on a little-endian machine 6303306492aSDavid du Colombier * when read from a file. 6313306492aSDavid du Colombier */ 6323306492aSDavid du Colombier Fileoffmag longofftab[] = { 6333306492aSDavid du Colombier /* 6343306492aSDavid du Colombier * venti & fossil magic numbers are stored big-endian on disk, 6353306492aSDavid du Colombier * thus the numbers appear reversed in this table. 6363306492aSDavid du Colombier */ 6373306492aSDavid du Colombier 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition\n", OCTET, 6383306492aSDavid du Colombier 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section\n", OCTET, 6393306492aSDavid du Colombier 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer\n", OCTET, 6403306492aSDavid du Colombier }; 6413306492aSDavid du Colombier 6423306492aSDavid du Colombier int 6433306492aSDavid du Colombier fileoffmagic(Fileoffmag *tab, int ntab) 6443306492aSDavid du Colombier { 6453306492aSDavid du Colombier int i; 6463306492aSDavid du Colombier ulong x; 6473306492aSDavid du Colombier Fileoffmag *tp; 6483306492aSDavid du Colombier uchar buf[sizeof(long)]; 6493306492aSDavid du Colombier 6503306492aSDavid du Colombier for(i=0; i<ntab; i++) { 6513306492aSDavid du Colombier tp = tab + i; 6523306492aSDavid du Colombier seek(fd, tp->off, 0); 653*0dc12738SDavid du Colombier if (readn(fd, buf, sizeof buf) != sizeof buf) 6543306492aSDavid du Colombier continue; 6550c547597SDavid du Colombier x = LENDIAN(buf); 6563306492aSDavid du Colombier if((x&tp->mask) == tp->x){ 6573306492aSDavid du Colombier print(mime? tp->mime: tp->desc); 6580c547597SDavid du Colombier return 1; 6593306492aSDavid du Colombier } 6603306492aSDavid du Colombier } 6610c547597SDavid du Colombier return 0; 6620c547597SDavid du Colombier } 6630c547597SDavid du Colombier 6640c547597SDavid du Colombier int 6653306492aSDavid du Colombier longoff(void) 6663306492aSDavid du Colombier { 6673306492aSDavid du Colombier return fileoffmagic(longofftab, nelem(longofftab)); 6683306492aSDavid du Colombier } 6693306492aSDavid du Colombier 6703306492aSDavid du Colombier int 6710c547597SDavid du Colombier isexec(void) 6720c547597SDavid du Colombier { 6730c547597SDavid du Colombier Fhdr f; 6740c547597SDavid du Colombier 6753e12c5d1SDavid du Colombier seek(fd, 0, 0); /* reposition to start of file */ 6763e12c5d1SDavid du Colombier if(crackhdr(fd, &f)) { 6777dd7cddfSDavid du Colombier print(mime ? OCTET : "%s\n", f.name); 6783e12c5d1SDavid du Colombier return 1; 6793e12c5d1SDavid du Colombier } 6807dd7cddfSDavid du Colombier return 0; 6817dd7cddfSDavid du Colombier } 6823e12c5d1SDavid du Colombier 6830c547597SDavid du Colombier 6844b30ca09SDavid du Colombier /* from tar.c */ 6854b30ca09SDavid du Colombier enum { NAMSIZ = 100, TBLOCK = 512 }; 6864b30ca09SDavid du Colombier 6874b30ca09SDavid du Colombier union hblock 6884b30ca09SDavid du Colombier { 6894b30ca09SDavid du Colombier char dummy[TBLOCK]; 6904b30ca09SDavid du Colombier struct header 6914b30ca09SDavid du Colombier { 6924b30ca09SDavid du Colombier char name[NAMSIZ]; 6934b30ca09SDavid du Colombier char mode[8]; 6944b30ca09SDavid du Colombier char uid[8]; 6954b30ca09SDavid du Colombier char gid[8]; 6964b30ca09SDavid du Colombier char size[12]; 6974b30ca09SDavid du Colombier char mtime[12]; 6984b30ca09SDavid du Colombier char chksum[8]; 6994b30ca09SDavid du Colombier char linkflag; 7004b30ca09SDavid du Colombier char linkname[NAMSIZ]; 7014b30ca09SDavid du Colombier /* rest are defined by POSIX's ustar format; see p1003.2b */ 7024b30ca09SDavid du Colombier char magic[6]; /* "ustar" */ 7034b30ca09SDavid du Colombier char version[2]; 7044b30ca09SDavid du Colombier char uname[32]; 7054b30ca09SDavid du Colombier char gname[32]; 7064b30ca09SDavid du Colombier char devmajor[8]; 7074b30ca09SDavid du Colombier char devminor[8]; 7084b30ca09SDavid du Colombier char prefix[155]; /* if non-null, path = prefix "/" name */ 7094b30ca09SDavid du Colombier } dbuf; 7104b30ca09SDavid du Colombier }; 7114b30ca09SDavid du Colombier 7124b30ca09SDavid du Colombier int 7134b30ca09SDavid du Colombier checksum(union hblock *hp) 7144b30ca09SDavid du Colombier { 7154b30ca09SDavid du Colombier int i; 7164b30ca09SDavid du Colombier char *cp; 7174b30ca09SDavid du Colombier struct header *hdr = &hp->dbuf; 7184b30ca09SDavid du Colombier 7194b30ca09SDavid du Colombier for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++) 7204b30ca09SDavid du Colombier *cp = ' '; 7214b30ca09SDavid du Colombier i = 0; 7224b30ca09SDavid du Colombier for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++) 7234b30ca09SDavid du Colombier i += *cp & 0xff; 7244b30ca09SDavid du Colombier return i; 7254b30ca09SDavid du Colombier } 7264b30ca09SDavid du Colombier 7274b30ca09SDavid du Colombier int 7284b30ca09SDavid du Colombier istar(void) 7294b30ca09SDavid du Colombier { 7304b30ca09SDavid du Colombier int chksum; 7314b30ca09SDavid du Colombier char tblock[TBLOCK]; 7324b30ca09SDavid du Colombier union hblock *hp = (union hblock *)tblock; 7334b30ca09SDavid du Colombier struct header *hdr = &hp->dbuf; 7344b30ca09SDavid du Colombier 7354b30ca09SDavid du Colombier seek(fd, 0, 0); /* reposition to start of file */ 7364b30ca09SDavid du Colombier if (readn(fd, tblock, sizeof tblock) != sizeof tblock) 7374b30ca09SDavid du Colombier return 0; 7384b30ca09SDavid du Colombier chksum = strtol(hdr->chksum, 0, 8); 7394b30ca09SDavid du Colombier if (hdr->name[0] != '\0' && checksum(hp) == chksum) { 7404b30ca09SDavid du Colombier if (strcmp(hdr->magic, "ustar") == 0) 7414b30ca09SDavid du Colombier print(mime? "application/x-ustar\n": 7424b30ca09SDavid du Colombier "posix tar archive\n"); 7434b30ca09SDavid du Colombier else 7444b30ca09SDavid du Colombier print(mime? "application/x-tar\n": "tar archive\n"); 7454b30ca09SDavid du Colombier return 1; 7464b30ca09SDavid du Colombier } 7474b30ca09SDavid du Colombier return 0; 7484b30ca09SDavid du Colombier } 7494b30ca09SDavid du Colombier 7503e12c5d1SDavid du Colombier /* 7513e12c5d1SDavid du Colombier * initial words to classify file 7523e12c5d1SDavid du Colombier */ 753219b2ee8SDavid du Colombier struct FILE_STRING 754219b2ee8SDavid du Colombier { 7553e12c5d1SDavid du Colombier char *key; 7563e12c5d1SDavid du Colombier char *filetype; 7573e12c5d1SDavid du Colombier int length; 7587dd7cddfSDavid du Colombier char *mime; 7593e12c5d1SDavid du Colombier } file_string[] = 7603e12c5d1SDavid du Colombier { 7617dd7cddfSDavid du Colombier "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", 7627dd7cddfSDavid du Colombier "!<arch>\n", "archive", 8, "application/octet-stream", 7637dd7cddfSDavid du Colombier "070707", "cpio archive - ascii header", 6, "application/octet-stream", 7647dd7cddfSDavid du Colombier "#!/bin/rc", "rc executable file", 9, "text/plain", 7657dd7cddfSDavid du Colombier "#!/bin/sh", "sh executable file", 9, "text/plain", 7667dd7cddfSDavid du Colombier "%!", "postscript", 2, "application/postscript", 7677dd7cddfSDavid du Colombier "\004%!", "postscript", 3, "application/postscript", 7687dd7cddfSDavid du Colombier "x T post", "troff output for post", 8, "application/troff", 7697dd7cddfSDavid du Colombier "x T Latin1", "troff output for Latin1", 10, "application/troff", 7707dd7cddfSDavid du Colombier "x T utf", "troff output for UTF", 7, "application/troff", 7717dd7cddfSDavid du Colombier "x T 202", "troff output for 202", 7, "application/troff", 7727dd7cddfSDavid du Colombier "x T aps", "troff output for aps", 7, "application/troff", 7737dd7cddfSDavid du Colombier "GIF", "GIF image", 3, "image/gif", 7747dd7cddfSDavid du Colombier "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", 77559cc4ca5SDavid du Colombier "%PDF", "PDF", 4, "application/pdf", 7767dd7cddfSDavid du Colombier "<html>\n", "HTML file", 7, "text/html", 7777dd7cddfSDavid du Colombier "<HTML>\n", "HTML file", 7, "text/html", 7787dd7cddfSDavid du Colombier "\111\111\052\000", "tiff", 4, "image/tiff", 7797dd7cddfSDavid du Colombier "\115\115\000\052", "tiff", 4, "image/tiff", 7807dd7cddfSDavid du Colombier "\377\330\377\340", "jpeg", 4, "image/jpeg", 7817dd7cddfSDavid du Colombier "\377\330\377\341", "jpeg", 4, "image/jpeg", 7827dd7cddfSDavid du Colombier "\377\330\377\333", "jpeg", 4, "image/jpeg", 783da51d93aSDavid du Colombier "BM", "bmp", 2, "image/bmp", 7847dd7cddfSDavid du Colombier "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", 785fb7f0c93SDavid du Colombier "<MakerFile ", "FrameMaker file", 11, "application/framemaker", 786fb7f0c93SDavid du Colombier "\033%-12345X", "HPJCL file", 9, "application/hpjcl", 787ddb951e3SDavid du Colombier "ID3", "mp3 audio with id3", 3, "audio/mpeg", 7887989f6fbSDavid du Colombier "\211PNG", "PNG image", 4, "image/png", 7890c547597SDavid du Colombier "P3\n", "ppm", 3, "image/ppm", 7900c547597SDavid du Colombier "P6\n", "ppm", 3, "image/ppm", 7910c547597SDavid du Colombier "/* XPM */\n", "xbm", 10, "image/xbm", 7927c70c028SDavid du Colombier ".HTML ", "troff -ms input", 6, "text/troff", 7937c70c028SDavid du Colombier ".LP", "troff -ms input", 3, "text/troff", 7947c70c028SDavid du Colombier ".ND", "troff -ms input", 3, "text/troff", 7957c70c028SDavid du Colombier ".PP", "troff -ms input", 3, "text/troff", 7967c70c028SDavid du Colombier ".TL", "troff -ms input", 3, "text/troff", 7977c70c028SDavid du Colombier ".TR", "troff -ms input", 3, "text/troff", 7987c70c028SDavid du Colombier ".TH", "manual page", 3, "text/troff", 7997c70c028SDavid du Colombier ".\\\"", "troff input", 3, "text/troff", 8007c70c028SDavid du Colombier ".de", "troff input", 3, "text/troff", 8017c70c028SDavid du Colombier ".if", "troff input", 3, "text/troff", 8027c70c028SDavid du Colombier ".nr", "troff input", 3, "text/troff", 8037c70c028SDavid du Colombier ".tr", "troff input", 3, "text/troff", 80419a27a12SDavid du Colombier "vac:", "venti score", 4, "text/plain", 8050641ea09SDavid du Colombier "-----BEGIN CERTIFICATE-----\n", 80694aa1c4cSDavid du Colombier "pem certificate", -1, "text/plain", 8070641ea09SDavid du Colombier "-----BEGIN TRUSTED CERTIFICATE-----\n", 80894aa1c4cSDavid du Colombier "pem trusted certificate", -1, "text/plain", 8090641ea09SDavid du Colombier "-----BEGIN X509 CERTIFICATE-----\n", 81094aa1c4cSDavid du Colombier "pem x.509 certificate", -1, "text/plain", 81194aa1c4cSDavid du Colombier "subject=/C=", "pem certificate with header", -1, "text/plain", 81294aa1c4cSDavid du Colombier "process snapshot ", "process snapshot", -1, "application/snapfs", 8137dd7cddfSDavid du Colombier 0,0,0,0 8143e12c5d1SDavid du Colombier }; 8153e12c5d1SDavid du Colombier 8163e12c5d1SDavid du Colombier int 8173e12c5d1SDavid du Colombier istring(void) 8183e12c5d1SDavid du Colombier { 81994aa1c4cSDavid du Colombier int i, l; 8203e12c5d1SDavid du Colombier struct FILE_STRING *p; 8213e12c5d1SDavid du Colombier 8223e12c5d1SDavid du Colombier for(p = file_string; p->key; p++) { 82394aa1c4cSDavid du Colombier l = p->length; 82494aa1c4cSDavid du Colombier if(l == -1) 82594aa1c4cSDavid du Colombier l = strlen(p->key); 82694aa1c4cSDavid du Colombier if(nbuf >= l && memcmp(buf, p->key, l) == 0) { 8277dd7cddfSDavid du Colombier if(mime) 8287dd7cddfSDavid du Colombier print("%s\n", p->mime); 8297dd7cddfSDavid du Colombier else 8303e12c5d1SDavid du Colombier print("%s\n", p->filetype); 8313e12c5d1SDavid du Colombier return 1; 8323e12c5d1SDavid du Colombier } 8333e12c5d1SDavid du Colombier } 8343e12c5d1SDavid du Colombier if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ 8353e12c5d1SDavid du Colombier for(i = 5; i < nbuf; i++) 8363e12c5d1SDavid du Colombier if(buf[i] == '\n') 8373e12c5d1SDavid du Colombier break; 8387dd7cddfSDavid du Colombier if(mime) 8397dd7cddfSDavid du Colombier print(OCTET); 8407dd7cddfSDavid du Colombier else 84159cc4ca5SDavid du Colombier print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); 8423e12c5d1SDavid du Colombier return 1; 8433e12c5d1SDavid du Colombier } 8443e12c5d1SDavid du Colombier return 0; 8453e12c5d1SDavid du Colombier } 8463e12c5d1SDavid du Colombier 8473306492aSDavid du Colombier struct offstr 8483306492aSDavid du Colombier { 8493306492aSDavid du Colombier ulong off; 8503306492aSDavid du Colombier struct FILE_STRING; 8513306492aSDavid du Colombier } offstrs[] = { 8523306492aSDavid du Colombier 32*1024, "\001CD001\001", "ISO9660 CD image", 7, OCTET, 8533306492aSDavid du Colombier 0, 0, 0, 0, 0 8543306492aSDavid du Colombier }; 8553306492aSDavid du Colombier 8563306492aSDavid du Colombier int 8573306492aSDavid du Colombier isoffstr(void) 8583306492aSDavid du Colombier { 8593306492aSDavid du Colombier int n; 8603306492aSDavid du Colombier char buf[256]; 8613306492aSDavid du Colombier struct offstr *p; 8623306492aSDavid du Colombier 8633306492aSDavid du Colombier for(p = offstrs; p->key; p++) { 8643306492aSDavid du Colombier seek(fd, p->off, 0); 8653306492aSDavid du Colombier n = p->length; 8663306492aSDavid du Colombier if (n > sizeof buf) 8673306492aSDavid du Colombier n = sizeof buf; 868*0dc12738SDavid du Colombier if (readn(fd, buf, n) != n) 8693306492aSDavid du Colombier continue; 8703306492aSDavid du Colombier if(memcmp(buf, p->key, n) == 0) { 8713306492aSDavid du Colombier if(mime) 8723306492aSDavid du Colombier print("%s\n", p->mime); 8733306492aSDavid du Colombier else 8743306492aSDavid du Colombier print("%s\n", p->filetype); 8753306492aSDavid du Colombier return 1; 8763306492aSDavid du Colombier } 8773306492aSDavid du Colombier } 8783306492aSDavid du Colombier return 0; 8793306492aSDavid du Colombier } 8803306492aSDavid du Colombier 881ddb951e3SDavid du Colombier int 882ddb951e3SDavid du Colombier iff(void) 883ddb951e3SDavid du Colombier { 884ddb951e3SDavid du Colombier if (strncmp((char*)buf, "FORM", 4) == 0 && 885ddb951e3SDavid du Colombier strncmp((char*)buf+8, "AIFF", 4) == 0) { 886ddb951e3SDavid du Colombier print("%s\n", mime? "audio/x-aiff": "aiff audio"); 887ddb951e3SDavid du Colombier return 1; 888ddb951e3SDavid du Colombier } 889ddb951e3SDavid du Colombier return 0; 890ddb951e3SDavid du Colombier } 891ddb951e3SDavid du Colombier 8927dd7cddfSDavid du Colombier char* html_string[] = 8937dd7cddfSDavid du Colombier { 8947dd7cddfSDavid du Colombier "title", 8957dd7cddfSDavid du Colombier "body", 8967dd7cddfSDavid du Colombier "head", 8977dd7cddfSDavid du Colombier "strong", 8987dd7cddfSDavid du Colombier "h1", 8997dd7cddfSDavid du Colombier "h2", 9007dd7cddfSDavid du Colombier "h3", 9017dd7cddfSDavid du Colombier "h4", 9027dd7cddfSDavid du Colombier "h5", 9037dd7cddfSDavid du Colombier "h6", 9047dd7cddfSDavid du Colombier "ul", 9057dd7cddfSDavid du Colombier "li", 9067dd7cddfSDavid du Colombier "dl", 9077dd7cddfSDavid du Colombier "br", 9087dd7cddfSDavid du Colombier "em", 9097dd7cddfSDavid du Colombier 0, 9107dd7cddfSDavid du Colombier }; 9117dd7cddfSDavid du Colombier 9127dd7cddfSDavid du Colombier int 9137dd7cddfSDavid du Colombier ishtml(void) 9147dd7cddfSDavid du Colombier { 9157dd7cddfSDavid du Colombier uchar *p, *q; 9167dd7cddfSDavid du Colombier int i, count; 9177dd7cddfSDavid du Colombier 9187dd7cddfSDavid du Colombier /* compare strings between '<' and '>' to html table */ 9197dd7cddfSDavid du Colombier count = 0; 9207dd7cddfSDavid du Colombier p = buf; 9217dd7cddfSDavid du Colombier for(;;) { 9227dd7cddfSDavid du Colombier while (p < buf+nbuf && *p != '<') 9237dd7cddfSDavid du Colombier p++; 9247dd7cddfSDavid du Colombier p++; 9257dd7cddfSDavid du Colombier if (p >= buf+nbuf) 9267dd7cddfSDavid du Colombier break; 9277dd7cddfSDavid du Colombier if(*p == '/') 9287dd7cddfSDavid du Colombier p++; 9297dd7cddfSDavid du Colombier q = p; 9307dd7cddfSDavid du Colombier while(p < buf+nbuf && *p != '>') 9317dd7cddfSDavid du Colombier p++; 9327dd7cddfSDavid du Colombier if (p >= buf+nbuf) 9337dd7cddfSDavid du Colombier break; 9347dd7cddfSDavid du Colombier for(i = 0; html_string[i]; i++) { 9357dd7cddfSDavid du Colombier if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { 9367dd7cddfSDavid du Colombier if(count++ > 4) { 9377dd7cddfSDavid du Colombier print(mime ? "text/html\n" : "HTML file\n"); 9387dd7cddfSDavid du Colombier return 1; 9397dd7cddfSDavid du Colombier } 9407dd7cddfSDavid du Colombier break; 9417dd7cddfSDavid du Colombier } 9427dd7cddfSDavid du Colombier } 9437dd7cddfSDavid du Colombier p++; 9447dd7cddfSDavid du Colombier } 9457dd7cddfSDavid du Colombier return 0; 9467dd7cddfSDavid du Colombier } 9477dd7cddfSDavid du Colombier 9489a747e4fSDavid du Colombier char* rfc822_string[] = 9497dd7cddfSDavid du Colombier { 9509a747e4fSDavid du Colombier "from:", 9519a747e4fSDavid du Colombier "date:", 9529a747e4fSDavid du Colombier "to:", 9539a747e4fSDavid du Colombier "subject:", 9549a747e4fSDavid du Colombier "received:", 955d9306527SDavid du Colombier "reply to:", 956d9306527SDavid du Colombier "sender:", 9579a747e4fSDavid du Colombier 0, 9589a747e4fSDavid du Colombier }; 9597dd7cddfSDavid du Colombier 9609a747e4fSDavid du Colombier int 9619a747e4fSDavid du Colombier isrfc822(void) 9629a747e4fSDavid du Colombier { 9639a747e4fSDavid du Colombier 9649a747e4fSDavid du Colombier char *p, *q, *r; 9659a747e4fSDavid du Colombier int i, count; 9669a747e4fSDavid du Colombier 9679a747e4fSDavid du Colombier count = 0; 9689a747e4fSDavid du Colombier p = (char*)buf; 9699a747e4fSDavid du Colombier for(;;) { 9709a747e4fSDavid du Colombier q = strchr(p, '\n'); 9719a747e4fSDavid du Colombier if(q == nil) 9727dd7cddfSDavid du Colombier break; 973d9306527SDavid du Colombier *q = 0; 974d9306527SDavid du Colombier if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ 975d9306527SDavid du Colombier count++; 976d9306527SDavid du Colombier *q = '\n'; 977d9306527SDavid du Colombier p = q+1; 978d9306527SDavid du Colombier continue; 979d9306527SDavid du Colombier } 980d9306527SDavid du Colombier *q = '\n'; 9819a747e4fSDavid du Colombier if(*p != '\t' && *p != ' '){ 9829a747e4fSDavid du Colombier r = strchr(p, ':'); 9839a747e4fSDavid du Colombier if(r == 0 || r > q) 9849a747e4fSDavid du Colombier break; 9859a747e4fSDavid du Colombier for(i = 0; rfc822_string[i]; i++) { 9869a747e4fSDavid du Colombier if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ 9879a747e4fSDavid du Colombier count++; 9889a747e4fSDavid du Colombier break; 9897dd7cddfSDavid du Colombier } 9909a747e4fSDavid du Colombier } 9919a747e4fSDavid du Colombier } 9929a747e4fSDavid du Colombier p = q+1; 9939a747e4fSDavid du Colombier } 9949a747e4fSDavid du Colombier if(count >= 3){ 9959a747e4fSDavid du Colombier print(mime ? "message/rfc822\n" : "email file\n"); 9967dd7cddfSDavid du Colombier return 1; 9977dd7cddfSDavid du Colombier } 9989a747e4fSDavid du Colombier return 0; 9999a747e4fSDavid du Colombier } 10007dd7cddfSDavid du Colombier 10013e12c5d1SDavid du Colombier int 1002d9306527SDavid du Colombier ismbox(void) 1003d9306527SDavid du Colombier { 1004d9306527SDavid du Colombier char *p, *q; 1005d9306527SDavid du Colombier 1006d9306527SDavid du Colombier p = (char*)buf; 1007d9306527SDavid du Colombier q = strchr(p, '\n'); 1008d9306527SDavid du Colombier if(q == nil) 1009d9306527SDavid du Colombier return 0; 1010d9306527SDavid du Colombier *q = 0; 1011d9306527SDavid du Colombier if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ 1012d9306527SDavid du Colombier print(mime ? "text/plain\n" : "mail box\n"); 1013d9306527SDavid du Colombier return 1; 1014d9306527SDavid du Colombier } 1015d9306527SDavid du Colombier *q = '\n'; 1016d9306527SDavid du Colombier return 0; 1017d9306527SDavid du Colombier } 1018d9306527SDavid du Colombier 1019d9306527SDavid du Colombier int 10203e12c5d1SDavid du Colombier iscint(void) 10213e12c5d1SDavid du Colombier { 1022219b2ee8SDavid du Colombier int type; 1023219b2ee8SDavid du Colombier char *name; 1024219b2ee8SDavid du Colombier Biobuf b; 10253e12c5d1SDavid du Colombier 1026219b2ee8SDavid du Colombier if(Binit(&b, fd, OREAD) == Beof) 10273e12c5d1SDavid du Colombier return 0; 1028219b2ee8SDavid du Colombier seek(fd, 0, 0); 1029219b2ee8SDavid du Colombier type = objtype(&b, &name); 1030219b2ee8SDavid du Colombier if(type < 0) 1031219b2ee8SDavid du Colombier return 0; 10327dd7cddfSDavid du Colombier if(mime) 10337dd7cddfSDavid du Colombier print(OCTET); 10347dd7cddfSDavid du Colombier else 1035219b2ee8SDavid du Colombier print("%s intermediate\n", name); 1036219b2ee8SDavid du Colombier return 1; 10373e12c5d1SDavid du Colombier } 10383e12c5d1SDavid du Colombier 10393e12c5d1SDavid du Colombier int 10403e12c5d1SDavid du Colombier isc(void) 10413e12c5d1SDavid du Colombier { 10423e12c5d1SDavid du Colombier int n; 10433e12c5d1SDavid du Colombier 10443e12c5d1SDavid du Colombier n = wfreq[I1]; 10453e12c5d1SDavid du Colombier /* 10463e12c5d1SDavid du Colombier * includes 10473e12c5d1SDavid du Colombier */ 10483e12c5d1SDavid du Colombier if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 10493e12c5d1SDavid du Colombier goto yes; 1050219b2ee8SDavid du Colombier if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 1051219b2ee8SDavid du Colombier goto yes; 10523e12c5d1SDavid du Colombier /* 10533e12c5d1SDavid du Colombier * declarations 10543e12c5d1SDavid du Colombier */ 10553e12c5d1SDavid du Colombier if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) 10563e12c5d1SDavid du Colombier goto yes; 10573e12c5d1SDavid du Colombier /* 10583e12c5d1SDavid du Colombier * assignments 10593e12c5d1SDavid du Colombier */ 10603e12c5d1SDavid du Colombier if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) 10613e12c5d1SDavid du Colombier goto yes; 10623e12c5d1SDavid du Colombier return 0; 10633e12c5d1SDavid du Colombier 10643e12c5d1SDavid du Colombier yes: 10657dd7cddfSDavid du Colombier if(mime){ 10667dd7cddfSDavid du Colombier print(PLAIN); 10677dd7cddfSDavid du Colombier return 1; 10687dd7cddfSDavid du Colombier } 1069219b2ee8SDavid du Colombier if(wfreq[Alword] > 0) 1070219b2ee8SDavid du Colombier print("alef program\n"); 1071219b2ee8SDavid du Colombier else 10723e12c5d1SDavid du Colombier print("c program\n"); 10733e12c5d1SDavid du Colombier return 1; 10743e12c5d1SDavid du Colombier } 10753e12c5d1SDavid du Colombier 10763e12c5d1SDavid du Colombier int 10777dd7cddfSDavid du Colombier islimbo(void) 10787dd7cddfSDavid du Colombier { 10797dd7cddfSDavid du Colombier 10807dd7cddfSDavid du Colombier /* 10817dd7cddfSDavid du Colombier * includes 10827dd7cddfSDavid du Colombier */ 10837dd7cddfSDavid du Colombier if(wfreq[Lword] < 4) 10847dd7cddfSDavid du Colombier return 0; 10857dd7cddfSDavid du Colombier print(mime ? PLAIN : "limbo program\n"); 10867dd7cddfSDavid du Colombier return 1; 10877dd7cddfSDavid du Colombier } 10887dd7cddfSDavid du Colombier 10897dd7cddfSDavid du Colombier int 10903e12c5d1SDavid du Colombier isas(void) 10913e12c5d1SDavid du Colombier { 10923e12c5d1SDavid du Colombier 10933e12c5d1SDavid du Colombier /* 10943e12c5d1SDavid du Colombier * includes 10953e12c5d1SDavid du Colombier */ 10963e12c5d1SDavid du Colombier if(wfreq[Aword] < 2) 10973e12c5d1SDavid du Colombier return 0; 10987dd7cddfSDavid du Colombier print(mime ? PLAIN : "as program\n"); 10993e12c5d1SDavid du Colombier return 1; 11003e12c5d1SDavid du Colombier } 11013e12c5d1SDavid du Colombier 11023e12c5d1SDavid du Colombier /* 11033e12c5d1SDavid du Colombier * low entropy means encrypted 11043e12c5d1SDavid du Colombier */ 11053e12c5d1SDavid du Colombier int 11063e12c5d1SDavid du Colombier ismung(void) 11073e12c5d1SDavid du Colombier { 11083e12c5d1SDavid du Colombier int i, bucket[8]; 11093e12c5d1SDavid du Colombier float cs; 11103e12c5d1SDavid du Colombier 11113e12c5d1SDavid du Colombier if(nbuf < 64) 11123e12c5d1SDavid du Colombier return 0; 11133e12c5d1SDavid du Colombier memset(bucket, 0, sizeof(bucket)); 111490630c3aSDavid du Colombier for(i=nbuf-64; i<nbuf; i++) 11153e12c5d1SDavid du Colombier bucket[(buf[i]>>5)&07] += 1; 11163e12c5d1SDavid du Colombier 11173e12c5d1SDavid du Colombier cs = 0.; 11183e12c5d1SDavid du Colombier for(i=0; i<8; i++) 11193e12c5d1SDavid du Colombier cs += (bucket[i]-8)*(bucket[i]-8); 11203e12c5d1SDavid du Colombier cs /= 8.; 11213e12c5d1SDavid du Colombier if(cs <= 24.322) { 112290630c3aSDavid du Colombier if(buf[0]==0x1f && buf[1]==0x9d) 11237dd7cddfSDavid du Colombier print(mime ? OCTET : "compressed\n"); 11243e12c5d1SDavid du Colombier else 112590630c3aSDavid du Colombier if(buf[0]==0x1f && buf[1]==0x8b) 112690630c3aSDavid du Colombier print(mime ? OCTET : "gzip compressed\n"); 112790630c3aSDavid du Colombier else 112890630c3aSDavid du Colombier if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h') 112990630c3aSDavid du Colombier print(mime ? OCTET : "bzip2 compressed\n"); 113090630c3aSDavid du Colombier else 11317dd7cddfSDavid du Colombier print(mime ? OCTET : "encrypted\n"); 11323e12c5d1SDavid du Colombier return 1; 11333e12c5d1SDavid du Colombier } 11343e12c5d1SDavid du Colombier return 0; 11353e12c5d1SDavid du Colombier } 11363e12c5d1SDavid du Colombier 11373e12c5d1SDavid du Colombier /* 11383e12c5d1SDavid du Colombier * english by punctuation and frequencies 11393e12c5d1SDavid du Colombier */ 11403e12c5d1SDavid du Colombier int 11413e12c5d1SDavid du Colombier isenglish(void) 11423e12c5d1SDavid du Colombier { 11433e12c5d1SDavid du Colombier int vow, comm, rare, badpun, punct; 11443e12c5d1SDavid du Colombier char *p; 11453e12c5d1SDavid du Colombier 11463e12c5d1SDavid du Colombier if(guess != Fascii && guess != Feascii) 11473e12c5d1SDavid du Colombier return 0; 11483e12c5d1SDavid du Colombier badpun = 0; 11493e12c5d1SDavid du Colombier punct = 0; 11503e12c5d1SDavid du Colombier for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) 11513e12c5d1SDavid du Colombier switch(*p) { 11523e12c5d1SDavid du Colombier case '.': 11533e12c5d1SDavid du Colombier case ',': 11543e12c5d1SDavid du Colombier case ')': 11553e12c5d1SDavid du Colombier case '%': 11563e12c5d1SDavid du Colombier case ';': 11573e12c5d1SDavid du Colombier case ':': 11583e12c5d1SDavid du Colombier case '?': 11593e12c5d1SDavid du Colombier punct++; 11603e12c5d1SDavid du Colombier if(p[1] != ' ' && p[1] != '\n') 11613e12c5d1SDavid du Colombier badpun++; 11623e12c5d1SDavid du Colombier } 11633e12c5d1SDavid du Colombier if(badpun*5 > punct) 11643e12c5d1SDavid du Colombier return 0; 11653e12c5d1SDavid du Colombier if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ 11663e12c5d1SDavid du Colombier return 0; 11673e12c5d1SDavid du Colombier if(2*cfreq[';'] > cfreq['e']) 11683e12c5d1SDavid du Colombier return 0; 11693e12c5d1SDavid du Colombier 11703e12c5d1SDavid du Colombier vow = 0; 11713e12c5d1SDavid du Colombier for(p="AEIOU"; *p; p++) { 11723e12c5d1SDavid du Colombier vow += cfreq[*p]; 11733e12c5d1SDavid du Colombier vow += cfreq[tolower(*p)]; 11743e12c5d1SDavid du Colombier } 11753e12c5d1SDavid du Colombier comm = 0; 11763e12c5d1SDavid du Colombier for(p="ETAION"; *p; p++) { 11773e12c5d1SDavid du Colombier comm += cfreq[*p]; 11783e12c5d1SDavid du Colombier comm += cfreq[tolower(*p)]; 11793e12c5d1SDavid du Colombier } 11803e12c5d1SDavid du Colombier rare = 0; 11813e12c5d1SDavid du Colombier for(p="VJKQXZ"; *p; p++) { 11823e12c5d1SDavid du Colombier rare += cfreq[*p]; 11833e12c5d1SDavid du Colombier rare += cfreq[tolower(*p)]; 11843e12c5d1SDavid du Colombier } 11853e12c5d1SDavid du Colombier if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { 11867dd7cddfSDavid du Colombier print(mime ? PLAIN : "English text\n"); 11873e12c5d1SDavid du Colombier return 1; 11883e12c5d1SDavid du Colombier } 11893e12c5d1SDavid du Colombier return 0; 11903e12c5d1SDavid du Colombier } 11913e12c5d1SDavid du Colombier 11923e12c5d1SDavid du Colombier /* 11933e12c5d1SDavid du Colombier * pick up a number with 11943e12c5d1SDavid du Colombier * syntax _*[0-9]+_ 11953e12c5d1SDavid du Colombier */ 11963e12c5d1SDavid du Colombier #define P9BITLEN 12 11973e12c5d1SDavid du Colombier int 11983e12c5d1SDavid du Colombier p9bitnum(uchar *bp) 11993e12c5d1SDavid du Colombier { 12003e12c5d1SDavid du Colombier int n, c, len; 12013e12c5d1SDavid du Colombier 12023e12c5d1SDavid du Colombier len = P9BITLEN; 12033e12c5d1SDavid du Colombier while(*bp == ' ') { 12043e12c5d1SDavid du Colombier bp++; 12053e12c5d1SDavid du Colombier len--; 12063e12c5d1SDavid du Colombier if(len <= 0) 12073e12c5d1SDavid du Colombier return -1; 12083e12c5d1SDavid du Colombier } 12093e12c5d1SDavid du Colombier n = 0; 12103e12c5d1SDavid du Colombier while(len > 1) { 12113e12c5d1SDavid du Colombier c = *bp++; 12123e12c5d1SDavid du Colombier if(!isdigit(c)) 12133e12c5d1SDavid du Colombier return -1; 12143e12c5d1SDavid du Colombier n = n*10 + c-'0'; 12153e12c5d1SDavid du Colombier len--; 12163e12c5d1SDavid du Colombier } 12173e12c5d1SDavid du Colombier if(*bp != ' ') 12183e12c5d1SDavid du Colombier return -1; 12193e12c5d1SDavid du Colombier return n; 12203e12c5d1SDavid du Colombier } 12213e12c5d1SDavid du Colombier 12223e12c5d1SDavid du Colombier int 12237dd7cddfSDavid du Colombier depthof(char *s, int *newp) 12247dd7cddfSDavid du Colombier { 12257dd7cddfSDavid du Colombier char *es; 12267dd7cddfSDavid du Colombier int d; 12277dd7cddfSDavid du Colombier 12287dd7cddfSDavid du Colombier *newp = 0; 12297dd7cddfSDavid du Colombier es = s+12; 12307dd7cddfSDavid du Colombier while(s<es && *s==' ') 12317dd7cddfSDavid du Colombier s++; 12327dd7cddfSDavid du Colombier if(s == es) 12337dd7cddfSDavid du Colombier return -1; 12347dd7cddfSDavid du Colombier if('0'<=*s && *s<='9') 123516941224SDavid du Colombier return 1<<strtol(s, 0, 0); 12367dd7cddfSDavid du Colombier 12377dd7cddfSDavid du Colombier *newp = 1; 12387dd7cddfSDavid du Colombier d = 0; 12397dd7cddfSDavid du Colombier while(s<es && *s!=' '){ 12407dd7cddfSDavid du Colombier s++; /* skip letter */ 12417dd7cddfSDavid du Colombier d += strtoul(s, &s, 10); 12427dd7cddfSDavid du Colombier } 12437dd7cddfSDavid du Colombier 1244883a8c51SDavid du Colombier if(d % 8 == 0 || 8 % d == 0) 12457dd7cddfSDavid du Colombier return d; 1246883a8c51SDavid du Colombier else 12477dd7cddfSDavid du Colombier return -1; 12487dd7cddfSDavid du Colombier } 12497dd7cddfSDavid du Colombier 12507dd7cddfSDavid du Colombier int 12513e12c5d1SDavid du Colombier isp9bit(void) 12523e12c5d1SDavid du Colombier { 1253883a8c51SDavid du Colombier int dep, lox, loy, hix, hiy, px, new, cmpr; 1254219b2ee8SDavid du Colombier ulong t; 12553e12c5d1SDavid du Colombier long len; 12567dd7cddfSDavid du Colombier char *newlabel; 1257883a8c51SDavid du Colombier uchar *cp; 12583e12c5d1SDavid du Colombier 1259883a8c51SDavid du Colombier cp = buf; 1260883a8c51SDavid du Colombier cmpr = 0; 12617dd7cddfSDavid du Colombier newlabel = "old "; 12627dd7cddfSDavid du Colombier 1263883a8c51SDavid du Colombier if(memcmp(cp, "compressed\n", 11) == 0) { 1264883a8c51SDavid du Colombier cmpr = 1; 1265883a8c51SDavid du Colombier cp = buf + 11; 1266883a8c51SDavid du Colombier } 1267883a8c51SDavid du Colombier 1268883a8c51SDavid du Colombier dep = depthof((char*)cp + 0*P9BITLEN, &new); 12697dd7cddfSDavid du Colombier if(new) 12707dd7cddfSDavid du Colombier newlabel = ""; 1271883a8c51SDavid du Colombier lox = p9bitnum(cp + 1*P9BITLEN); 1272883a8c51SDavid du Colombier loy = p9bitnum(cp + 2*P9BITLEN); 1273883a8c51SDavid du Colombier hix = p9bitnum(cp + 3*P9BITLEN); 1274883a8c51SDavid du Colombier hiy = p9bitnum(cp + 4*P9BITLEN); 12757dd7cddfSDavid du Colombier if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) 12763e12c5d1SDavid du Colombier return 0; 12773e12c5d1SDavid du Colombier 12787dd7cddfSDavid du Colombier if(dep < 8){ 12797dd7cddfSDavid du Colombier px = 8/dep; /* pixels per byte */ 1280219b2ee8SDavid du Colombier /* set l to number of bytes of data per scan line */ 1281219b2ee8SDavid du Colombier if(lox >= 0) 1282219b2ee8SDavid du Colombier len = (hix+px-1)/px - lox/px; 1283219b2ee8SDavid du Colombier else{ /* make positive before divide */ 1284219b2ee8SDavid du Colombier t = (-lox)+px-1; 1285219b2ee8SDavid du Colombier t = (t/px)*px; 1286219b2ee8SDavid du Colombier len = (t+hix+px-1)/px; 1287219b2ee8SDavid du Colombier } 12887dd7cddfSDavid du Colombier }else 12897dd7cddfSDavid du Colombier len = (hix-lox)*dep/8; 1290883a8c51SDavid du Colombier len *= hiy - loy; /* col length */ 12913e12c5d1SDavid du Colombier len += 5 * P9BITLEN; /* size of initial ascii */ 12923e12c5d1SDavid du Colombier 12933e12c5d1SDavid du Colombier /* 1294883a8c51SDavid du Colombier * for compressed images, don't look any further. otherwise: 1295*0dc12738SDavid du Colombier * for image file, length is non-zero and must match calculation above. 1296*0dc12738SDavid du Colombier * for /dev/window and /dev/screen the length is always zero. 12973e12c5d1SDavid du Colombier * for subfont, the subfont header should follow immediately. 12983e12c5d1SDavid du Colombier */ 1299883a8c51SDavid du Colombier if (cmpr) { 1300883a8c51SDavid du Colombier print(mime ? OCTET : "Compressed %splan 9 image or subfont, depth %d\n", 1301883a8c51SDavid du Colombier newlabel, dep); 1302883a8c51SDavid du Colombier return 1; 1303883a8c51SDavid du Colombier } 1304*0dc12738SDavid du Colombier /* 1305*0dc12738SDavid du Colombier * mbuf->length == 0 probably indicates reading a pipe. 1306*0dc12738SDavid du Colombier * Ghostscript sometimes produces a little extra on the end. 1307*0dc12738SDavid du Colombier */ 1308*0dc12738SDavid du Colombier if (len != 0 && (mbuf->length == 0 || mbuf->length == len || 1309*0dc12738SDavid du Colombier mbuf->length > len && mbuf->length < len+P9BITLEN)) { 1310883a8c51SDavid du Colombier print(mime ? OCTET : "%splan 9 image, depth %d\n", newlabel, dep); 13113e12c5d1SDavid du Colombier return 1; 13123e12c5d1SDavid du Colombier } 13133e12c5d1SDavid du Colombier if (p9subfont(buf+len)) { 1314883a8c51SDavid du Colombier print(mime ? OCTET : "%ssubfont file, depth %d\n", newlabel, dep); 13153e12c5d1SDavid du Colombier return 1; 13163e12c5d1SDavid du Colombier } 13173e12c5d1SDavid du Colombier return 0; 13183e12c5d1SDavid du Colombier } 13193e12c5d1SDavid du Colombier 13203e12c5d1SDavid du Colombier int 13213e12c5d1SDavid du Colombier p9subfont(uchar *p) 13223e12c5d1SDavid du Colombier { 13233e12c5d1SDavid du Colombier int n, h, a; 13243e12c5d1SDavid du Colombier 13257dd7cddfSDavid du Colombier /* if image too big, assume it's a subfont */ 13263e12c5d1SDavid du Colombier if (p+3*P9BITLEN > buf+sizeof(buf)) 13273e12c5d1SDavid du Colombier return 1; 13283e12c5d1SDavid du Colombier 13293e12c5d1SDavid du Colombier n = p9bitnum(p + 0*P9BITLEN); /* char count */ 13303e12c5d1SDavid du Colombier if (n < 0) 13313e12c5d1SDavid du Colombier return 0; 13323e12c5d1SDavid du Colombier h = p9bitnum(p + 1*P9BITLEN); /* height */ 13333e12c5d1SDavid du Colombier if (h < 0) 13343e12c5d1SDavid du Colombier return 0; 13353e12c5d1SDavid du Colombier a = p9bitnum(p + 2*P9BITLEN); /* ascent */ 13363e12c5d1SDavid du Colombier if (a < 0) 13373e12c5d1SDavid du Colombier return 0; 13383e12c5d1SDavid du Colombier return 1; 13393e12c5d1SDavid du Colombier } 13403e12c5d1SDavid du Colombier 13413e12c5d1SDavid du Colombier #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 13423e12c5d1SDavid du Colombier 13433e12c5d1SDavid du Colombier int 13443e12c5d1SDavid du Colombier isp9font(void) 13453e12c5d1SDavid du Colombier { 13463e12c5d1SDavid du Colombier uchar *cp, *p; 13473e12c5d1SDavid du Colombier int i, n; 13483e12c5d1SDavid du Colombier char pathname[1024]; 13493e12c5d1SDavid du Colombier 13503e12c5d1SDavid du Colombier cp = buf; 13513e12c5d1SDavid du Colombier if (!getfontnum(cp, &cp)) /* height */ 13523e12c5d1SDavid du Colombier return 0; 13533e12c5d1SDavid du Colombier if (!getfontnum(cp, &cp)) /* ascent */ 13543e12c5d1SDavid du Colombier return 0; 13555e492409SDavid du Colombier for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) { 13563e12c5d1SDavid du Colombier if (!getfontnum(cp, &cp)) /* min */ 13573e12c5d1SDavid du Colombier break; 13583e12c5d1SDavid du Colombier if (!getfontnum(cp, &cp)) /* max */ 13593e12c5d1SDavid du Colombier return 0; 13605e492409SDavid du Colombier getfontnum(cp, &cp); /* optional offset */ 13613e12c5d1SDavid du Colombier while (WHITESPACE(*cp)) 13623e12c5d1SDavid du Colombier cp++; 13633e12c5d1SDavid du Colombier for (p = cp; *cp && !WHITESPACE(*cp); cp++) 13643e12c5d1SDavid du Colombier ; 13653e12c5d1SDavid du Colombier /* construct a path name, if needed */ 13663e12c5d1SDavid du Colombier n = 0; 13673e12c5d1SDavid du Colombier if (*p != '/' && slash) { 13683e12c5d1SDavid du Colombier n = slash-fname+1; 13693e12c5d1SDavid du Colombier if (n < sizeof(pathname)) 13703e12c5d1SDavid du Colombier memcpy(pathname, fname, n); 13713e12c5d1SDavid du Colombier else n = 0; 13723e12c5d1SDavid du Colombier } 13735e492409SDavid du Colombier if (n+cp-p+4 < sizeof(pathname)) { 13743e12c5d1SDavid du Colombier memcpy(pathname+n, p, cp-p); 13753e12c5d1SDavid du Colombier n += cp-p; 13763e12c5d1SDavid du Colombier pathname[n] = 0; 13775e492409SDavid du Colombier if (access(pathname, AEXIST) < 0) { 13785e492409SDavid du Colombier strcpy(pathname+n, ".0"); 13799a747e4fSDavid du Colombier if (access(pathname, AEXIST) < 0) 13803e12c5d1SDavid du Colombier return 0; 13813e12c5d1SDavid du Colombier } 13823e12c5d1SDavid du Colombier } 13835e492409SDavid du Colombier } 13843e12c5d1SDavid du Colombier if (i) { 13858d37e088SDavid du Colombier print(mime ? "text/plain\n" : "font file\n"); 13863e12c5d1SDavid du Colombier return 1; 13873e12c5d1SDavid du Colombier } 13883e12c5d1SDavid du Colombier return 0; 13893e12c5d1SDavid du Colombier } 13903e12c5d1SDavid du Colombier 13913e12c5d1SDavid du Colombier int 13923e12c5d1SDavid du Colombier getfontnum(uchar *cp, uchar **rp) 13933e12c5d1SDavid du Colombier { 13943e12c5d1SDavid du Colombier while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ 13953e12c5d1SDavid du Colombier cp++; 13963e12c5d1SDavid du Colombier if (*cp < '0' || *cp > '9') 13973e12c5d1SDavid du Colombier return 0; 13983e12c5d1SDavid du Colombier strtoul((char *)cp, (char **)rp, 0); 13995e492409SDavid du Colombier if (!WHITESPACE(**rp)) { 14005e492409SDavid du Colombier *rp = cp; 14013e12c5d1SDavid du Colombier return 0; 14025e492409SDavid du Colombier } 14033e12c5d1SDavid du Colombier return 1; 14043e12c5d1SDavid du Colombier } 14057dd7cddfSDavid du Colombier 14067dd7cddfSDavid du Colombier int 1407fb7f0c93SDavid du Colombier isrtf(void) 14087dd7cddfSDavid du Colombier { 1409fb7f0c93SDavid du Colombier if(strstr((char *)buf, "\\rtf1")){ 1410f2e8132aSDavid du Colombier print(mime ? "application/rtf\n" : "rich text format\n"); 1411f2e8132aSDavid du Colombier return 1; 1412f2e8132aSDavid du Colombier } 1413f2e8132aSDavid du Colombier return 0; 1414f2e8132aSDavid du Colombier } 1415f2e8132aSDavid du Colombier 1416f2e8132aSDavid du Colombier int 1417f2e8132aSDavid du Colombier ismsdos(void) 1418f2e8132aSDavid du Colombier { 1419f2e8132aSDavid du Colombier if (buf[0] == 0x4d && buf[1] == 0x5a){ 1420f2e8132aSDavid du Colombier print(mime ? "application/x-msdownload\n" : "MSDOS executable\n"); 14217dd7cddfSDavid du Colombier return 1; 14227dd7cddfSDavid du Colombier } 14237dd7cddfSDavid du Colombier return 0; 14247dd7cddfSDavid du Colombier } 1425b7327ca2SDavid du Colombier 1426b7327ca2SDavid du Colombier int 1427b7327ca2SDavid du Colombier iself(void) 1428b7327ca2SDavid du Colombier { 1429ee7057f8SDavid du Colombier static char *cpu[] = { /* NB: incomplete and arbitary list */ 1430b7327ca2SDavid du Colombier [1] "WE32100", 1431b7327ca2SDavid du Colombier [2] "SPARC", 1432b7327ca2SDavid du Colombier [3] "i386", 1433b7327ca2SDavid du Colombier [4] "M68000", 1434b7327ca2SDavid du Colombier [5] "M88000", 1435b7327ca2SDavid du Colombier [6] "i486", 1436b7327ca2SDavid du Colombier [7] "i860", 1437b7327ca2SDavid du Colombier [8] "R3000", 1438b7327ca2SDavid du Colombier [9] "S370", 1439b7327ca2SDavid du Colombier [10] "R4000", 1440b7327ca2SDavid du Colombier [15] "HP-PA", 1441b7327ca2SDavid du Colombier [18] "sparc v8+", 1442b7327ca2SDavid du Colombier [19] "i960", 1443b7327ca2SDavid du Colombier [20] "PPC-32", 1444b7327ca2SDavid du Colombier [21] "PPC-64", 1445b7327ca2SDavid du Colombier [40] "ARM", 1446b7327ca2SDavid du Colombier [41] "Alpha", 1447b7327ca2SDavid du Colombier [43] "sparc v9", 1448ea43b5ecSDavid du Colombier [50] "IA-64", 1449f9247424SDavid du Colombier [62] "AMD64", 1450b7327ca2SDavid du Colombier [75] "VAX", 1451b7327ca2SDavid du Colombier }; 1452ee7057f8SDavid du Colombier static char *type[] = { 1453ee7057f8SDavid du Colombier [1] "relocatable object", 1454ee7057f8SDavid du Colombier [2] "executable", 1455ee7057f8SDavid du Colombier [3] "shared library", 1456ee7057f8SDavid du Colombier [4] "core dump", 1457ee7057f8SDavid du Colombier }; 1458b7327ca2SDavid du Colombier 1459b7327ca2SDavid du Colombier if (memcmp(buf, "\x7fELF", 4) == 0){ 1460b7327ca2SDavid du Colombier if (!mime){ 1461883a8c51SDavid du Colombier int isdifend = 0; 1462b7327ca2SDavid du Colombier int n = (buf[19] << 8) | buf[18]; 14638a2c5ad0SDavid du Colombier char *p = "unknown"; 1464ee7057f8SDavid du Colombier char *t = "unknown"; 14658a2c5ad0SDavid du Colombier 14668a2c5ad0SDavid du Colombier if (n > 0 && n < nelem(cpu) && cpu[n]) 14678a2c5ad0SDavid du Colombier p = cpu[n]; 14688a2c5ad0SDavid du Colombier else { 14698a2c5ad0SDavid du Colombier /* try the other byte order */ 1470883a8c51SDavid du Colombier isdifend = 1; 14718a2c5ad0SDavid du Colombier n = (buf[18] << 8) | buf[19]; 14728a2c5ad0SDavid du Colombier if (n > 0 && n < nelem(cpu) && cpu[n]) 14738a2c5ad0SDavid du Colombier p = cpu[n]; 14748a2c5ad0SDavid du Colombier } 1475883a8c51SDavid du Colombier if(isdifend) 1476883a8c51SDavid du Colombier n = (buf[16]<< 8) | buf[17]; 1477883a8c51SDavid du Colombier else 1478883a8c51SDavid du Colombier n = (buf[17]<< 8) | buf[16]; 1479883a8c51SDavid du Colombier 1480ee7057f8SDavid du Colombier if(n>0 && n < nelem(type) && type[n]) 1481ee7057f8SDavid du Colombier t = type[n]; 1482ee7057f8SDavid du Colombier print("%s ELF %s\n", p, t); 1483b7327ca2SDavid du Colombier } 1484b7327ca2SDavid du Colombier else 1485b7327ca2SDavid du Colombier print("application/x-elf-executable"); 1486b7327ca2SDavid du Colombier return 1; 1487b7327ca2SDavid du Colombier } 1488b7327ca2SDavid du Colombier 1489b7327ca2SDavid du Colombier return 0; 1490b7327ca2SDavid du Colombier } 14910c547597SDavid du Colombier 14920c547597SDavid du Colombier int 14930c547597SDavid du Colombier isface(void) 14940c547597SDavid du Colombier { 14950c547597SDavid du Colombier int i, j, ldepth, l; 14960c547597SDavid du Colombier char *p; 14970c547597SDavid du Colombier 14980c547597SDavid du Colombier ldepth = -1; 14990c547597SDavid du Colombier for(j = 0; j < 3; j++){ 15000c547597SDavid du Colombier for(p = (char*)buf, i=0; i<3; i++){ 15010c547597SDavid du Colombier if(p[0] != '0' || p[1] != 'x') 15020c547597SDavid du Colombier return 0; 15030c547597SDavid du Colombier if(buf[2+8] == ',') 15040c547597SDavid du Colombier l = 2; 15050c547597SDavid du Colombier else if(buf[2+4] == ',') 15060c547597SDavid du Colombier l = 1; 15070c547597SDavid du Colombier else 15080c547597SDavid du Colombier return 0; 15090c547597SDavid du Colombier if(ldepth == -1) 15100c547597SDavid du Colombier ldepth = l; 15110c547597SDavid du Colombier if(l != ldepth) 15120c547597SDavid du Colombier return 0; 15130c547597SDavid du Colombier strtoul(p, &p, 16); 15140c547597SDavid du Colombier if(*p++ != ',') 15150c547597SDavid du Colombier return 0; 15160c547597SDavid du Colombier while(*p == ' ' || *p == '\t') 15170c547597SDavid du Colombier p++; 15180c547597SDavid du Colombier } 15190c547597SDavid du Colombier if (*p++ != '\n') 15200c547597SDavid du Colombier return 0; 15210c547597SDavid du Colombier } 15220c547597SDavid du Colombier 15230c547597SDavid du Colombier if(mime) 15240c547597SDavid du Colombier print("application/x-face\n"); 15250c547597SDavid du Colombier else 15260c547597SDavid du Colombier print("face image depth %d\n", ldepth); 15270c547597SDavid du Colombier return 1; 15280c547597SDavid du Colombier } 15290c547597SDavid du Colombier 1530