1bd389b36SDavid du Colombier #include <u.h>
2bd389b36SDavid du Colombier #include <libc.h>
3bd389b36SDavid du Colombier #include <bio.h>
4bd389b36SDavid du Colombier #include <ctype.h>
5bd389b36SDavid du Colombier #include <mach.h>
63e12c5d1SDavid du Colombier
/*
 * file - determine type of file
 */
/*
 * LENDIAN(p): combine the four bytes at p into one 32-bit quantity,
 * p[0] being the least significant byte.  Each byte is widened to
 * unsigned long before it is shifted, so a top byte >= 0x80 is never
 * shifted into the sign bit of an int.
 */
#define LENDIAN(p)	((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
			((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
113e12c5d1SDavid du Colombier
129a747e4fSDavid du Colombier uchar buf[6001];
133e12c5d1SDavid du Colombier short cfreq[140];
143e12c5d1SDavid du Colombier short wfreq[50];
153e12c5d1SDavid du Colombier int nbuf;
169a747e4fSDavid du Colombier Dir* mbuf;
173e12c5d1SDavid du Colombier int fd;
183e12c5d1SDavid du Colombier char *fname;
193e12c5d1SDavid du Colombier char *slash;
203e12c5d1SDavid du Colombier
/*
 * The low values are word classes: they are the 'class' of a dict[]
 * entry and the index into wfreq[].  The values from Clatin up are
 * classes of non-ASCII runes and index cfreq[] above the ASCII range.
 */
enum
{
	Cword,			/* C words (char, int, struct, ...) */
	Fword,			/* words such as common, dimension, subroutine */
	Aword,			/* TEXT */
	Alword,			/* adt, aggr, alef */
	Lword,			/* words such as implement, module, fn */
	I1,			/* "include" */
	I2,			/* header base names: bio, libc, stdio, u */
	I3,			/* "h" */
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Words looked up by wordfreq().  The lookup is a binary search using
 * strcmp, so the entries MUST stay in strict strcmp (byte) order:
 * upper case sorts before lower case.  A single misplaced entry hides
 * not only itself but also the neighbours the search probes past it.
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	{ "PATH",	Lword },
	{ "TEXT",	Aword },
	{ "adt",	Alword },
	{ "aggr",	Alword },
	{ "alef",	Alword },
	{ "array",	Lword },
	{ "bio",	I2 },
	{ "block",	Fword },
	{ "char",	Cword },
	{ "common",	Fword },
	{ "con",	Lword },
	{ "data",	Fword },
	{ "dimension",	Fword },
	{ "double",	Cword },
	{ "extern",	Cword },
	{ "float",	Cword },
	{ "fn",		Lword },
	{ "function",	Fword },
	{ "h",		I3 },
	{ "implement",	Lword },
	{ "import",	Lword },
	{ "include",	I1 },
	{ "int",	Cword },
	{ "integer",	Fword },
	{ "iota",	Lword },
	{ "libc",	I2 },
	{ "long",	Cword },
	{ "module",	Lword },
	{ "real",	Fword },
	{ "ref",	Lword },
	{ "register",	Cword },
	{ "self",	Lword },
	{ "short",	Cword },
	{ "static",	Cword },
	{ "stdio",	I2 },
	{ "struct",	Cword },
	{ "subroutine",	Fword },
	{ "u",		I2 },
	{ "void",	Cword },
};
83219b2ee8SDavid du Colombier
/*
 * values of the 'mode' field in the language table
 */
enum {
	Normal	= 0,
	First,		/* first entry of a language that spans several ranges */
	Multi,		/* a later entry of such a language */
	Shared,		/* codes used in several languages */
};
913e12c5d1SDavid du Colombier
923e12c5d1SDavid du Colombier struct
933e12c5d1SDavid du Colombier {
94219b2ee8SDavid du Colombier int mode; /* see enum above */
953e12c5d1SDavid du Colombier int count;
963e12c5d1SDavid du Colombier int low;
973e12c5d1SDavid du Colombier int high;
983e12c5d1SDavid du Colombier char *name;
993e12c5d1SDavid du Colombier
1003e12c5d1SDavid du Colombier } language[] =
1013e12c5d1SDavid du Colombier {
102219b2ee8SDavid du Colombier Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103219b2ee8SDavid du Colombier Normal, 0, 0x0370, 0x03FF, "Greek",
104219b2ee8SDavid du Colombier Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105219b2ee8SDavid du Colombier Normal, 0, 0x0530, 0x058F, "Armenian",
106219b2ee8SDavid du Colombier Normal, 0, 0x0590, 0x05FF, "Hebrew",
107219b2ee8SDavid du Colombier Normal, 0, 0x0600, 0x06FF, "Arabic",
108219b2ee8SDavid du Colombier Normal, 0, 0x0900, 0x097F, "Devanagari",
109219b2ee8SDavid du Colombier Normal, 0, 0x0980, 0x09FF, "Bengali",
110219b2ee8SDavid du Colombier Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111219b2ee8SDavid du Colombier Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112219b2ee8SDavid du Colombier Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113219b2ee8SDavid du Colombier Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114219b2ee8SDavid du Colombier Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115219b2ee8SDavid du Colombier Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116219b2ee8SDavid du Colombier Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117219b2ee8SDavid du Colombier Normal, 0, 0x0E00, 0x0E7F, "Thai",
118219b2ee8SDavid du Colombier Normal, 0, 0x0E80, 0x0EFF, "Lao",
119219b2ee8SDavid du Colombier Normal, 0, 0x1000, 0x105F, "Tibetan",
120219b2ee8SDavid du Colombier Normal, 0, 0x10A0, 0x10FF, "Georgian",
121219b2ee8SDavid du Colombier Normal, 0, 0x3040, 0x30FF, "Japanese",
122219b2ee8SDavid du Colombier Normal, 0, 0x3100, 0x312F, "Chinese",
123219b2ee8SDavid du Colombier First, 0, 0x3130, 0x318F, "Korean",
124219b2ee8SDavid du Colombier Multi, 0, 0x3400, 0x3D2F, "Korean",
125219b2ee8SDavid du Colombier Shared, 0, 0x4e00, 0x9fff, "CJK",
126219b2ee8SDavid du Colombier Normal, 0, 0, 0, 0, /* terminal entry */
1273e12c5d1SDavid du Colombier };
1283e12c5d1SDavid du Colombier
1293e12c5d1SDavid du Colombier
/*
 * gross classification of the buffer; filetype() stores its verdict
 * in guess before running the individual classifiers
 */
enum
{
	Fascii,		/* printable ascii */
	Flatin,		/* latin 1 */
	Futf,		/* UTF character set */
	Fbinary,	/* binary */
	Feascii,	/* ASCII with control chars */
	Fnull,		/* NULL in file */
} guess;
1393e12c5d1SDavid du Colombier
1403e12c5d1SDavid du Colombier void bump_utf_count(Rune);
1417dd7cddfSDavid du Colombier int cistrncmp(char*, char*, int);
1423e12c5d1SDavid du Colombier void filetype(int);
1433e12c5d1SDavid du Colombier int getfontnum(uchar*, uchar**);
1443e12c5d1SDavid du Colombier int isas(void);
1453e12c5d1SDavid du Colombier int isc(void);
1463e12c5d1SDavid du Colombier int iscint(void);
1473e12c5d1SDavid du Colombier int isenglish(void);
1487dd7cddfSDavid du Colombier int ishp(void);
1497dd7cddfSDavid du Colombier int ishtml(void);
1509a747e4fSDavid du Colombier int isrfc822(void);
151d9306527SDavid du Colombier int ismbox(void);
1527dd7cddfSDavid du Colombier int islimbo(void);
1533e12c5d1SDavid du Colombier int ismung(void);
1543e12c5d1SDavid du Colombier int isp9bit(void);
1553e12c5d1SDavid du Colombier int isp9font(void);
156fb7f0c93SDavid du Colombier int isrtf(void);
157f2e8132aSDavid du Colombier int ismsdos(void);
158b7327ca2SDavid du Colombier int iself(void);
1593e12c5d1SDavid du Colombier int istring(void);
1603306492aSDavid du Colombier int isoffstr(void);
161ddb951e3SDavid du Colombier int iff(void);
1623e12c5d1SDavid du Colombier int long0(void);
1633306492aSDavid du Colombier int longoff(void);
1644b30ca09SDavid du Colombier int istar(void);
1650c547597SDavid du Colombier int isface(void);
1660c547597SDavid du Colombier int isexec(void);
1673e12c5d1SDavid du Colombier int p9bitnum(uchar*);
1683e12c5d1SDavid du Colombier int p9subfont(uchar*);
1693e12c5d1SDavid du Colombier void print_utf(void);
1703e12c5d1SDavid du Colombier void type(char*, int);
1713e12c5d1SDavid du Colombier int utf_count(void);
1723e12c5d1SDavid du Colombier void wordfreq(void);
1733e12c5d1SDavid du Colombier
1743e12c5d1SDavid du Colombier int (*call[])(void) =
1753e12c5d1SDavid du Colombier {
1763e12c5d1SDavid du Colombier long0, /* recognizable by first 4 bytes */
1773e12c5d1SDavid du Colombier istring, /* recognizable by first string */
1780c547597SDavid du Colombier iself, /* ELF (foreign) executable */
1790c547597SDavid du Colombier isexec, /* native executables */
180ddb951e3SDavid du Colombier iff, /* interchange file format (strings) */
1813306492aSDavid du Colombier longoff, /* recognizable by 4 bytes at some offset */
1823306492aSDavid du Colombier isoffstr, /* recognizable by string at some offset */
1839a747e4fSDavid du Colombier isrfc822, /* email file */
184d9306527SDavid du Colombier ismbox, /* mail box */
1854b30ca09SDavid du Colombier istar, /* recognizable by tar checksum */
186643074abSDavid du Colombier ishtml, /* html keywords */
187219b2ee8SDavid du Colombier iscint, /* compiler/assembler intermediate */
1887dd7cddfSDavid du Colombier islimbo, /* limbo source */
189219b2ee8SDavid du Colombier isc, /* c & alef compiler key words */
1903e12c5d1SDavid du Colombier isas, /* assembler key words */
1913e12c5d1SDavid du Colombier isp9font, /* plan 9 font */
1927dd7cddfSDavid du Colombier isp9bit, /* plan 9 image (as from /dev/window) */
193fb7f0c93SDavid du Colombier isrtf, /* rich text format */
194f2e8132aSDavid du Colombier ismsdos, /* msdos exe (virus file attachement) */
1950c547597SDavid du Colombier isface, /* ascii face file */
196e5cffcb6SDavid du Colombier
197e5cffcb6SDavid du Colombier /* last resorts */
198e5cffcb6SDavid du Colombier ismung, /* entropy compressed/encrypted */
199e5cffcb6SDavid du Colombier isenglish, /* char frequency English */
2003e12c5d1SDavid du Colombier 0
2013e12c5d1SDavid du Colombier };
2023e12c5d1SDavid du Colombier
/* non-zero (set by -m in main) selects MIME type output */
int	mime;

/* MIME answers shared by many classifiers; each carries its own newline */
char	OCTET[]	= "application/octet-stream\n";
char	PLAIN[]	= "text/plain\n";
2077dd7cddfSDavid du Colombier
2083e12c5d1SDavid du Colombier void
main(int argc,char * argv[])2093e12c5d1SDavid du Colombier main(int argc, char *argv[])
2103e12c5d1SDavid du Colombier {
2113e12c5d1SDavid du Colombier int i, j, maxlen;
2123e12c5d1SDavid du Colombier char *cp;
2133e12c5d1SDavid du Colombier Rune r;
2143e12c5d1SDavid du Colombier
2157dd7cddfSDavid du Colombier ARGBEGIN{
2167dd7cddfSDavid du Colombier case 'm':
2177dd7cddfSDavid du Colombier mime = 1;
2187dd7cddfSDavid du Colombier break;
2197dd7cddfSDavid du Colombier default:
2207dd7cddfSDavid du Colombier fprint(2, "usage: file [-m] [file...]\n");
2217dd7cddfSDavid du Colombier exits("usage");
2227dd7cddfSDavid du Colombier }ARGEND;
2237dd7cddfSDavid du Colombier
2243e12c5d1SDavid du Colombier maxlen = 0;
2257dd7cddfSDavid du Colombier if(mime == 0 || argc > 1){
2267dd7cddfSDavid du Colombier for(i = 0; i < argc; i++) {
2273e12c5d1SDavid du Colombier for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
2283e12c5d1SDavid du Colombier ;
2293e12c5d1SDavid du Colombier if(j > maxlen)
2303e12c5d1SDavid du Colombier maxlen = j;
2313e12c5d1SDavid du Colombier }
2327dd7cddfSDavid du Colombier }
2337dd7cddfSDavid du Colombier if (argc <= 0) {
2347dd7cddfSDavid du Colombier if(!mime)
2353e12c5d1SDavid du Colombier print ("stdin: ");
2363e12c5d1SDavid du Colombier filetype(0);
2373e12c5d1SDavid du Colombier }
2383e12c5d1SDavid du Colombier else {
2397dd7cddfSDavid du Colombier for(i = 0; i < argc; i++)
2403e12c5d1SDavid du Colombier type(argv[i], maxlen);
2413e12c5d1SDavid du Colombier }
2423e12c5d1SDavid du Colombier exits(0);
2433e12c5d1SDavid du Colombier }
2443e12c5d1SDavid du Colombier
2453e12c5d1SDavid du Colombier void
type(char * file,int nlen)2463e12c5d1SDavid du Colombier type(char *file, int nlen)
2473e12c5d1SDavid du Colombier {
2483e12c5d1SDavid du Colombier Rune r;
2493e12c5d1SDavid du Colombier int i;
2503e12c5d1SDavid du Colombier char *p;
2513e12c5d1SDavid du Colombier
2527dd7cddfSDavid du Colombier if(nlen > 0){
2533e12c5d1SDavid du Colombier slash = 0;
2543e12c5d1SDavid du Colombier for (i = 0, p = file; *p; i++) {
2553e12c5d1SDavid du Colombier if (*p == '/') /* find rightmost slash */
2563e12c5d1SDavid du Colombier slash = p;
2573e12c5d1SDavid du Colombier p += chartorune(&r, p); /* count runes */
2583e12c5d1SDavid du Colombier }
2593e12c5d1SDavid du Colombier print("%s:%*s",file, nlen-i+1, "");
2607dd7cddfSDavid du Colombier }
2613e12c5d1SDavid du Colombier fname = file;
2623e12c5d1SDavid du Colombier if ((fd = open(file, OREAD)) < 0) {
2633801c5d3SDavid du Colombier print("cannot open: %r\n");
2643e12c5d1SDavid du Colombier return;
2653e12c5d1SDavid du Colombier }
2663e12c5d1SDavid du Colombier filetype(fd);
2673e12c5d1SDavid du Colombier close(fd);
2683e12c5d1SDavid du Colombier }
2693e12c5d1SDavid du Colombier
2703e12c5d1SDavid du Colombier void
filetype(int fd)2713e12c5d1SDavid du Colombier filetype(int fd)
2723e12c5d1SDavid du Colombier {
27382726826SDavid du Colombier Rune r;
274219b2ee8SDavid du Colombier int i, f, n;
275219b2ee8SDavid du Colombier char *p, *eob;
2763e12c5d1SDavid du Colombier
2779a747e4fSDavid du Colombier free(mbuf);
2789a747e4fSDavid du Colombier mbuf = dirfstat(fd);
2799a747e4fSDavid du Colombier if(mbuf == nil){
2809a747e4fSDavid du Colombier print("cannot stat: %r\n");
2813e12c5d1SDavid du Colombier return;
2823e12c5d1SDavid du Colombier }
2839a747e4fSDavid du Colombier if(mbuf->mode & DMDIR) {
2841f533253SDavid du Colombier print(mime ? OCTET : "directory\n");
2853e12c5d1SDavid du Colombier return;
2863e12c5d1SDavid du Colombier }
2879a747e4fSDavid du Colombier if(mbuf->type != 'M' && mbuf->type != '|') {
2889b558a26SDavid du Colombier print(mime ? OCTET : "special file #%C/%s\n",
2899a747e4fSDavid du Colombier mbuf->type, mbuf->name);
2903e12c5d1SDavid du Colombier return;
2913e12c5d1SDavid du Colombier }
2920dc12738SDavid du Colombier /* may be reading a pipe on standard input */
2930dc12738SDavid du Colombier nbuf = readn(fd, buf, sizeof(buf)-1);
2943e12c5d1SDavid du Colombier if(nbuf < 0) {
2953801c5d3SDavid du Colombier print("cannot read: %r\n");
2963e12c5d1SDavid du Colombier return;
2973e12c5d1SDavid du Colombier }
2983e12c5d1SDavid du Colombier if(nbuf == 0) {
2997dd7cddfSDavid du Colombier print(mime ? PLAIN : "empty file\n");
3003e12c5d1SDavid du Colombier return;
3013e12c5d1SDavid du Colombier }
3029a747e4fSDavid du Colombier buf[nbuf] = 0;
3033e12c5d1SDavid du Colombier
3043e12c5d1SDavid du Colombier /*
3053e12c5d1SDavid du Colombier * build histogram table
3063e12c5d1SDavid du Colombier */
3073e12c5d1SDavid du Colombier memset(cfreq, 0, sizeof(cfreq));
3083e12c5d1SDavid du Colombier for (i = 0; language[i].name; i++)
3093e12c5d1SDavid du Colombier language[i].count = 0;
310219b2ee8SDavid du Colombier eob = (char *)buf+nbuf;
311219b2ee8SDavid du Colombier for(n = 0, p = (char *)buf; p < eob; n++) {
31282726826SDavid du Colombier if (!fullrune(p, eob-p) && eob-p < UTFmax)
313219b2ee8SDavid du Colombier break;
31482726826SDavid du Colombier p += chartorune(&r, p);
3153e12c5d1SDavid du Colombier if (r == 0)
3163e12c5d1SDavid du Colombier f = Cnull;
3173e12c5d1SDavid du Colombier else if (r <= 0x7f) {
3183e12c5d1SDavid du Colombier if (!isprint(r) && !isspace(r))
3193e12c5d1SDavid du Colombier f = Ceascii; /* ASCII control char */
3203e12c5d1SDavid du Colombier else f = r;
3215e492409SDavid du Colombier } else if (r == 0x80) {
322219b2ee8SDavid du Colombier bump_utf_count(r);
323219b2ee8SDavid du Colombier f = Cutf;
3243e12c5d1SDavid du Colombier } else if (r < 0xA0)
3253e12c5d1SDavid du Colombier f = Cbinary; /* Invalid Runes */
3263e12c5d1SDavid du Colombier else if (r <= 0xff)
3273e12c5d1SDavid du Colombier f = Clatin; /* Latin 1 */
3283e12c5d1SDavid du Colombier else {
3293e12c5d1SDavid du Colombier bump_utf_count(r);
3303e12c5d1SDavid du Colombier f = Cutf; /* UTF extension */
3313e12c5d1SDavid du Colombier }
3323e12c5d1SDavid du Colombier cfreq[f]++; /* ASCII chars peg directly */
3333e12c5d1SDavid du Colombier }
3343e12c5d1SDavid du Colombier /*
3353e12c5d1SDavid du Colombier * gross classify
3363e12c5d1SDavid du Colombier */
3373e12c5d1SDavid du Colombier if (cfreq[Cbinary])
3383e12c5d1SDavid du Colombier guess = Fbinary;
3393e12c5d1SDavid du Colombier else if (cfreq[Cutf])
3403e12c5d1SDavid du Colombier guess = Futf;
3413e12c5d1SDavid du Colombier else if (cfreq[Clatin])
3423e12c5d1SDavid du Colombier guess = Flatin;
3433e12c5d1SDavid du Colombier else if (cfreq[Ceascii])
3443e12c5d1SDavid du Colombier guess = Feascii;
3455e492409SDavid du Colombier else if (cfreq[Cnull])
3463306492aSDavid du Colombier guess = Fbinary;
3475e492409SDavid du Colombier else
3485e492409SDavid du Colombier guess = Fascii;
3493e12c5d1SDavid du Colombier /*
3503e12c5d1SDavid du Colombier * lookup dictionary words
3513e12c5d1SDavid du Colombier */
352219b2ee8SDavid du Colombier memset(wfreq, 0, sizeof(wfreq));
3537dd7cddfSDavid du Colombier if(guess == Fascii || guess == Flatin || guess == Futf)
3543e12c5d1SDavid du Colombier wordfreq();
3553e12c5d1SDavid du Colombier /*
3563e12c5d1SDavid du Colombier * call individual classify routines
3573e12c5d1SDavid du Colombier */
3583e12c5d1SDavid du Colombier for(i=0; call[i]; i++)
3593e12c5d1SDavid du Colombier if((*call[i])())
3603e12c5d1SDavid du Colombier return;
3613e12c5d1SDavid du Colombier
3623e12c5d1SDavid du Colombier /*
3633e12c5d1SDavid du Colombier * if all else fails,
3643e12c5d1SDavid du Colombier * print out gross classification
3653e12c5d1SDavid du Colombier */
36680ee5cbfSDavid du Colombier if (nbuf < 100 && !mime)
3677dd7cddfSDavid du Colombier print(mime ? PLAIN : "short ");
3683e12c5d1SDavid du Colombier if (guess == Fascii)
3697dd7cddfSDavid du Colombier print(mime ? PLAIN : "Ascii\n");
3703e12c5d1SDavid du Colombier else if (guess == Feascii)
3717dd7cddfSDavid du Colombier print(mime ? PLAIN : "extended ascii\n");
3723e12c5d1SDavid du Colombier else if (guess == Flatin)
3737dd7cddfSDavid du Colombier print(mime ? PLAIN : "latin ascii\n");
3743e12c5d1SDavid du Colombier else if (guess == Futf && utf_count() < 4)
3753e12c5d1SDavid du Colombier print_utf();
3767dd7cddfSDavid du Colombier else print(mime ? OCTET : "binary\n");
3773e12c5d1SDavid du Colombier }
3783e12c5d1SDavid du Colombier
3793e12c5d1SDavid du Colombier void
bump_utf_count(Rune r)3803e12c5d1SDavid du Colombier bump_utf_count(Rune r)
3813e12c5d1SDavid du Colombier {
3823e12c5d1SDavid du Colombier int low, high, mid;
3833e12c5d1SDavid du Colombier
3843e12c5d1SDavid du Colombier high = sizeof(language)/sizeof(language[0])-1;
3853e12c5d1SDavid du Colombier for (low = 0; low < high;) {
3863e12c5d1SDavid du Colombier mid = (low+high)/2;
3873e12c5d1SDavid du Colombier if (r >= language[mid].low) {
3883e12c5d1SDavid du Colombier if (r <= language[mid].high) {
3893e12c5d1SDavid du Colombier language[mid].count++;
3903e12c5d1SDavid du Colombier break;
3913e12c5d1SDavid du Colombier } else low = mid+1;
3923e12c5d1SDavid du Colombier } else high = mid;
3933e12c5d1SDavid du Colombier }
3943e12c5d1SDavid du Colombier }
3953e12c5d1SDavid du Colombier
3963e12c5d1SDavid du Colombier int
utf_count(void)3973e12c5d1SDavid du Colombier utf_count(void)
3983e12c5d1SDavid du Colombier {
3993e12c5d1SDavid du Colombier int i, count;
4003e12c5d1SDavid du Colombier
401219b2ee8SDavid du Colombier count = 0;
402219b2ee8SDavid du Colombier for (i = 0; language[i].name; i++)
4033e12c5d1SDavid du Colombier if (language[i].count > 0)
404219b2ee8SDavid du Colombier switch (language[i].mode) {
405219b2ee8SDavid du Colombier case Normal:
406219b2ee8SDavid du Colombier case First:
4073e12c5d1SDavid du Colombier count++;
408219b2ee8SDavid du Colombier break;
409219b2ee8SDavid du Colombier default:
410219b2ee8SDavid du Colombier break;
411219b2ee8SDavid du Colombier }
4123e12c5d1SDavid du Colombier return count;
4133e12c5d1SDavid du Colombier }
4143e12c5d1SDavid du Colombier
415219b2ee8SDavid du Colombier int
chkascii(void)416219b2ee8SDavid du Colombier chkascii(void)
417219b2ee8SDavid du Colombier {
418219b2ee8SDavid du Colombier int i;
419219b2ee8SDavid du Colombier
420219b2ee8SDavid du Colombier for (i = 'a'; i < 'z'; i++)
421219b2ee8SDavid du Colombier if (cfreq[i])
422219b2ee8SDavid du Colombier return 1;
423219b2ee8SDavid du Colombier for (i = 'A'; i < 'Z'; i++)
424219b2ee8SDavid du Colombier if (cfreq[i])
425219b2ee8SDavid du Colombier return 1;
426219b2ee8SDavid du Colombier return 0;
427219b2ee8SDavid du Colombier }
428219b2ee8SDavid du Colombier
429219b2ee8SDavid du Colombier int
find_first(char * name)430219b2ee8SDavid du Colombier find_first(char *name)
431219b2ee8SDavid du Colombier {
432219b2ee8SDavid du Colombier int i;
433219b2ee8SDavid du Colombier
434219b2ee8SDavid du Colombier for (i = 0; language[i].name != 0; i++)
435219b2ee8SDavid du Colombier if (language[i].mode == First
436219b2ee8SDavid du Colombier && strcmp(language[i].name, name) == 0)
437219b2ee8SDavid du Colombier return i;
438219b2ee8SDavid du Colombier return -1;
439219b2ee8SDavid du Colombier }
440219b2ee8SDavid du Colombier
4413e12c5d1SDavid du Colombier void
print_utf(void)4423e12c5d1SDavid du Colombier print_utf(void)
4433e12c5d1SDavid du Colombier {
444219b2ee8SDavid du Colombier int i, printed, j;
4453e12c5d1SDavid du Colombier
4467dd7cddfSDavid du Colombier if(mime){
4477dd7cddfSDavid du Colombier print(PLAIN);
4487dd7cddfSDavid du Colombier return;
4497dd7cddfSDavid du Colombier }
450219b2ee8SDavid du Colombier if (chkascii()) {
451219b2ee8SDavid du Colombier printed = 1;
452219b2ee8SDavid du Colombier print("Ascii");
453219b2ee8SDavid du Colombier } else
454219b2ee8SDavid du Colombier printed = 0;
455219b2ee8SDavid du Colombier for (i = 0; language[i].name; i++)
4563e12c5d1SDavid du Colombier if (language[i].count) {
457219b2ee8SDavid du Colombier switch(language[i].mode) {
458219b2ee8SDavid du Colombier case Multi:
459219b2ee8SDavid du Colombier j = find_first(language[i].name);
460219b2ee8SDavid du Colombier if (j < 0)
461219b2ee8SDavid du Colombier break;
462219b2ee8SDavid du Colombier if (language[j].count > 0)
463219b2ee8SDavid du Colombier break;
464219b2ee8SDavid du Colombier /* Fall through */
465219b2ee8SDavid du Colombier case Normal:
466219b2ee8SDavid du Colombier case First:
4673e12c5d1SDavid du Colombier if (printed)
4683e12c5d1SDavid du Colombier print(" & ");
4693e12c5d1SDavid du Colombier else printed = 1;
4703e12c5d1SDavid du Colombier print("%s", language[i].name);
471219b2ee8SDavid du Colombier break;
472219b2ee8SDavid du Colombier case Shared:
473219b2ee8SDavid du Colombier default:
474219b2ee8SDavid du Colombier break;
475219b2ee8SDavid du Colombier }
4763e12c5d1SDavid du Colombier }
4773e12c5d1SDavid du Colombier if(!printed)
4783e12c5d1SDavid du Colombier print("UTF");
4793e12c5d1SDavid du Colombier print(" text\n");
4803e12c5d1SDavid du Colombier }
4813e12c5d1SDavid du Colombier
4823e12c5d1SDavid du Colombier void
wordfreq(void)4833e12c5d1SDavid du Colombier wordfreq(void)
4843e12c5d1SDavid du Colombier {
485219b2ee8SDavid du Colombier int low, high, mid, r;
486219b2ee8SDavid du Colombier uchar *p, *p2, c;
4873e12c5d1SDavid du Colombier
488219b2ee8SDavid du Colombier p = buf;
489219b2ee8SDavid du Colombier for(;;) {
490219b2ee8SDavid du Colombier while (p < buf+nbuf && !isalpha(*p))
491219b2ee8SDavid du Colombier p++;
492219b2ee8SDavid du Colombier if (p >= buf+nbuf)
493219b2ee8SDavid du Colombier return;
494219b2ee8SDavid du Colombier p2 = p;
495219b2ee8SDavid du Colombier while(p < buf+nbuf && isalpha(*p))
496219b2ee8SDavid du Colombier p++;
497219b2ee8SDavid du Colombier c = *p;
498219b2ee8SDavid du Colombier *p = 0;
4993e12c5d1SDavid du Colombier high = sizeof(dict)/sizeof(dict[0]);
5003e12c5d1SDavid du Colombier for(low = 0;low < high;) {
5013e12c5d1SDavid du Colombier mid = (low+high)/2;
502219b2ee8SDavid du Colombier r = strcmp(dict[mid].word, (char*)p2);
503219b2ee8SDavid du Colombier if(r == 0) {
5043e12c5d1SDavid du Colombier wfreq[dict[mid].class]++;
5053e12c5d1SDavid du Colombier break;
5063e12c5d1SDavid du Colombier }
507219b2ee8SDavid du Colombier if(r < 0)
5083e12c5d1SDavid du Colombier low = mid+1;
5093e12c5d1SDavid du Colombier else
5103e12c5d1SDavid du Colombier high = mid;
5113e12c5d1SDavid du Colombier }
512219b2ee8SDavid du Colombier *p++ = c;
5133e12c5d1SDavid du Colombier }
5143e12c5d1SDavid du Colombier }
5153e12c5d1SDavid du Colombier
5169a747e4fSDavid du Colombier typedef struct Filemagic Filemagic;
5179a747e4fSDavid du Colombier struct Filemagic {
5189a747e4fSDavid du Colombier ulong x;
5199a747e4fSDavid du Colombier ulong mask;
5209a747e4fSDavid du Colombier char *desc;
5219a747e4fSDavid du Colombier char *mime;
5229a747e4fSDavid du Colombier };
5239a747e4fSDavid du Colombier
5243306492aSDavid du Colombier /*
5253306492aSDavid du Colombier * integers in this table must be as seen on a little-endian machine
5263306492aSDavid du Colombier * when read from a file.
5273306492aSDavid du Colombier */
5289a747e4fSDavid du Colombier Filemagic long0tab[] = {
5299a747e4fSDavid du Colombier 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET,
5303306492aSDavid du Colombier /* "pac1" */
5319a747e4fSDavid du Colombier 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET,
5323306492aSDavid du Colombier /* "pXc2 */
5333306492aSDavid du Colombier 0x32630070, 0xFFFF00FF, "pac4 audio file\n", OCTET,
5349a747e4fSDavid du Colombier 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET,
535e24e4987SDavid du Colombier 0x43614c66, 0xFFFFFFFF, "FLAC audio file\n", OCTET,
5369a747e4fSDavid du Colombier 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET,
537fb7f0c93SDavid du Colombier 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip",
5389a747e4fSDavid du Colombier 070707, 0xFFFF, "cpio archive\n", OCTET,
539fb7f0c93SDavid du Colombier 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi",
5409552e201SDavid du Colombier 0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg",
541*28684b1dSDavid du Colombier 0xf0ff, 0xf6ff, "aac audio\n", "audio/mpeg",
542ee7057f8SDavid du Colombier 0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be",
543ee7057f8SDavid du Colombier 0xfffe, 0xffffffff, "utf-32le\n", "text/plain charset=utf-32le",
544ee7057f8SDavid du Colombier 0xfeff, 0xffff, "utf-16be\n", "text/plain charset=utf-16be",
545ee7057f8SDavid du Colombier 0xfffe, 0xffff, "utf-16le\n", "text/plain charset=utf-16le",
5469b558a26SDavid du Colombier /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
5479b558a26SDavid du Colombier 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable\n", OCTET,
5489b558a26SDavid du Colombier /* 0xfeedfacf */
5499b558a26SDavid du Colombier 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable\n", OCTET,
5509b558a26SDavid du Colombier /* 0xcefaedfe */
5519b558a26SDavid du Colombier 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable\n", OCTET,
5529b558a26SDavid du Colombier /* 0xcffaedfe */
5539b558a26SDavid du Colombier 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable\n", OCTET,
5549b558a26SDavid du Colombier /* 0xcafebabe */
5559b558a26SDavid du Colombier 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable\n", OCTET,
5563306492aSDavid du Colombier /*
5570fc59513SDavid du Colombier * these magic numbers are stored big-endian on disk,
5583306492aSDavid du Colombier * thus the numbers appear reversed in this table.
5593306492aSDavid du Colombier */
5603306492aSDavid du Colombier 0xad4e5cd1, 0xFFFFFFFF, "venti arena\n", OCTET,
5610fc59513SDavid du Colombier 0x2bb19a52, 0xFFFFFFFF, "paq archive\n", OCTET,
5629a747e4fSDavid du Colombier };
5639a747e4fSDavid du Colombier
5649a747e4fSDavid du Colombier int
filemagic(Filemagic * tab,int ntab,ulong x)5659a747e4fSDavid du Colombier filemagic(Filemagic *tab, int ntab, ulong x)
5669a747e4fSDavid du Colombier {
5679a747e4fSDavid du Colombier int i;
5689a747e4fSDavid du Colombier
5699a747e4fSDavid du Colombier for(i=0; i<ntab; i++)
5709a747e4fSDavid du Colombier if((x&tab[i].mask) == tab[i].x){
5719a747e4fSDavid du Colombier print(mime ? tab[i].mime : tab[i].desc);
5729a747e4fSDavid du Colombier return 1;
5739a747e4fSDavid du Colombier }
5749a747e4fSDavid du Colombier return 0;
5759a747e4fSDavid du Colombier }
5769a747e4fSDavid du Colombier
5773e12c5d1SDavid du Colombier int
long0(void)5783e12c5d1SDavid du Colombier long0(void)
5793e12c5d1SDavid du Colombier {
5803306492aSDavid du Colombier return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
5813306492aSDavid du Colombier }
5823e12c5d1SDavid du Colombier
5833306492aSDavid du Colombier typedef struct Fileoffmag Fileoffmag;
5843306492aSDavid du Colombier struct Fileoffmag {
5853306492aSDavid du Colombier ulong off;
5863306492aSDavid du Colombier Filemagic;
5873306492aSDavid du Colombier };
5883306492aSDavid du Colombier
5893306492aSDavid du Colombier /*
5903306492aSDavid du Colombier * integers in this table must be as seen on a little-endian machine
5913306492aSDavid du Colombier * when read from a file.
5923306492aSDavid du Colombier */
5933306492aSDavid du Colombier Fileoffmag longofftab[] = {
5943306492aSDavid du Colombier /*
5950fc59513SDavid du Colombier * these magic numbers are stored big-endian on disk,
5963306492aSDavid du Colombier * thus the numbers appear reversed in this table.
5973306492aSDavid du Colombier */
5983306492aSDavid du Colombier 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition\n", OCTET,
5993306492aSDavid du Colombier 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section\n", OCTET,
6003306492aSDavid du Colombier 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer\n", OCTET,
60138bb6201SDavid du Colombier 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties\n", OCTET,
6023306492aSDavid du Colombier };
6033306492aSDavid du Colombier
6043306492aSDavid du Colombier int
fileoffmagic(Fileoffmag * tab,int ntab)6053306492aSDavid du Colombier fileoffmagic(Fileoffmag *tab, int ntab)
6063306492aSDavid du Colombier {
6073306492aSDavid du Colombier int i;
6083306492aSDavid du Colombier ulong x;
6093306492aSDavid du Colombier Fileoffmag *tp;
6103306492aSDavid du Colombier uchar buf[sizeof(long)];
6113306492aSDavid du Colombier
6123306492aSDavid du Colombier for(i=0; i<ntab; i++) {
6133306492aSDavid du Colombier tp = tab + i;
6143306492aSDavid du Colombier seek(fd, tp->off, 0);
6150dc12738SDavid du Colombier if (readn(fd, buf, sizeof buf) != sizeof buf)
6163306492aSDavid du Colombier continue;
6170c547597SDavid du Colombier x = LENDIAN(buf);
6183306492aSDavid du Colombier if((x&tp->mask) == tp->x){
6193306492aSDavid du Colombier print(mime? tp->mime: tp->desc);
6200c547597SDavid du Colombier return 1;
6213306492aSDavid du Colombier }
6223306492aSDavid du Colombier }
6230c547597SDavid du Colombier return 0;
6240c547597SDavid du Colombier }
6250c547597SDavid du Colombier
6260c547597SDavid du Colombier int
longoff(void)6273306492aSDavid du Colombier longoff(void)
6283306492aSDavid du Colombier {
6293306492aSDavid du Colombier return fileoffmagic(longofftab, nelem(longofftab));
6303306492aSDavid du Colombier }
6313306492aSDavid du Colombier
6323306492aSDavid du Colombier int
isexec(void)6330c547597SDavid du Colombier isexec(void)
6340c547597SDavid du Colombier {
6350c547597SDavid du Colombier Fhdr f;
6360c547597SDavid du Colombier
6373e12c5d1SDavid du Colombier seek(fd, 0, 0); /* reposition to start of file */
6383e12c5d1SDavid du Colombier if(crackhdr(fd, &f)) {
6397dd7cddfSDavid du Colombier print(mime ? OCTET : "%s\n", f.name);
6403e12c5d1SDavid du Colombier return 1;
6413e12c5d1SDavid du Colombier }
6427dd7cddfSDavid du Colombier return 0;
6437dd7cddfSDavid du Colombier }
6443e12c5d1SDavid du Colombier
6450c547597SDavid du Colombier
/*
 * tar header block, from tar.c
 */
enum {
	NAMSIZ	= 100,		/* size of the name and linkname fields */
	TBLOCK	= 512,		/* size of a header block */
};

union hblock
{
	char	dummy[TBLOCK];		/* the whole block as raw bytes */
	struct header
	{
		char	name[NAMSIZ];
		char	mode[8];
		char	uid[8];
		char	gid[8];
		char	size[12];
		char	mtime[12];
		char	chksum[8];
		char	linkflag;
		char	linkname[NAMSIZ];

		/* rest are defined by POSIX's ustar format; see p1003.2b */
		char	magic[6];	/* "ustar" */
		char	version[2];
		char	uname[32];
		char	gname[32];
		char	devmajor[8];
		char	devminor[8];
		char	prefix[155];	/* if non-null, path = prefix "/" name */
	} dbuf;
};
6734b30ca09SDavid du Colombier
6744b30ca09SDavid du Colombier int
checksum(union hblock * hp)6754b30ca09SDavid du Colombier checksum(union hblock *hp)
6764b30ca09SDavid du Colombier {
6774b30ca09SDavid du Colombier int i;
6784b30ca09SDavid du Colombier char *cp;
6794b30ca09SDavid du Colombier struct header *hdr = &hp->dbuf;
6804b30ca09SDavid du Colombier
6814b30ca09SDavid du Colombier for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
6824b30ca09SDavid du Colombier *cp = ' ';
6834b30ca09SDavid du Colombier i = 0;
6844b30ca09SDavid du Colombier for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
6854b30ca09SDavid du Colombier i += *cp & 0xff;
6864b30ca09SDavid du Colombier return i;
6874b30ca09SDavid du Colombier }
6884b30ca09SDavid du Colombier
6894b30ca09SDavid du Colombier int
istar(void)6904b30ca09SDavid du Colombier istar(void)
6914b30ca09SDavid du Colombier {
6924b30ca09SDavid du Colombier int chksum;
6934b30ca09SDavid du Colombier char tblock[TBLOCK];
6944b30ca09SDavid du Colombier union hblock *hp = (union hblock *)tblock;
6954b30ca09SDavid du Colombier struct header *hdr = &hp->dbuf;
6964b30ca09SDavid du Colombier
6974b30ca09SDavid du Colombier seek(fd, 0, 0); /* reposition to start of file */
6984b30ca09SDavid du Colombier if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
6994b30ca09SDavid du Colombier return 0;
7004b30ca09SDavid du Colombier chksum = strtol(hdr->chksum, 0, 8);
7014b30ca09SDavid du Colombier if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
7024b30ca09SDavid du Colombier if (strcmp(hdr->magic, "ustar") == 0)
7034b30ca09SDavid du Colombier print(mime? "application/x-ustar\n":
7044b30ca09SDavid du Colombier "posix tar archive\n");
7054b30ca09SDavid du Colombier else
7064b30ca09SDavid du Colombier print(mime? "application/x-tar\n": "tar archive\n");
7074b30ca09SDavid du Colombier return 1;
7084b30ca09SDavid du Colombier }
7094b30ca09SDavid du Colombier return 0;
7104b30ca09SDavid du Colombier }
7114b30ca09SDavid du Colombier
/*
 * initial words to classify file
 *
 * key is compared with the start of the file for length bytes;
 * a length of -1 means strlen(key).  filetype and mime are printed
 * with a newline appended, so neither may end in one (in particular
 * the OCTET macro, which is printed bare elsewhere, must not be used
 * here).  entries are tried in order; the first match wins, so longer
 * keys must precede their prefixes.  a nil key ends the table.
 */
struct	FILE_STRING
{
	char 	*key;
	char	*filetype;
	int	length;
	char	*mime;
} file_string[] =
{
	"!<arch>\n__.SYMDEF",	"archive random library",	16,	"application/octet-stream",
	"!<arch>\n",		"archive",			8,	"application/octet-stream",
	"070707",		"cpio archive - ascii header",	6,	"application/octet-stream",
	"#!/bin/rc",		"rc executable file",		9,	"text/plain",
	"#!/bin/sh",		"sh executable file",		9,	"text/plain",
	"%!",			"postscript",			2,	"application/postscript",
	"\004%!",		"postscript",			3,	"application/postscript",
	"x T post",		"troff output for post",	8,	"application/troff",
	"x T Latin1",		"troff output for Latin1",	10,	"application/troff",
	"x T utf",		"troff output for UTF",		7,	"application/troff",
	"x T 202",		"troff output for 202",		7,	"application/troff",
	"x T aps",		"troff output for aps",		7,	"application/troff",
	"x T ",			"troff output",			4,	"application/troff",
	"GIF",			"GIF image", 			3,	"image/gif",
	"\0PC Research, Inc\0",	"ghostscript fax file",		18,	"application/ghostscript",
	"%PDF",			"PDF",				4,	"application/pdf",
	"<html>\n",		"HTML file",			7,	"text/html",
	"<HTML>\n",		"HTML file",			7,	"text/html",
	"\111\111\052\000",	"tiff",				4,	"image/tiff",
	"\115\115\000\052",	"tiff",				4,	"image/tiff",
	"\377\330\377\340",	"jpeg",				4,	"image/jpeg",
	"\377\330\377\341",	"jpeg",				4,	"image/jpeg",
	"\377\330\377\333",	"jpeg",				4,	"image/jpeg",
	"BM",			"bmp",				2,	"image/bmp",
	"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",	"microsoft office document",	8,	"application/octet-stream",
	"<MakerFile ",		"FrameMaker file",		11,	"application/framemaker",
	"\033E\033",		"HP PCL printer data",		3,	"application/octet-stream",
	"\033&",		"HP PCL printer data",		2,	"application/octet-stream",
	"\033%-12345X",		"HPJCL file",			9,	"application/hpjcl",
	"\033Lua",		"Lua bytecode",			4,	"application/octet-stream",
	"ID3",			"mp3 audio with id3",		3,	"audio/mpeg",
	"\211PNG",		"PNG image",			4,	"image/png",
	"P3\n",			"ppm",				3,	"image/ppm",
	"P6\n",			"ppm",				3,	"image/ppm",
	"/* XPM */\n",		"xbm",				10,	"image/xbm",
	".HTML ",		"troff -ms input",		6,	"text/troff",
	".LP",			"troff -ms input",		3,	"text/troff",
	".ND",			"troff -ms input",		3,	"text/troff",
	".PP",			"troff -ms input",		3,	"text/troff",
	".TL",			"troff -ms input",		3,	"text/troff",
	".TR",			"troff -ms input",		3,	"text/troff",
	".TH",			"manual page",			3,	"text/troff",
	".\\\"",		"troff input",			3,	"text/troff",
	".de",			"troff input",			3,	"text/troff",
	".if",			"troff input",			3,	"text/troff",
	".nr",			"troff input",			3,	"text/troff",
	".tr",			"troff input",			3,	"text/troff",
	"vac:",			"venti score",			4,	"text/plain",
	"-----BEGIN CERTIFICATE-----\n",
				"pem certificate",		-1,	"text/plain",
	"-----BEGIN TRUSTED CERTIFICATE-----\n",
				"pem trusted certificate",	-1,	"text/plain",
	"-----BEGIN X509 CERTIFICATE-----\n",
				"pem x.509 certificate",	-1,	"text/plain",
	"subject=/C=",		"pem certificate with header",	-1,	"text/plain",
	"process snapshot ",	"process snapshot",		-1,	"application/snapfs",
	"BEGIN:VCARD\r\n",	"vCard",			13,	"text/directory;profile=vcard",
	"BEGIN:VCARD\n",	"vCard",			12,	"text/directory;profile=vcard",
	0,0,0,0
};
7833e12c5d1SDavid du Colombier
7843e12c5d1SDavid du Colombier int
istring(void)7853e12c5d1SDavid du Colombier istring(void)
7863e12c5d1SDavid du Colombier {
78794aa1c4cSDavid du Colombier int i, l;
7883e12c5d1SDavid du Colombier struct FILE_STRING *p;
7893e12c5d1SDavid du Colombier
7903e12c5d1SDavid du Colombier for(p = file_string; p->key; p++) {
79194aa1c4cSDavid du Colombier l = p->length;
79294aa1c4cSDavid du Colombier if(l == -1)
79394aa1c4cSDavid du Colombier l = strlen(p->key);
79494aa1c4cSDavid du Colombier if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
7957dd7cddfSDavid du Colombier if(mime)
7967dd7cddfSDavid du Colombier print("%s\n", p->mime);
7977dd7cddfSDavid du Colombier else
7983e12c5d1SDavid du Colombier print("%s\n", p->filetype);
7993e12c5d1SDavid du Colombier return 1;
8003e12c5d1SDavid du Colombier }
8013e12c5d1SDavid du Colombier }
8023e12c5d1SDavid du Colombier if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
8033e12c5d1SDavid du Colombier for(i = 5; i < nbuf; i++)
8043e12c5d1SDavid du Colombier if(buf[i] == '\n')
8053e12c5d1SDavid du Colombier break;
8067dd7cddfSDavid du Colombier if(mime)
8077dd7cddfSDavid du Colombier print(OCTET);
8087dd7cddfSDavid du Colombier else
80959cc4ca5SDavid du Colombier print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
8103e12c5d1SDavid du Colombier return 1;
8113e12c5d1SDavid du Colombier }
8123e12c5d1SDavid du Colombier return 0;
8133e12c5d1SDavid du Colombier }
8143e12c5d1SDavid du Colombier
8153306492aSDavid du Colombier struct offstr
8163306492aSDavid du Colombier {
8173306492aSDavid du Colombier ulong off;
8183306492aSDavid du Colombier struct FILE_STRING;
8193306492aSDavid du Colombier } offstrs[] = {
8203306492aSDavid du Colombier 32*1024, "\001CD001\001", "ISO9660 CD image", 7, OCTET,
8213306492aSDavid du Colombier 0, 0, 0, 0, 0
8223306492aSDavid du Colombier };
8233306492aSDavid du Colombier
8243306492aSDavid du Colombier int
isoffstr(void)8253306492aSDavid du Colombier isoffstr(void)
8263306492aSDavid du Colombier {
8273306492aSDavid du Colombier int n;
8283306492aSDavid du Colombier char buf[256];
8293306492aSDavid du Colombier struct offstr *p;
8303306492aSDavid du Colombier
8313306492aSDavid du Colombier for(p = offstrs; p->key; p++) {
8323306492aSDavid du Colombier seek(fd, p->off, 0);
8333306492aSDavid du Colombier n = p->length;
8343306492aSDavid du Colombier if (n > sizeof buf)
8353306492aSDavid du Colombier n = sizeof buf;
8360dc12738SDavid du Colombier if (readn(fd, buf, n) != n)
8373306492aSDavid du Colombier continue;
8383306492aSDavid du Colombier if(memcmp(buf, p->key, n) == 0) {
8393306492aSDavid du Colombier if(mime)
8403306492aSDavid du Colombier print("%s\n", p->mime);
8413306492aSDavid du Colombier else
8423306492aSDavid du Colombier print("%s\n", p->filetype);
8433306492aSDavid du Colombier return 1;
8443306492aSDavid du Colombier }
8453306492aSDavid du Colombier }
8463306492aSDavid du Colombier return 0;
8473306492aSDavid du Colombier }
8483306492aSDavid du Colombier
849ddb951e3SDavid du Colombier int
iff(void)850ddb951e3SDavid du Colombier iff(void)
851ddb951e3SDavid du Colombier {
852ddb951e3SDavid du Colombier if (strncmp((char*)buf, "FORM", 4) == 0 &&
853ddb951e3SDavid du Colombier strncmp((char*)buf+8, "AIFF", 4) == 0) {
854ddb951e3SDavid du Colombier print("%s\n", mime? "audio/x-aiff": "aiff audio");
855ddb951e3SDavid du Colombier return 1;
856ddb951e3SDavid du Colombier }
8574eeb7838SDavid du Colombier if (strncmp((char*)buf, "RIFF", 4) == 0) {
8584eeb7838SDavid du Colombier if (strncmp((char*)buf+8, "WAVE", 4) == 0)
8594eeb7838SDavid du Colombier print("%s\n", mime? "audio/wave": "wave audio");
8604eeb7838SDavid du Colombier else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
8614eeb7838SDavid du Colombier print("%s\n", mime? "video/avi": "avi video");
8624eeb7838SDavid du Colombier else
8634eeb7838SDavid du Colombier print("%s\n", mime? "application/octet-stream":
8644eeb7838SDavid du Colombier "riff file");
8654eeb7838SDavid du Colombier return 1;
8664eeb7838SDavid du Colombier }
867ddb951e3SDavid du Colombier return 0;
868ddb951e3SDavid du Colombier }
869ddb951e3SDavid du Colombier
/*
 * tag names that ishtml counts as evidence of html; ends with a nil entry
 */
char*	html_string[] =
{
	"title", "body", "head", "strong",
	"h1", "h2", "h3", "h4", "h5", "h6",
	"ul", "li", "dl", "br", "em",
	0,
};
8897dd7cddfSDavid du Colombier
8907dd7cddfSDavid du Colombier int
ishtml(void)8917dd7cddfSDavid du Colombier ishtml(void)
8927dd7cddfSDavid du Colombier {
8937dd7cddfSDavid du Colombier uchar *p, *q;
8947dd7cddfSDavid du Colombier int i, count;
8957dd7cddfSDavid du Colombier
8967dd7cddfSDavid du Colombier /* compare strings between '<' and '>' to html table */
8977dd7cddfSDavid du Colombier count = 0;
8987dd7cddfSDavid du Colombier p = buf;
8997dd7cddfSDavid du Colombier for(;;) {
9007dd7cddfSDavid du Colombier while (p < buf+nbuf && *p != '<')
9017dd7cddfSDavid du Colombier p++;
9027dd7cddfSDavid du Colombier p++;
9037dd7cddfSDavid du Colombier if (p >= buf+nbuf)
9047dd7cddfSDavid du Colombier break;
9057dd7cddfSDavid du Colombier if(*p == '/')
9067dd7cddfSDavid du Colombier p++;
9077dd7cddfSDavid du Colombier q = p;
9087dd7cddfSDavid du Colombier while(p < buf+nbuf && *p != '>')
9097dd7cddfSDavid du Colombier p++;
9107dd7cddfSDavid du Colombier if (p >= buf+nbuf)
9117dd7cddfSDavid du Colombier break;
9127dd7cddfSDavid du Colombier for(i = 0; html_string[i]; i++) {
9137dd7cddfSDavid du Colombier if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
9147dd7cddfSDavid du Colombier if(count++ > 4) {
9157dd7cddfSDavid du Colombier print(mime ? "text/html\n" : "HTML file\n");
9167dd7cddfSDavid du Colombier return 1;
9177dd7cddfSDavid du Colombier }
9187dd7cddfSDavid du Colombier break;
9197dd7cddfSDavid du Colombier }
9207dd7cddfSDavid du Colombier }
9217dd7cddfSDavid du Colombier p++;
9227dd7cddfSDavid du Colombier }
9237dd7cddfSDavid du Colombier return 0;
9247dd7cddfSDavid du Colombier }
9257dd7cddfSDavid du Colombier
/*
 * mail header field names, with their colon, that isrfc822 counts
 * (compared without regard to case); ends with a nil entry.
 * field names cannot contain blanks: the reply field is "reply-to".
 */
char*	rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",
	"sender:",
	0,
};
9377dd7cddfSDavid du Colombier
9389a747e4fSDavid du Colombier int
isrfc822(void)9399a747e4fSDavid du Colombier isrfc822(void)
9409a747e4fSDavid du Colombier {
9419a747e4fSDavid du Colombier
9429a747e4fSDavid du Colombier char *p, *q, *r;
9439a747e4fSDavid du Colombier int i, count;
9449a747e4fSDavid du Colombier
9459a747e4fSDavid du Colombier count = 0;
9469a747e4fSDavid du Colombier p = (char*)buf;
9479a747e4fSDavid du Colombier for(;;) {
9489a747e4fSDavid du Colombier q = strchr(p, '\n');
9499a747e4fSDavid du Colombier if(q == nil)
9507dd7cddfSDavid du Colombier break;
951d9306527SDavid du Colombier *q = 0;
952d9306527SDavid du Colombier if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
953d9306527SDavid du Colombier count++;
954d9306527SDavid du Colombier *q = '\n';
955d9306527SDavid du Colombier p = q+1;
956d9306527SDavid du Colombier continue;
957d9306527SDavid du Colombier }
958d9306527SDavid du Colombier *q = '\n';
9599a747e4fSDavid du Colombier if(*p != '\t' && *p != ' '){
9609a747e4fSDavid du Colombier r = strchr(p, ':');
9619a747e4fSDavid du Colombier if(r == 0 || r > q)
9629a747e4fSDavid du Colombier break;
9639a747e4fSDavid du Colombier for(i = 0; rfc822_string[i]; i++) {
9649a747e4fSDavid du Colombier if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
9659a747e4fSDavid du Colombier count++;
9669a747e4fSDavid du Colombier break;
9677dd7cddfSDavid du Colombier }
9689a747e4fSDavid du Colombier }
9699a747e4fSDavid du Colombier }
9709a747e4fSDavid du Colombier p = q+1;
9719a747e4fSDavid du Colombier }
9729a747e4fSDavid du Colombier if(count >= 3){
9739a747e4fSDavid du Colombier print(mime ? "message/rfc822\n" : "email file\n");
9747dd7cddfSDavid du Colombier return 1;
9757dd7cddfSDavid du Colombier }
9769a747e4fSDavid du Colombier return 0;
9779a747e4fSDavid du Colombier }
9787dd7cddfSDavid du Colombier
9793e12c5d1SDavid du Colombier int
ismbox(void)980d9306527SDavid du Colombier ismbox(void)
981d9306527SDavid du Colombier {
982d9306527SDavid du Colombier char *p, *q;
983d9306527SDavid du Colombier
984d9306527SDavid du Colombier p = (char*)buf;
985d9306527SDavid du Colombier q = strchr(p, '\n');
986d9306527SDavid du Colombier if(q == nil)
987d9306527SDavid du Colombier return 0;
988d9306527SDavid du Colombier *q = 0;
989d9306527SDavid du Colombier if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
990d9306527SDavid du Colombier print(mime ? "text/plain\n" : "mail box\n");
991d9306527SDavid du Colombier return 1;
992d9306527SDavid du Colombier }
993d9306527SDavid du Colombier *q = '\n';
994d9306527SDavid du Colombier return 0;
995d9306527SDavid du Colombier }
996d9306527SDavid du Colombier
997d9306527SDavid du Colombier int
iscint(void)9983e12c5d1SDavid du Colombier iscint(void)
9993e12c5d1SDavid du Colombier {
1000219b2ee8SDavid du Colombier int type;
1001219b2ee8SDavid du Colombier char *name;
1002219b2ee8SDavid du Colombier Biobuf b;
10033e12c5d1SDavid du Colombier
1004219b2ee8SDavid du Colombier if(Binit(&b, fd, OREAD) == Beof)
10053e12c5d1SDavid du Colombier return 0;
1006219b2ee8SDavid du Colombier seek(fd, 0, 0);
1007219b2ee8SDavid du Colombier type = objtype(&b, &name);
1008219b2ee8SDavid du Colombier if(type < 0)
1009219b2ee8SDavid du Colombier return 0;
10107dd7cddfSDavid du Colombier if(mime)
10117dd7cddfSDavid du Colombier print(OCTET);
10127dd7cddfSDavid du Colombier else
1013219b2ee8SDavid du Colombier print("%s intermediate\n", name);
1014219b2ee8SDavid du Colombier return 1;
10153e12c5d1SDavid du Colombier }
10163e12c5d1SDavid du Colombier
10173e12c5d1SDavid du Colombier int
isc(void)10183e12c5d1SDavid du Colombier isc(void)
10193e12c5d1SDavid du Colombier {
10203e12c5d1SDavid du Colombier int n;
10213e12c5d1SDavid du Colombier
10223e12c5d1SDavid du Colombier n = wfreq[I1];
10233e12c5d1SDavid du Colombier /*
10243e12c5d1SDavid du Colombier * includes
10253e12c5d1SDavid du Colombier */
10263e12c5d1SDavid du Colombier if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
10273e12c5d1SDavid du Colombier goto yes;
1028219b2ee8SDavid du Colombier if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1029219b2ee8SDavid du Colombier goto yes;
10303e12c5d1SDavid du Colombier /*
10313e12c5d1SDavid du Colombier * declarations
10323e12c5d1SDavid du Colombier */
10333e12c5d1SDavid du Colombier if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
10343e12c5d1SDavid du Colombier goto yes;
10353e12c5d1SDavid du Colombier /*
10363e12c5d1SDavid du Colombier * assignments
10373e12c5d1SDavid du Colombier */
10383e12c5d1SDavid du Colombier if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
10393e12c5d1SDavid du Colombier goto yes;
10403e12c5d1SDavid du Colombier return 0;
10413e12c5d1SDavid du Colombier
10423e12c5d1SDavid du Colombier yes:
10437dd7cddfSDavid du Colombier if(mime){
10447dd7cddfSDavid du Colombier print(PLAIN);
10457dd7cddfSDavid du Colombier return 1;
10467dd7cddfSDavid du Colombier }
1047219b2ee8SDavid du Colombier if(wfreq[Alword] > 0)
1048219b2ee8SDavid du Colombier print("alef program\n");
1049219b2ee8SDavid du Colombier else
10503e12c5d1SDavid du Colombier print("c program\n");
10513e12c5d1SDavid du Colombier return 1;
10523e12c5d1SDavid du Colombier }
10533e12c5d1SDavid du Colombier
10543e12c5d1SDavid du Colombier int
islimbo(void)10557dd7cddfSDavid du Colombier islimbo(void)
10567dd7cddfSDavid du Colombier {
10577dd7cddfSDavid du Colombier
10587dd7cddfSDavid du Colombier /*
10597dd7cddfSDavid du Colombier * includes
10607dd7cddfSDavid du Colombier */
10617dd7cddfSDavid du Colombier if(wfreq[Lword] < 4)
10627dd7cddfSDavid du Colombier return 0;
10637dd7cddfSDavid du Colombier print(mime ? PLAIN : "limbo program\n");
10647dd7cddfSDavid du Colombier return 1;
10657dd7cddfSDavid du Colombier }
10667dd7cddfSDavid du Colombier
10677dd7cddfSDavid du Colombier int
isas(void)10683e12c5d1SDavid du Colombier isas(void)
10693e12c5d1SDavid du Colombier {
10703e12c5d1SDavid du Colombier
10713e12c5d1SDavid du Colombier /*
10723e12c5d1SDavid du Colombier * includes
10733e12c5d1SDavid du Colombier */
10743e12c5d1SDavid du Colombier if(wfreq[Aword] < 2)
10753e12c5d1SDavid du Colombier return 0;
10767dd7cddfSDavid du Colombier print(mime ? PLAIN : "as program\n");
10773e12c5d1SDavid du Colombier return 1;
10783e12c5d1SDavid du Colombier }
10793e12c5d1SDavid du Colombier
10803e12c5d1SDavid du Colombier /*
10813e12c5d1SDavid du Colombier * low entropy means encrypted
10823e12c5d1SDavid du Colombier */
10833e12c5d1SDavid du Colombier int
ismung(void)10843e12c5d1SDavid du Colombier ismung(void)
10853e12c5d1SDavid du Colombier {
10863e12c5d1SDavid du Colombier int i, bucket[8];
10873e12c5d1SDavid du Colombier float cs;
10883e12c5d1SDavid du Colombier
10893e12c5d1SDavid du Colombier if(nbuf < 64)
10903e12c5d1SDavid du Colombier return 0;
10913e12c5d1SDavid du Colombier memset(bucket, 0, sizeof(bucket));
109290630c3aSDavid du Colombier for(i=nbuf-64; i<nbuf; i++)
10933e12c5d1SDavid du Colombier bucket[(buf[i]>>5)&07] += 1;
10943e12c5d1SDavid du Colombier
10953e12c5d1SDavid du Colombier cs = 0.;
10963e12c5d1SDavid du Colombier for(i=0; i<8; i++)
10973e12c5d1SDavid du Colombier cs += (bucket[i]-8)*(bucket[i]-8);
10983e12c5d1SDavid du Colombier cs /= 8.;
10993e12c5d1SDavid du Colombier if(cs <= 24.322) {
110090630c3aSDavid du Colombier if(buf[0]==0x1f && buf[1]==0x9d)
11017dd7cddfSDavid du Colombier print(mime ? OCTET : "compressed\n");
11023e12c5d1SDavid du Colombier else
110390630c3aSDavid du Colombier if(buf[0]==0x1f && buf[1]==0x8b)
110490630c3aSDavid du Colombier print(mime ? OCTET : "gzip compressed\n");
110590630c3aSDavid du Colombier else
110690630c3aSDavid du Colombier if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
110790630c3aSDavid du Colombier print(mime ? OCTET : "bzip2 compressed\n");
110890630c3aSDavid du Colombier else
11097dd7cddfSDavid du Colombier print(mime ? OCTET : "encrypted\n");
11103e12c5d1SDavid du Colombier return 1;
11113e12c5d1SDavid du Colombier }
11123e12c5d1SDavid du Colombier return 0;
11133e12c5d1SDavid du Colombier }
11143e12c5d1SDavid du Colombier
11153e12c5d1SDavid du Colombier /*
11163e12c5d1SDavid du Colombier * english by punctuation and frequencies
11173e12c5d1SDavid du Colombier */
11183e12c5d1SDavid du Colombier int
isenglish(void)11193e12c5d1SDavid du Colombier isenglish(void)
11203e12c5d1SDavid du Colombier {
11213e12c5d1SDavid du Colombier int vow, comm, rare, badpun, punct;
11223e12c5d1SDavid du Colombier char *p;
11233e12c5d1SDavid du Colombier
11243e12c5d1SDavid du Colombier if(guess != Fascii && guess != Feascii)
11253e12c5d1SDavid du Colombier return 0;
11263e12c5d1SDavid du Colombier badpun = 0;
11273e12c5d1SDavid du Colombier punct = 0;
11283e12c5d1SDavid du Colombier for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
11293e12c5d1SDavid du Colombier switch(*p) {
11303e12c5d1SDavid du Colombier case '.':
11313e12c5d1SDavid du Colombier case ',':
11323e12c5d1SDavid du Colombier case ')':
11333e12c5d1SDavid du Colombier case '%':
11343e12c5d1SDavid du Colombier case ';':
11353e12c5d1SDavid du Colombier case ':':
11363e12c5d1SDavid du Colombier case '?':
11373e12c5d1SDavid du Colombier punct++;
11383e12c5d1SDavid du Colombier if(p[1] != ' ' && p[1] != '\n')
11393e12c5d1SDavid du Colombier badpun++;
11403e12c5d1SDavid du Colombier }
11413e12c5d1SDavid du Colombier if(badpun*5 > punct)
11423e12c5d1SDavid du Colombier return 0;
11433e12c5d1SDavid du Colombier if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
11443e12c5d1SDavid du Colombier return 0;
11453e12c5d1SDavid du Colombier if(2*cfreq[';'] > cfreq['e'])
11463e12c5d1SDavid du Colombier return 0;
11473e12c5d1SDavid du Colombier
11483e12c5d1SDavid du Colombier vow = 0;
11493e12c5d1SDavid du Colombier for(p="AEIOU"; *p; p++) {
11503e12c5d1SDavid du Colombier vow += cfreq[*p];
11513e12c5d1SDavid du Colombier vow += cfreq[tolower(*p)];
11523e12c5d1SDavid du Colombier }
11533e12c5d1SDavid du Colombier comm = 0;
11543e12c5d1SDavid du Colombier for(p="ETAION"; *p; p++) {
11553e12c5d1SDavid du Colombier comm += cfreq[*p];
11563e12c5d1SDavid du Colombier comm += cfreq[tolower(*p)];
11573e12c5d1SDavid du Colombier }
11583e12c5d1SDavid du Colombier rare = 0;
11593e12c5d1SDavid du Colombier for(p="VJKQXZ"; *p; p++) {
11603e12c5d1SDavid du Colombier rare += cfreq[*p];
11613e12c5d1SDavid du Colombier rare += cfreq[tolower(*p)];
11623e12c5d1SDavid du Colombier }
11633e12c5d1SDavid du Colombier if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
11647dd7cddfSDavid du Colombier print(mime ? PLAIN : "English text\n");
11653e12c5d1SDavid du Colombier return 1;
11663e12c5d1SDavid du Colombier }
11673e12c5d1SDavid du Colombier return 0;
11683e12c5d1SDavid du Colombier }
11693e12c5d1SDavid du Colombier
11703e12c5d1SDavid du Colombier /*
11713e12c5d1SDavid du Colombier * pick up a number with
11723e12c5d1SDavid du Colombier * syntax _*[0-9]+_
11733e12c5d1SDavid du Colombier */
11743e12c5d1SDavid du Colombier #define P9BITLEN 12
11753e12c5d1SDavid du Colombier int
p9bitnum(uchar * bp)11763e12c5d1SDavid du Colombier p9bitnum(uchar *bp)
11773e12c5d1SDavid du Colombier {
11783e12c5d1SDavid du Colombier int n, c, len;
11793e12c5d1SDavid du Colombier
11803e12c5d1SDavid du Colombier len = P9BITLEN;
11813e12c5d1SDavid du Colombier while(*bp == ' ') {
11823e12c5d1SDavid du Colombier bp++;
11833e12c5d1SDavid du Colombier len--;
11843e12c5d1SDavid du Colombier if(len <= 0)
11853e12c5d1SDavid du Colombier return -1;
11863e12c5d1SDavid du Colombier }
11873e12c5d1SDavid du Colombier n = 0;
11883e12c5d1SDavid du Colombier while(len > 1) {
11893e12c5d1SDavid du Colombier c = *bp++;
11903e12c5d1SDavid du Colombier if(!isdigit(c))
11913e12c5d1SDavid du Colombier return -1;
11923e12c5d1SDavid du Colombier n = n*10 + c-'0';
11933e12c5d1SDavid du Colombier len--;
11943e12c5d1SDavid du Colombier }
11953e12c5d1SDavid du Colombier if(*bp != ' ')
11963e12c5d1SDavid du Colombier return -1;
11973e12c5d1SDavid du Colombier return n;
11983e12c5d1SDavid du Colombier }
11993e12c5d1SDavid du Colombier
/*
 * depth in bits per pixel from the first 12-character field of an
 * image header.
 *
 * if the field (after leading blanks) starts with a digit it is taken
 * as a log2 depth: *newp is set to 0 and 1<<value is returned.
 * otherwise it is taken as a channel descriptor, a sequence of
 * letter-number pairs: *newp is set to 1 and the sum of the numbers is
 * returned, provided it is a multiple or a divisor of 8.
 *
 * returns -1 for a blank field, an out-of-range shift count, or a
 * depth that is not positive or fails the multiple/divisor test;
 * a valid result is therefore always at least 1.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	int d;
	long ld;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, 0, 0);
		/* keep the shift defined and the result positive */
		if(ld < 0 || ld > 30)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		s++;			/* skip letter */
		d += strtoul(s, &s, 10);
	}

	/* a letter with no digits gives 0, which callers would divide by */
	if(d <= 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
12277dd7cddfSDavid du Colombier
12287dd7cddfSDavid du Colombier int
isp9bit(void)12293e12c5d1SDavid du Colombier isp9bit(void)
12303e12c5d1SDavid du Colombier {
1231883a8c51SDavid du Colombier int dep, lox, loy, hix, hiy, px, new, cmpr;
1232219b2ee8SDavid du Colombier ulong t;
12333e12c5d1SDavid du Colombier long len;
12347dd7cddfSDavid du Colombier char *newlabel;
1235883a8c51SDavid du Colombier uchar *cp;
12363e12c5d1SDavid du Colombier
1237883a8c51SDavid du Colombier cp = buf;
1238883a8c51SDavid du Colombier cmpr = 0;
12397dd7cddfSDavid du Colombier newlabel = "old ";
12407dd7cddfSDavid du Colombier
1241883a8c51SDavid du Colombier if(memcmp(cp, "compressed\n", 11) == 0) {
1242883a8c51SDavid du Colombier cmpr = 1;
1243883a8c51SDavid du Colombier cp = buf + 11;
1244883a8c51SDavid du Colombier }
1245883a8c51SDavid du Colombier
1246883a8c51SDavid du Colombier dep = depthof((char*)cp + 0*P9BITLEN, &new);
12477dd7cddfSDavid du Colombier if(new)
12487dd7cddfSDavid du Colombier newlabel = "";
1249883a8c51SDavid du Colombier lox = p9bitnum(cp + 1*P9BITLEN);
1250883a8c51SDavid du Colombier loy = p9bitnum(cp + 2*P9BITLEN);
1251883a8c51SDavid du Colombier hix = p9bitnum(cp + 3*P9BITLEN);
1252883a8c51SDavid du Colombier hiy = p9bitnum(cp + 4*P9BITLEN);
12537dd7cddfSDavid du Colombier if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
12543e12c5d1SDavid du Colombier return 0;
12553e12c5d1SDavid du Colombier
12567dd7cddfSDavid du Colombier if(dep < 8){
12577dd7cddfSDavid du Colombier px = 8/dep; /* pixels per byte */
1258219b2ee8SDavid du Colombier /* set l to number of bytes of data per scan line */
1259219b2ee8SDavid du Colombier if(lox >= 0)
1260219b2ee8SDavid du Colombier len = (hix+px-1)/px - lox/px;
1261219b2ee8SDavid du Colombier else{ /* make positive before divide */
1262219b2ee8SDavid du Colombier t = (-lox)+px-1;
1263219b2ee8SDavid du Colombier t = (t/px)*px;
1264219b2ee8SDavid du Colombier len = (t+hix+px-1)/px;
1265219b2ee8SDavid du Colombier }
12667dd7cddfSDavid du Colombier }else
12677dd7cddfSDavid du Colombier len = (hix-lox)*dep/8;
1268883a8c51SDavid du Colombier len *= hiy - loy; /* col length */
12693e12c5d1SDavid du Colombier len += 5 * P9BITLEN; /* size of initial ascii */
12703e12c5d1SDavid du Colombier
12713e12c5d1SDavid du Colombier /*
1272883a8c51SDavid du Colombier * for compressed images, don't look any further. otherwise:
12730dc12738SDavid du Colombier * for image file, length is non-zero and must match calculation above.
12740dc12738SDavid du Colombier * for /dev/window and /dev/screen the length is always zero.
12753e12c5d1SDavid du Colombier * for subfont, the subfont header should follow immediately.
12763e12c5d1SDavid du Colombier */
1277883a8c51SDavid du Colombier if (cmpr) {
1278883a8c51SDavid du Colombier print(mime ? OCTET : "Compressed %splan 9 image or subfont, depth %d\n",
1279883a8c51SDavid du Colombier newlabel, dep);
1280883a8c51SDavid du Colombier return 1;
1281883a8c51SDavid du Colombier }
12820dc12738SDavid du Colombier /*
12830dc12738SDavid du Colombier * mbuf->length == 0 probably indicates reading a pipe.
12840dc12738SDavid du Colombier * Ghostscript sometimes produces a little extra on the end.
12850dc12738SDavid du Colombier */
12860dc12738SDavid du Colombier if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
12870dc12738SDavid du Colombier mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1288883a8c51SDavid du Colombier print(mime ? OCTET : "%splan 9 image, depth %d\n", newlabel, dep);
12893e12c5d1SDavid du Colombier return 1;
12903e12c5d1SDavid du Colombier }
12913e12c5d1SDavid du Colombier if (p9subfont(buf+len)) {
1292883a8c51SDavid du Colombier print(mime ? OCTET : "%ssubfont file, depth %d\n", newlabel, dep);
12933e12c5d1SDavid du Colombier return 1;
12943e12c5d1SDavid du Colombier }
12953e12c5d1SDavid du Colombier return 0;
12963e12c5d1SDavid du Colombier }
12973e12c5d1SDavid du Colombier
12983e12c5d1SDavid du Colombier int
p9subfont(uchar * p)12993e12c5d1SDavid du Colombier p9subfont(uchar *p)
13003e12c5d1SDavid du Colombier {
13013e12c5d1SDavid du Colombier int n, h, a;
13023e12c5d1SDavid du Colombier
13037dd7cddfSDavid du Colombier /* if image too big, assume it's a subfont */
13043e12c5d1SDavid du Colombier if (p+3*P9BITLEN > buf+sizeof(buf))
13053e12c5d1SDavid du Colombier return 1;
13063e12c5d1SDavid du Colombier
13073e12c5d1SDavid du Colombier n = p9bitnum(p + 0*P9BITLEN); /* char count */
13083e12c5d1SDavid du Colombier if (n < 0)
13093e12c5d1SDavid du Colombier return 0;
13103e12c5d1SDavid du Colombier h = p9bitnum(p + 1*P9BITLEN); /* height */
13113e12c5d1SDavid du Colombier if (h < 0)
13123e12c5d1SDavid du Colombier return 0;
13133e12c5d1SDavid du Colombier a = p9bitnum(p + 2*P9BITLEN); /* ascent */
13143e12c5d1SDavid du Colombier if (a < 0)
13153e12c5d1SDavid du Colombier return 0;
13163e12c5d1SDavid du Colombier return 1;
13173e12c5d1SDavid du Colombier }
13183e12c5d1SDavid du Colombier
/*
 * WHITESPACE - non-zero when ch is a space, a tab or a newline.
 * ch appears once per comparison in the expansion, so the argument
 * must not have side effects.
 */
#define WHITESPACE(ch)	((ch) == ' ' || (ch) == '\t' || (ch) == '\n')
13203e12c5d1SDavid du Colombier
13213e12c5d1SDavid du Colombier int
isp9font(void)13223e12c5d1SDavid du Colombier isp9font(void)
13233e12c5d1SDavid du Colombier {
13243e12c5d1SDavid du Colombier uchar *cp, *p;
13253e12c5d1SDavid du Colombier int i, n;
13263e12c5d1SDavid du Colombier char pathname[1024];
13273e12c5d1SDavid du Colombier
13283e12c5d1SDavid du Colombier cp = buf;
13293e12c5d1SDavid du Colombier if (!getfontnum(cp, &cp)) /* height */
13303e12c5d1SDavid du Colombier return 0;
13313e12c5d1SDavid du Colombier if (!getfontnum(cp, &cp)) /* ascent */
13323e12c5d1SDavid du Colombier return 0;
13335e492409SDavid du Colombier for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
13343e12c5d1SDavid du Colombier if (!getfontnum(cp, &cp)) /* min */
13353e12c5d1SDavid du Colombier break;
13363e12c5d1SDavid du Colombier if (!getfontnum(cp, &cp)) /* max */
13373e12c5d1SDavid du Colombier return 0;
13385e492409SDavid du Colombier getfontnum(cp, &cp); /* optional offset */
13393e12c5d1SDavid du Colombier while (WHITESPACE(*cp))
13403e12c5d1SDavid du Colombier cp++;
13413e12c5d1SDavid du Colombier for (p = cp; *cp && !WHITESPACE(*cp); cp++)
13423e12c5d1SDavid du Colombier ;
13433e12c5d1SDavid du Colombier /* construct a path name, if needed */
13443e12c5d1SDavid du Colombier n = 0;
13453e12c5d1SDavid du Colombier if (*p != '/' && slash) {
13463e12c5d1SDavid du Colombier n = slash-fname+1;
13473e12c5d1SDavid du Colombier if (n < sizeof(pathname))
13483e12c5d1SDavid du Colombier memcpy(pathname, fname, n);
13493e12c5d1SDavid du Colombier else n = 0;
13503e12c5d1SDavid du Colombier }
13515e492409SDavid du Colombier if (n+cp-p+4 < sizeof(pathname)) {
13523e12c5d1SDavid du Colombier memcpy(pathname+n, p, cp-p);
13533e12c5d1SDavid du Colombier n += cp-p;
13543e12c5d1SDavid du Colombier pathname[n] = 0;
13555e492409SDavid du Colombier if (access(pathname, AEXIST) < 0) {
13565e492409SDavid du Colombier strcpy(pathname+n, ".0");
13579a747e4fSDavid du Colombier if (access(pathname, AEXIST) < 0)
13583e12c5d1SDavid du Colombier return 0;
13593e12c5d1SDavid du Colombier }
13603e12c5d1SDavid du Colombier }
13615e492409SDavid du Colombier }
13623e12c5d1SDavid du Colombier if (i) {
13638d37e088SDavid du Colombier print(mime ? "text/plain\n" : "font file\n");
13643e12c5d1SDavid du Colombier return 1;
13653e12c5d1SDavid du Colombier }
13663e12c5d1SDavid du Colombier return 0;
13673e12c5d1SDavid du Colombier }
13683e12c5d1SDavid du Colombier
13693e12c5d1SDavid du Colombier int
getfontnum(uchar * cp,uchar ** rp)13703e12c5d1SDavid du Colombier getfontnum(uchar *cp, uchar **rp)
13713e12c5d1SDavid du Colombier {
13723e12c5d1SDavid du Colombier while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
13733e12c5d1SDavid du Colombier cp++;
13743e12c5d1SDavid du Colombier if (*cp < '0' || *cp > '9')
13753e12c5d1SDavid du Colombier return 0;
13763e12c5d1SDavid du Colombier strtoul((char *)cp, (char **)rp, 0);
13775e492409SDavid du Colombier if (!WHITESPACE(**rp)) {
13785e492409SDavid du Colombier *rp = cp;
13793e12c5d1SDavid du Colombier return 0;
13805e492409SDavid du Colombier }
13813e12c5d1SDavid du Colombier return 1;
13823e12c5d1SDavid du Colombier }
13837dd7cddfSDavid du Colombier
13847dd7cddfSDavid du Colombier int
isrtf(void)1385fb7f0c93SDavid du Colombier isrtf(void)
13867dd7cddfSDavid du Colombier {
1387fb7f0c93SDavid du Colombier if(strstr((char *)buf, "\\rtf1")){
1388f2e8132aSDavid du Colombier print(mime ? "application/rtf\n" : "rich text format\n");
1389f2e8132aSDavid du Colombier return 1;
1390f2e8132aSDavid du Colombier }
1391f2e8132aSDavid du Colombier return 0;
1392f2e8132aSDavid du Colombier }
1393f2e8132aSDavid du Colombier
1394f2e8132aSDavid du Colombier int
ismsdos(void)1395f2e8132aSDavid du Colombier ismsdos(void)
1396f2e8132aSDavid du Colombier {
1397f2e8132aSDavid du Colombier if (buf[0] == 0x4d && buf[1] == 0x5a){
1398f2e8132aSDavid du Colombier print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
13997dd7cddfSDavid du Colombier return 1;
14007dd7cddfSDavid du Colombier }
14017dd7cddfSDavid du Colombier return 0;
14027dd7cddfSDavid du Colombier }
1403b7327ca2SDavid du Colombier
1404b7327ca2SDavid du Colombier int
iself(void)1405b7327ca2SDavid du Colombier iself(void)
1406b7327ca2SDavid du Colombier {
1407ee7057f8SDavid du Colombier static char *cpu[] = { /* NB: incomplete and arbitary list */
1408b7327ca2SDavid du Colombier [1] "WE32100",
1409b7327ca2SDavid du Colombier [2] "SPARC",
1410b7327ca2SDavid du Colombier [3] "i386",
1411b7327ca2SDavid du Colombier [4] "M68000",
1412b7327ca2SDavid du Colombier [5] "M88000",
1413b7327ca2SDavid du Colombier [6] "i486",
1414b7327ca2SDavid du Colombier [7] "i860",
1415b7327ca2SDavid du Colombier [8] "R3000",
1416b7327ca2SDavid du Colombier [9] "S370",
1417b7327ca2SDavid du Colombier [10] "R4000",
1418b7327ca2SDavid du Colombier [15] "HP-PA",
1419b7327ca2SDavid du Colombier [18] "sparc v8+",
1420b7327ca2SDavid du Colombier [19] "i960",
1421b7327ca2SDavid du Colombier [20] "PPC-32",
1422b7327ca2SDavid du Colombier [21] "PPC-64",
1423b7327ca2SDavid du Colombier [40] "ARM",
1424b7327ca2SDavid du Colombier [41] "Alpha",
1425b7327ca2SDavid du Colombier [43] "sparc v9",
1426ea43b5ecSDavid du Colombier [50] "IA-64",
1427f9247424SDavid du Colombier [62] "AMD64",
1428b7327ca2SDavid du Colombier [75] "VAX",
1429b7327ca2SDavid du Colombier };
1430ee7057f8SDavid du Colombier static char *type[] = {
1431ee7057f8SDavid du Colombier [1] "relocatable object",
1432ee7057f8SDavid du Colombier [2] "executable",
1433ee7057f8SDavid du Colombier [3] "shared library",
1434ee7057f8SDavid du Colombier [4] "core dump",
1435ee7057f8SDavid du Colombier };
1436b7327ca2SDavid du Colombier
1437b7327ca2SDavid du Colombier if (memcmp(buf, "\x7fELF", 4) == 0){
1438b7327ca2SDavid du Colombier if (!mime){
1439883a8c51SDavid du Colombier int isdifend = 0;
1440b7327ca2SDavid du Colombier int n = (buf[19] << 8) | buf[18];
14418a2c5ad0SDavid du Colombier char *p = "unknown";
1442ee7057f8SDavid du Colombier char *t = "unknown";
14438a2c5ad0SDavid du Colombier
14448a2c5ad0SDavid du Colombier if (n > 0 && n < nelem(cpu) && cpu[n])
14458a2c5ad0SDavid du Colombier p = cpu[n];
14468a2c5ad0SDavid du Colombier else {
14478a2c5ad0SDavid du Colombier /* try the other byte order */
1448883a8c51SDavid du Colombier isdifend = 1;
14498a2c5ad0SDavid du Colombier n = (buf[18] << 8) | buf[19];
14508a2c5ad0SDavid du Colombier if (n > 0 && n < nelem(cpu) && cpu[n])
14518a2c5ad0SDavid du Colombier p = cpu[n];
14528a2c5ad0SDavid du Colombier }
1453883a8c51SDavid du Colombier if(isdifend)
1454883a8c51SDavid du Colombier n = (buf[16]<< 8) | buf[17];
1455883a8c51SDavid du Colombier else
1456883a8c51SDavid du Colombier n = (buf[17]<< 8) | buf[16];
1457883a8c51SDavid du Colombier
1458ee7057f8SDavid du Colombier if(n>0 && n < nelem(type) && type[n])
1459ee7057f8SDavid du Colombier t = type[n];
14604439694fSDavid du Colombier print("%s ELF%s %s\n", p, (buf[4] == 2? "64": "32"), t);
1461b7327ca2SDavid du Colombier }
1462b7327ca2SDavid du Colombier else
1463b7327ca2SDavid du Colombier print("application/x-elf-executable");
1464b7327ca2SDavid du Colombier return 1;
1465b7327ca2SDavid du Colombier }
1466b7327ca2SDavid du Colombier
1467b7327ca2SDavid du Colombier return 0;
1468b7327ca2SDavid du Colombier }
14690c547597SDavid du Colombier
14700c547597SDavid du Colombier int
isface(void)14710c547597SDavid du Colombier isface(void)
14720c547597SDavid du Colombier {
14730c547597SDavid du Colombier int i, j, ldepth, l;
14740c547597SDavid du Colombier char *p;
14750c547597SDavid du Colombier
14760c547597SDavid du Colombier ldepth = -1;
14770c547597SDavid du Colombier for(j = 0; j < 3; j++){
14780c547597SDavid du Colombier for(p = (char*)buf, i=0; i<3; i++){
14790c547597SDavid du Colombier if(p[0] != '0' || p[1] != 'x')
14800c547597SDavid du Colombier return 0;
14810c547597SDavid du Colombier if(buf[2+8] == ',')
14820c547597SDavid du Colombier l = 2;
14830c547597SDavid du Colombier else if(buf[2+4] == ',')
14840c547597SDavid du Colombier l = 1;
14850c547597SDavid du Colombier else
14860c547597SDavid du Colombier return 0;
14870c547597SDavid du Colombier if(ldepth == -1)
14880c547597SDavid du Colombier ldepth = l;
14890c547597SDavid du Colombier if(l != ldepth)
14900c547597SDavid du Colombier return 0;
14910c547597SDavid du Colombier strtoul(p, &p, 16);
14920c547597SDavid du Colombier if(*p++ != ',')
14930c547597SDavid du Colombier return 0;
14940c547597SDavid du Colombier while(*p == ' ' || *p == '\t')
14950c547597SDavid du Colombier p++;
14960c547597SDavid du Colombier }
14970c547597SDavid du Colombier if (*p++ != '\n')
14980c547597SDavid du Colombier return 0;
14990c547597SDavid du Colombier }
15000c547597SDavid du Colombier
15010c547597SDavid du Colombier if(mime)
15020c547597SDavid du Colombier print("application/x-face\n");
15030c547597SDavid du Colombier else
15040c547597SDavid du Colombier print("face image depth %d\n", ldepth);
15050c547597SDavid du Colombier return 1;
15060c547597SDavid du Colombier }
15070c547597SDavid du Colombier
1508