1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 #include <mach.h> 6 7 /* 8 * file - determine type of file 9 */ 10 #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) 11 12 uchar buf[6001]; 13 short cfreq[140]; 14 short wfreq[50]; 15 int nbuf; 16 Dir* mbuf; 17 int fd; 18 char *fname; 19 char *slash; 20 21 enum 22 { 23 Cword, 24 Fword, 25 Aword, 26 Alword, 27 Lword, 28 I1, 29 I2, 30 I3, 31 Clatin = 128, 32 Cbinary, 33 Cnull, 34 Ceascii, 35 Cutf, 36 }; 37 struct 38 { 39 char* word; 40 int class; 41 } dict[] = 42 { 43 "PATH", Lword, 44 "TEXT", Aword, 45 "adt", Alword, 46 "aggr", Alword, 47 "alef", Alword, 48 "array", Lword, 49 "block", Fword, 50 "chan", Alword, 51 "char", Cword, 52 "common", Fword, 53 "con", Lword, 54 "data", Fword, 55 "dimension", Fword, 56 "double", Cword, 57 "extern", Cword, 58 "bio", I2, 59 "float", Cword, 60 "fn", Lword, 61 "function", Fword, 62 "h", I3, 63 "implement", Lword, 64 "import", Lword, 65 "include", I1, 66 "int", Cword, 67 "integer", Fword, 68 "iota", Lword, 69 "libc", I2, 70 "long", Cword, 71 "module", Lword, 72 "real", Fword, 73 "ref", Lword, 74 "register", Cword, 75 "self", Lword, 76 "short", Cword, 77 "static", Cword, 78 "stdio", I2, 79 "struct", Cword, 80 "subroutine", Fword, 81 "u", I2, 82 "void", Cword, 83 }; 84 85 /* codes for 'mode' field in language structure */ 86 enum { 87 Normal = 0, 88 First, /* first entry for language spanning several ranges */ 89 Multi, /* later entries " " " ... */ 90 Shared, /* codes used in several languages */ 91 }; 92 93 struct 94 { 95 int mode; /* see enum above */ 96 int count; 97 int low; 98 int high; 99 char *name; 100 101 } language[] = 102 { 103 Normal, 0, 0x0080, 0x0080, "Extended Latin", 104 Normal, 0, 0x0100, 0x01FF, "Extended Latin", 105 Normal, 0, 0x0370, 0x03FF, "Greek", 106 Normal, 0, 0x0400, 0x04FF, "Cyrillic", 107 Normal, 0, 0x0530, 0x058F, "Armenian", 108 Normal, 0, 0x0590, 0x05FF, "Hebrew", 109 Normal, 0, 0x0600, 0x06FF, "Arabic", 110 Normal, 0, 0x0900, 0x097F, "Devanagari", 111 Normal, 0, 0x0980, 0x09FF, "Bengali", 112 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", 113 Normal, 0, 0x0A80, 0x0AFF, "Gujarati", 114 Normal, 0, 0x0B00, 0x0B7F, "Oriya", 115 Normal, 0, 0x0B80, 0x0BFF, "Tamil", 116 Normal, 0, 0x0C00, 0x0C7F, "Telugu", 117 Normal, 0, 0x0C80, 0x0CFF, "Kannada", 118 Normal, 0, 0x0D00, 0x0D7F, "Malayalam", 119 Normal, 0, 0x0E00, 0x0E7F, "Thai", 120 Normal, 0, 0x0E80, 0x0EFF, "Lao", 121 Normal, 0, 0x1000, 0x105F, "Tibetan", 122 Normal, 0, 0x10A0, 0x10FF, "Georgian", 123 Normal, 0, 0x3040, 0x30FF, "Japanese", 124 Normal, 0, 0x3100, 0x312F, "Chinese", 125 First, 0, 0x3130, 0x318F, "Korean", 126 Multi, 0, 0x3400, 0x3D2F, "Korean", 127 Shared, 0, 0x4e00, 0x9fff, "CJK", 128 Normal, 0, 0, 0, 0, /* terminal entry */ 129 }; 130 131 132 enum 133 { 134 Fascii, /* printable ascii */ 135 Flatin, /* latin 1*/ 136 Futf, /* UTf character set */ 137 Fbinary, /* binary */ 138 Feascii, /* ASCII with control chars */ 139 Fnull, /* NULL in file */ 140 } guess; 141 142 void bump_utf_count(Rune); 143 int cistrncmp(char*, char*, int); 144 void filetype(int); 145 int getfontnum(uchar*, uchar**); 146 int isas(void); 147 int isc(void); 148 int iscint(void); 149 int isenglish(void); 150 int ishp(void); 151 int ishtml(void); 152 int isrfc822(void); 153 int ismbox(void); 154 int islimbo(void); 155 int ismung(void); 156 int isp9bit(void); 157 int isp9font(void); 158 int isrtf(void); 159 int ismsdos(void); 160 int istring(void); 161 int long0(void); 162 int p9bitnum(uchar*); 163 int p9subfont(uchar*); 164 void print_utf(void); 165 void type(char*, int); 166 int utf_count(void); 167 void wordfreq(void); 168 169 int (*call[])(void) = 170 { 171 long0, /* recognizable by first 4 bytes */ 172 istring, /* recognizable by first string */ 173 isrfc822, /* email file */ 174 ismbox, /* mail box */ 175 ishtml, /* html keywords */ 176 iscint, /* compiler/assembler intermediate */ 177 islimbo, /* limbo source */ 178 isc, /* c & alef compiler key words */ 179 isas, /* assembler key words */ 180 ismung, /* entropy compressed/encrypted */ 181 isp9font, /* plan 9 font */ 182 isp9bit, /* plan 9 image (as from /dev/window) */ 183 isenglish, /* char frequency English */ 184 isrtf, /* rich text format */ 185 ismsdos, /* msdos exe (virus file attachement) */ 186 0 187 }; 188 189 int mime; 190 191 #define OCTET "application/octet-stream\n" 192 #define PLAIN "text/plain\n" 193 194 void 195 main(int argc, char *argv[]) 196 { 197 int i, j, maxlen; 198 char *cp; 199 Rune r; 200 201 ARGBEGIN{ 202 case 'm': 203 mime = 1; 204 break; 205 default: 206 fprint(2, "usage: file [-m] [file...]\n"); 207 exits("usage"); 208 }ARGEND; 209 210 maxlen = 0; 211 if(mime == 0 || argc > 1){ 212 for(i = 0; i < argc; i++) { 213 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) 214 ; 215 if(j > maxlen) 216 maxlen = j; 217 } 218 } 219 if (argc <= 0) { 220 if(!mime) 221 print ("stdin: "); 222 filetype(0); 223 } 224 else { 225 for(i = 0; i < argc; i++) 226 type(argv[i], maxlen); 227 } 228 exits(0); 229 } 230 231 void 232 type(char *file, int nlen) 233 { 234 Rune r; 235 int i; 236 char *p; 237 238 if(nlen > 0){ 239 slash = 0; 240 for (i = 0, p = file; *p; i++) { 241 if (*p == '/') /* find rightmost slash */ 242 slash = p; 243 p += chartorune(&r, p); /* count runes */ 244 } 245 print("%s:%*s",file, nlen-i+1, ""); 246 } 247 fname = file; 248 if ((fd = open(file, OREAD)) < 0) { 249 print("cannot open\n"); 250 return; 251 } 252 filetype(fd); 253 close(fd); 254 } 255 256 void 257 filetype(int fd) 258 { 259 Rune r; 260 int i, f, n; 261 char *p, *eob; 262 263 free(mbuf); 264 mbuf = dirfstat(fd); 265 if(mbuf == nil){ 266 print("cannot stat: %r\n"); 267 return; 268 } 269 if(mbuf->mode & DMDIR) { 270 print(mime ? "text/directory\n" : "directory\n"); 271 return; 272 } 273 if(mbuf->type != 'M' && mbuf->type != '|') { 274 print(mime ? OCTET : "special file #%c/%s\n", 275 mbuf->type, mbuf->name); 276 return; 277 } 278 nbuf = read(fd, buf, sizeof(buf)-1); 279 280 if(nbuf < 0) { 281 print("cannot read\n"); 282 return; 283 } 284 if(nbuf == 0) { 285 print(mime ? PLAIN : "empty file\n"); 286 return; 287 } 288 buf[nbuf] = 0; 289 290 /* 291 * build histogram table 292 */ 293 memset(cfreq, 0, sizeof(cfreq)); 294 for (i = 0; language[i].name; i++) 295 language[i].count = 0; 296 eob = (char *)buf+nbuf; 297 for(n = 0, p = (char *)buf; p < eob; n++) { 298 if (!fullrune(p, eob-p) && eob-p < UTFmax) 299 break; 300 p += chartorune(&r, p); 301 if (r == 0) 302 f = Cnull; 303 else if (r <= 0x7f) { 304 if (!isprint(r) && !isspace(r)) 305 f = Ceascii; /* ASCII control char */ 306 else f = r; 307 } else if (r == 0x080) { 308 bump_utf_count(r); 309 f = Cutf; 310 } else if (r < 0xA0) 311 f = Cbinary; /* Invalid Runes */ 312 else if (r <= 0xff) 313 f = Clatin; /* Latin 1 */ 314 else { 315 bump_utf_count(r); 316 f = Cutf; /* UTF extension */ 317 } 318 cfreq[f]++; /* ASCII chars peg directly */ 319 } 320 /* 321 * gross classify 322 */ 323 if (cfreq[Cbinary]) 324 guess = Fbinary; 325 else if (cfreq[Cutf]) 326 guess = Futf; 327 else if (cfreq[Clatin]) 328 guess = Flatin; 329 else if (cfreq[Ceascii]) 330 guess = Feascii; 331 else if (cfreq[Cnull] == n) { 332 print(mime ? OCTET : "first block all null bytes\n"); 333 return; 334 } 335 else guess = Fascii; 336 /* 337 * lookup dictionary words 338 */ 339 memset(wfreq, 0, sizeof(wfreq)); 340 if(guess == Fascii || guess == Flatin || guess == Futf) 341 wordfreq(); 342 /* 343 * call individual classify routines 344 */ 345 for(i=0; call[i]; i++) 346 if((*call[i])()) 347 return; 348 349 /* 350 * if all else fails, 351 * print out gross classification 352 */ 353 if (nbuf < 100 && !mime) 354 print(mime ? PLAIN : "short "); 355 if (guess == Fascii) 356 print(mime ? PLAIN : "Ascii\n"); 357 else if (guess == Feascii) 358 print(mime ? PLAIN : "extended ascii\n"); 359 else if (guess == Flatin) 360 print(mime ? PLAIN : "latin ascii\n"); 361 else if (guess == Futf && utf_count() < 4) 362 print_utf(); 363 else print(mime ? OCTET : "binary\n"); 364 } 365 366 void 367 bump_utf_count(Rune r) 368 { 369 int low, high, mid; 370 371 high = sizeof(language)/sizeof(language[0])-1; 372 for (low = 0; low < high;) { 373 mid = (low+high)/2; 374 if (r >=language[mid].low) { 375 if (r <= language[mid].high) { 376 language[mid].count++; 377 break; 378 } else low = mid+1; 379 } else high = mid; 380 } 381 } 382 383 int 384 utf_count(void) 385 { 386 int i, count; 387 388 count = 0; 389 for (i = 0; language[i].name; i++) 390 if (language[i].count > 0) 391 switch (language[i].mode) { 392 case Normal: 393 case First: 394 count++; 395 break; 396 default: 397 break; 398 } 399 return count; 400 } 401 402 int 403 chkascii(void) 404 { 405 int i; 406 407 for (i = 'a'; i < 'z'; i++) 408 if (cfreq[i]) 409 return 1; 410 for (i = 'A'; i < 'Z'; i++) 411 if (cfreq[i]) 412 return 1; 413 return 0; 414 } 415 416 int 417 find_first(char *name) 418 { 419 int i; 420 421 for (i = 0; language[i].name != 0; i++) 422 if (language[i].mode == First 423 && strcmp(language[i].name, name) == 0) 424 return i; 425 return -1; 426 } 427 428 void 429 print_utf(void) 430 { 431 int i, printed, j; 432 433 if(mime){ 434 print(PLAIN); 435 return; 436 } 437 if (chkascii()) { 438 printed = 1; 439 print("Ascii"); 440 } else 441 printed = 0; 442 for (i = 0; language[i].name; i++) 443 if (language[i].count) { 444 switch(language[i].mode) { 445 case Multi: 446 j = find_first(language[i].name); 447 if (j < 0) 448 break; 449 if (language[j].count > 0) 450 break; 451 /* Fall through */ 452 case Normal: 453 case First: 454 if (printed) 455 print(" & "); 456 else printed = 1; 457 print("%s", language[i].name); 458 break; 459 case Shared: 460 default: 461 break; 462 } 463 } 464 if(!printed) 465 print("UTF"); 466 print(" text\n"); 467 } 468 469 void 470 wordfreq(void) 471 { 472 int low, high, mid, r; 473 uchar *p, *p2, c; 474 475 p = buf; 476 for(;;) { 477 while (p < buf+nbuf && !isalpha(*p)) 478 p++; 479 if (p >= buf+nbuf) 480 return; 481 p2 = p; 482 while(p < buf+nbuf && isalpha(*p)) 483 p++; 484 c = *p; 485 *p = 0; 486 high = sizeof(dict)/sizeof(dict[0]); 487 for(low = 0;low < high;) { 488 mid = (low+high)/2; 489 r = strcmp(dict[mid].word, (char*)p2); 490 if(r == 0) { 491 wfreq[dict[mid].class]++; 492 break; 493 } 494 if(r < 0) 495 low = mid+1; 496 else 497 high = mid; 498 } 499 *p++ = c; 500 } 501 } 502 503 typedef struct Filemagic Filemagic; 504 struct Filemagic { 505 ulong x; 506 ulong mask; 507 char *desc; 508 char *mime; 509 }; 510 511 Filemagic long0tab[] = { 512 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, 513 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, 514 0x32636170, 0xFFFF00FF, "pac4 audio file\n", OCTET, 515 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, 516 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, 517 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", 518 070707, 0xFFFF, "cpio archive\n", OCTET, 519 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", 520 }; 521 522 int 523 filemagic(Filemagic *tab, int ntab, ulong x) 524 { 525 int i; 526 527 for(i=0; i<ntab; i++) 528 if((x&tab[i].mask) == tab[i].x){ 529 print(mime ? tab[i].mime : tab[i].desc); 530 return 1; 531 } 532 return 0; 533 } 534 535 int 536 long0(void) 537 { 538 Fhdr f; 539 long x; 540 541 seek(fd, 0, 0); /* reposition to start of file */ 542 if(crackhdr(fd, &f)) { 543 print(mime ? OCTET : "%s\n", f.name); 544 return 1; 545 } 546 x = LENDIAN(buf); 547 if(filemagic(long0tab, nelem(long0tab), x)) 548 return 1; 549 return 0; 550 } 551 552 /* 553 * initial words to classify file 554 */ 555 struct FILE_STRING 556 { 557 char *key; 558 char *filetype; 559 int length; 560 char *mime; 561 } file_string[] = 562 { 563 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", 564 "!<arch>\n", "archive", 8, "application/octet-stream", 565 "070707", "cpio archive - ascii header", 6, "application/octet-stream", 566 "#!/bin/rc", "rc executable file", 9, "text/plain", 567 "#!/bin/sh", "sh executable file", 9, "text/plain", 568 "%!", "postscript", 2, "application/postscript", 569 "\004%!", "postscript", 3, "application/postscript", 570 "x T post", "troff output for post", 8, "application/troff", 571 "x T Latin1", "troff output for Latin1", 10, "application/troff", 572 "x T utf", "troff output for UTF", 7, "application/troff", 573 "x T 202", "troff output for 202", 7, "application/troff", 574 "x T aps", "troff output for aps", 7, "application/troff", 575 "GIF", "GIF image", 3, "image/gif", 576 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", 577 "%PDF", "PDF", 4, "application/pdf", 578 "<html>\n", "HTML file", 7, "text/html", 579 "<HTML>\n", "HTML file", 7, "text/html", 580 "compressed\n", "Compressed image or subfont", 11, "application/octet-stream", 581 "\111\111\052\000", "tiff", 4, "image/tiff", 582 "\115\115\000\052", "tiff", 4, "image/tiff", 583 "\377\330\377\340", "jpeg", 4, "image/jpeg", 584 "\377\330\377\341", "jpeg", 4, "image/jpeg", 585 "\377\330\377\333", "jpeg", 4, "image/jpeg", 586 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", 587 "<MakerFile ", "FrameMaker file", 11, "application/framemaker", 588 "\033%-12345X", "HPJCL file", 9, "application/hpjcl", 589 0,0,0,0 590 }; 591 592 int 593 istring(void) 594 { 595 int i; 596 struct FILE_STRING *p; 597 598 for(p = file_string; p->key; p++) { 599 if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { 600 if(mime) 601 print("%s\n", p->mime); 602 else 603 print("%s\n", p->filetype); 604 return 1; 605 } 606 } 607 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ 608 for(i = 5; i < nbuf; i++) 609 if(buf[i] == '\n') 610 break; 611 if(mime) 612 print(OCTET); 613 else 614 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); 615 return 1; 616 } 617 return 0; 618 } 619 620 char* html_string[] = 621 { 622 "title", 623 "body", 624 "head", 625 "strong", 626 "h1", 627 "h2", 628 "h3", 629 "h4", 630 "h5", 631 "h6", 632 "ul", 633 "li", 634 "dl", 635 "br", 636 "em", 637 0, 638 }; 639 640 int 641 ishtml(void) 642 { 643 uchar *p, *q; 644 int i, count; 645 646 /* compare strings between '<' and '>' to html table */ 647 count = 0; 648 p = buf; 649 for(;;) { 650 while (p < buf+nbuf && *p != '<') 651 p++; 652 p++; 653 if (p >= buf+nbuf) 654 break; 655 if(*p == '/') 656 p++; 657 q = p; 658 while(p < buf+nbuf && *p != '>') 659 p++; 660 if (p >= buf+nbuf) 661 break; 662 for(i = 0; html_string[i]; i++) { 663 if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { 664 if(count++ > 4) { 665 print(mime ? "text/html\n" : "HTML file\n"); 666 return 1; 667 } 668 break; 669 } 670 } 671 p++; 672 } 673 return 0; 674 } 675 676 char* rfc822_string[] = 677 { 678 "from:", 679 "date:", 680 "to:", 681 "subject:", 682 "received:", 683 "reply to:", 684 "sender:", 685 0, 686 }; 687 688 int 689 isrfc822(void) 690 { 691 692 char *p, *q, *r; 693 int i, count; 694 695 count = 0; 696 p = (char*)buf; 697 for(;;) { 698 q = strchr(p, '\n'); 699 if(q == nil) 700 break; 701 *q = 0; 702 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ 703 count++; 704 *q = '\n'; 705 p = q+1; 706 continue; 707 } 708 *q = '\n'; 709 if(*p != '\t' && *p != ' '){ 710 r = strchr(p, ':'); 711 if(r == 0 || r > q) 712 break; 713 for(i = 0; rfc822_string[i]; i++) { 714 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ 715 count++; 716 break; 717 } 718 } 719 } 720 p = q+1; 721 } 722 if(count >= 3){ 723 print(mime ? "message/rfc822\n" : "email file\n"); 724 return 1; 725 } 726 return 0; 727 } 728 729 int 730 ismbox(void) 731 { 732 char *p, *q; 733 734 p = (char*)buf; 735 q = strchr(p, '\n'); 736 if(q == nil) 737 return 0; 738 *q = 0; 739 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ 740 print(mime ? "text/plain\n" : "mail box\n"); 741 return 1; 742 } 743 *q = '\n'; 744 return 0; 745 } 746 747 int 748 iscint(void) 749 { 750 int type; 751 char *name; 752 Biobuf b; 753 754 if(Binit(&b, fd, OREAD) == Beof) 755 return 0; 756 seek(fd, 0, 0); 757 type = objtype(&b, &name); 758 if(type < 0) 759 return 0; 760 if(mime) 761 print(OCTET); 762 else 763 print("%s intermediate\n", name); 764 return 1; 765 } 766 767 int 768 isc(void) 769 { 770 int n; 771 772 n = wfreq[I1]; 773 /* 774 * includes 775 */ 776 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 777 goto yes; 778 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 779 goto yes; 780 /* 781 * declarations 782 */ 783 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) 784 goto yes; 785 /* 786 * assignments 787 */ 788 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) 789 goto yes; 790 return 0; 791 792 yes: 793 if(mime){ 794 print(PLAIN); 795 return 1; 796 } 797 if(wfreq[Alword] > 0) 798 print("alef program\n"); 799 else 800 print("c program\n"); 801 return 1; 802 } 803 804 int 805 islimbo(void) 806 { 807 808 /* 809 * includes 810 */ 811 if(wfreq[Lword] < 4) 812 return 0; 813 print(mime ? PLAIN : "limbo program\n"); 814 return 1; 815 } 816 817 int 818 isas(void) 819 { 820 821 /* 822 * includes 823 */ 824 if(wfreq[Aword] < 2) 825 return 0; 826 print(mime ? PLAIN : "as program\n"); 827 return 1; 828 } 829 830 /* 831 * low entropy means encrypted 832 */ 833 int 834 ismung(void) 835 { 836 int i, bucket[8]; 837 float cs; 838 839 if(nbuf < 64) 840 return 0; 841 memset(bucket, 0, sizeof(bucket)); 842 for(i=0; i<64; i++) 843 bucket[(buf[i]>>5)&07] += 1; 844 845 cs = 0.; 846 for(i=0; i<8; i++) 847 cs += (bucket[i]-8)*(bucket[i]-8); 848 cs /= 8.; 849 if(cs <= 24.322) { 850 if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) 851 print(mime ? OCTET : "compressed\n"); 852 else 853 print(mime ? OCTET : "encrypted\n"); 854 return 1; 855 } 856 return 0; 857 } 858 859 /* 860 * english by punctuation and frequencies 861 */ 862 int 863 isenglish(void) 864 { 865 int vow, comm, rare, badpun, punct; 866 char *p; 867 868 if(guess != Fascii && guess != Feascii) 869 return 0; 870 badpun = 0; 871 punct = 0; 872 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) 873 switch(*p) { 874 case '.': 875 case ',': 876 case ')': 877 case '%': 878 case ';': 879 case ':': 880 case '?': 881 punct++; 882 if(p[1] != ' ' && p[1] != '\n') 883 badpun++; 884 } 885 if(badpun*5 > punct) 886 return 0; 887 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ 888 return 0; 889 if(2*cfreq[';'] > cfreq['e']) 890 return 0; 891 892 vow = 0; 893 for(p="AEIOU"; *p; p++) { 894 vow += cfreq[*p]; 895 vow += cfreq[tolower(*p)]; 896 } 897 comm = 0; 898 for(p="ETAION"; *p; p++) { 899 comm += cfreq[*p]; 900 comm += cfreq[tolower(*p)]; 901 } 902 rare = 0; 903 for(p="VJKQXZ"; *p; p++) { 904 rare += cfreq[*p]; 905 rare += cfreq[tolower(*p)]; 906 } 907 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { 908 print(mime ? PLAIN : "English text\n"); 909 return 1; 910 } 911 return 0; 912 } 913 914 /* 915 * pick up a number with 916 * syntax _*[0-9]+_ 917 */ 918 #define P9BITLEN 12 919 int 920 p9bitnum(uchar *bp) 921 { 922 int n, c, len; 923 924 len = P9BITLEN; 925 while(*bp == ' ') { 926 bp++; 927 len--; 928 if(len <= 0) 929 return -1; 930 } 931 n = 0; 932 while(len > 1) { 933 c = *bp++; 934 if(!isdigit(c)) 935 return -1; 936 n = n*10 + c-'0'; 937 len--; 938 } 939 if(*bp != ' ') 940 return -1; 941 return n; 942 } 943 944 int 945 depthof(char *s, int *newp) 946 { 947 char *es; 948 int d; 949 950 *newp = 0; 951 es = s+12; 952 while(s<es && *s==' ') 953 s++; 954 if(s == es) 955 return -1; 956 if('0'<=*s && *s<='9') 957 return 1<<atoi(s); 958 959 *newp = 1; 960 d = 0; 961 while(s<es && *s!=' '){ 962 s++; /* skip letter */ 963 d += strtoul(s, &s, 10); 964 } 965 966 switch(d){ 967 case 32: 968 case 24: 969 case 16: 970 case 8: 971 return d; 972 } 973 return -1; 974 } 975 976 int 977 isp9bit(void) 978 { 979 int dep, lox, loy, hix, hiy, px, new; 980 ulong t; 981 long len; 982 char *newlabel; 983 984 newlabel = "old "; 985 986 dep = depthof((char*)buf + 0*P9BITLEN, &new); 987 if(new) 988 newlabel = ""; 989 lox = p9bitnum(buf + 1*P9BITLEN); 990 loy = p9bitnum(buf + 2*P9BITLEN); 991 hix = p9bitnum(buf + 3*P9BITLEN); 992 hiy = p9bitnum(buf + 4*P9BITLEN); 993 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) 994 return 0; 995 996 if(dep < 8){ 997 px = 8/dep; /* pixels per byte */ 998 /* set l to number of bytes of data per scan line */ 999 if(lox >= 0) 1000 len = (hix+px-1)/px - lox/px; 1001 else{ /* make positive before divide */ 1002 t = (-lox)+px-1; 1003 t = (t/px)*px; 1004 len = (t+hix+px-1)/px; 1005 } 1006 }else 1007 len = (hix-lox)*dep/8; 1008 len *= (hiy-loy); /* col length */ 1009 len += 5*P9BITLEN; /* size of initial ascii */ 1010 1011 /* 1012 * for image file, length is non-zero and must match calculation above 1013 * for /dev/window and /dev/screen the length is always zero 1014 * for subfont, the subfont header should follow immediately. 1015 */ 1016 if (len != 0 && mbuf->length == 0) { 1017 print("%splan 9 image\n", newlabel); 1018 return 1; 1019 } 1020 if (mbuf->length == len) { 1021 print("%splan 9 image\n", newlabel); 1022 return 1; 1023 } 1024 /* Ghostscript sometimes produces a little extra on the end */ 1025 if (mbuf->length < len+P9BITLEN) { 1026 print("%splan 9 image\n", newlabel); 1027 return 1; 1028 } 1029 if (p9subfont(buf+len)) { 1030 print("%ssubfont file\n", newlabel); 1031 return 1; 1032 } 1033 return 0; 1034 } 1035 1036 int 1037 p9subfont(uchar *p) 1038 { 1039 int n, h, a; 1040 1041 /* if image too big, assume it's a subfont */ 1042 if (p+3*P9BITLEN > buf+sizeof(buf)) 1043 return 1; 1044 1045 n = p9bitnum(p + 0*P9BITLEN); /* char count */ 1046 if (n < 0) 1047 return 0; 1048 h = p9bitnum(p + 1*P9BITLEN); /* height */ 1049 if (h < 0) 1050 return 0; 1051 a = p9bitnum(p + 2*P9BITLEN); /* ascent */ 1052 if (a < 0) 1053 return 0; 1054 return 1; 1055 } 1056 1057 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1058 1059 int 1060 isp9font(void) 1061 { 1062 uchar *cp, *p; 1063 int i, n; 1064 char pathname[1024]; 1065 1066 cp = buf; 1067 if (!getfontnum(cp, &cp)) /* height */ 1068 return 0; 1069 if (!getfontnum(cp, &cp)) /* ascent */ 1070 return 0; 1071 for (i = 0; 1; i++) { 1072 if (!getfontnum(cp, &cp)) /* min */ 1073 break; 1074 if (!getfontnum(cp, &cp)) /* max */ 1075 return 0; 1076 while (WHITESPACE(*cp)) 1077 cp++; 1078 for (p = cp; *cp && !WHITESPACE(*cp); cp++) 1079 ; 1080 /* construct a path name, if needed */ 1081 n = 0; 1082 if (*p != '/' && slash) { 1083 n = slash-fname+1; 1084 if (n < sizeof(pathname)) 1085 memcpy(pathname, fname, n); 1086 else n = 0; 1087 } 1088 if (n+cp-p < sizeof(pathname)) { 1089 memcpy(pathname+n, p, cp-p); 1090 n += cp-p; 1091 pathname[n] = 0; 1092 if (access(pathname, AEXIST) < 0) 1093 return 0; 1094 } 1095 } 1096 if (i) { 1097 print("font file\n"); 1098 return 1; 1099 } 1100 return 0; 1101 } 1102 1103 int 1104 getfontnum(uchar *cp, uchar **rp) 1105 { 1106 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ 1107 cp++; 1108 if (*cp < '0' || *cp > '9') 1109 return 0; 1110 strtoul((char *)cp, (char **)rp, 0); 1111 if (!WHITESPACE(**rp)) 1112 return 0; 1113 return 1; 1114 } 1115 1116 int 1117 isrtf(void) 1118 { 1119 if(strstr((char *)buf, "\\rtf1")){ 1120 print(mime ? "application/rtf\n" : "rich text format\n"); 1121 return 1; 1122 } 1123 return 0; 1124 } 1125 1126 int 1127 ismsdos(void) 1128 { 1129 if (buf[0] == 0x4d && buf[1] == 0x5a){ 1130 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n"); 1131 return 1; 1132 } 1133 return 0; 1134 } 1135