1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 #include <mach.h> 6 7 /* 8 * file - determine type of file 9 */ 10 #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) 11 12 uchar buf[6001]; 13 short cfreq[140]; 14 short wfreq[50]; 15 int nbuf; 16 Dir* mbuf; 17 int fd; 18 char *fname; 19 char *slash; 20 21 enum 22 { 23 Cword, 24 Fword, 25 Aword, 26 Alword, 27 Lword, 28 I1, 29 I2, 30 I3, 31 Clatin = 128, 32 Cbinary, 33 Cnull, 34 Ceascii, 35 Cutf, 36 }; 37 struct 38 { 39 char* word; 40 int class; 41 } dict[] = 42 { 43 "PATH", Lword, 44 "TEXT", Aword, 45 "adt", Alword, 46 "aggr", Alword, 47 "alef", Alword, 48 "array", Lword, 49 "block", Fword, 50 "chan", Alword, 51 "char", Cword, 52 "common", Fword, 53 "con", Lword, 54 "data", Fword, 55 "dimension", Fword, 56 "double", Cword, 57 "extern", Cword, 58 "bio", I2, 59 "float", Cword, 60 "fn", Lword, 61 "function", Fword, 62 "h", I3, 63 "implement", Lword, 64 "import", Lword, 65 "include", I1, 66 "int", Cword, 67 "integer", Fword, 68 "iota", Lword, 69 "libc", I2, 70 "long", Cword, 71 "module", Lword, 72 "real", Fword, 73 "ref", Lword, 74 "register", Cword, 75 "self", Lword, 76 "short", Cword, 77 "static", Cword, 78 "stdio", I2, 79 "struct", Cword, 80 "subroutine", Fword, 81 "u", I2, 82 "void", Cword, 83 }; 84 85 /* codes for 'mode' field in language structure */ 86 enum { 87 Normal = 0, 88 First, /* first entry for language spanning several ranges */ 89 Multi, /* later entries " " " ... */ 90 Shared, /* codes used in several languages */ 91 }; 92 93 struct 94 { 95 int mode; /* see enum above */ 96 int count; 97 int low; 98 int high; 99 char *name; 100 101 } language[] = 102 { 103 Normal, 0, 0x0080, 0x0080, "Extended Latin", 104 Normal, 0, 0x0100, 0x01FF, "Extended Latin", 105 Normal, 0, 0x0370, 0x03FF, "Greek", 106 Normal, 0, 0x0400, 0x04FF, "Cyrillic", 107 Normal, 0, 0x0530, 0x058F, "Armenian", 108 Normal, 0, 0x0590, 0x05FF, "Hebrew", 109 Normal, 0, 0x0600, 0x06FF, "Arabic", 110 Normal, 0, 0x0900, 0x097F, "Devanagari", 111 Normal, 0, 0x0980, 0x09FF, "Bengali", 112 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", 113 Normal, 0, 0x0A80, 0x0AFF, "Gujarati", 114 Normal, 0, 0x0B00, 0x0B7F, "Oriya", 115 Normal, 0, 0x0B80, 0x0BFF, "Tamil", 116 Normal, 0, 0x0C00, 0x0C7F, "Telugu", 117 Normal, 0, 0x0C80, 0x0CFF, "Kannada", 118 Normal, 0, 0x0D00, 0x0D7F, "Malayalam", 119 Normal, 0, 0x0E00, 0x0E7F, "Thai", 120 Normal, 0, 0x0E80, 0x0EFF, "Lao", 121 Normal, 0, 0x1000, 0x105F, "Tibetan", 122 Normal, 0, 0x10A0, 0x10FF, "Georgian", 123 Normal, 0, 0x3040, 0x30FF, "Japanese", 124 Normal, 0, 0x3100, 0x312F, "Chinese", 125 First, 0, 0x3130, 0x318F, "Korean", 126 Multi, 0, 0x3400, 0x3D2F, "Korean", 127 Shared, 0, 0x4e00, 0x9fff, "CJK", 128 Normal, 0, 0, 0, 0, /* terminal entry */ 129 }; 130 131 132 enum 133 { 134 Fascii, /* printable ascii */ 135 Flatin, /* latin 1*/ 136 Futf, /* UTf character set */ 137 Fbinary, /* binary */ 138 Feascii, /* ASCII with control chars */ 139 Fnull, /* NULL in file */ 140 } guess; 141 142 void bump_utf_count(Rune); 143 int cistrncmp(char*, char*, int); 144 void filetype(int); 145 int getfontnum(uchar*, uchar**); 146 int isas(void); 147 int isc(void); 148 int iscint(void); 149 int isenglish(void); 150 int ishp(void); 151 int ishtml(void); 152 int isrfc822(void); 153 int ismbox(void); 154 int islimbo(void); 155 int ismung(void); 156 int isp9bit(void); 157 int isp9font(void); 158 int istring(void); 159 int long0(void); 160 int p9bitnum(uchar*); 161 int p9subfont(uchar*); 162 void print_utf(void); 163 void type(char*, int); 164 int utf_count(void); 165 void wordfreq(void); 166 167 int (*call[])(void) = 168 { 169 long0, /* recognizable by first 4 bytes */ 170 istring, /* recognizable by first string */ 171 ishtml, /* html keywords */ 172 isrfc822, /* email file */ 173 ismbox, /* mail box */ 174 iscint, /* compiler/assembler intermediate */ 175 islimbo, /* limbo source */ 176 isc, /* c & alef compiler key words */ 177 isas, /* assembler key words */ 178 ismung, /* entropy compressed/encrypted */ 179 isp9font, /* plan 9 font */ 180 isp9bit, /* plan 9 image (as from /dev/window) */ 181 isenglish, /* char frequency English */ 182 ishp, /* HP Job Control Language - Postscript */ 183 0 184 }; 185 186 int mime; 187 188 #define OCTET "application/octet-stream\n" 189 #define PLAIN "text/plain\n" 190 191 void 192 main(int argc, char *argv[]) 193 { 194 int i, j, maxlen; 195 char *cp; 196 Rune r; 197 198 ARGBEGIN{ 199 case 'm': 200 mime = 1; 201 break; 202 default: 203 fprint(2, "usage: file [-m] [file...]\n"); 204 exits("usage"); 205 }ARGEND; 206 207 maxlen = 0; 208 if(mime == 0 || argc > 1){ 209 for(i = 0; i < argc; i++) { 210 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) 211 ; 212 if(j > maxlen) 213 maxlen = j; 214 } 215 } 216 if (argc <= 0) { 217 if(!mime) 218 print ("stdin: "); 219 filetype(0); 220 } 221 else { 222 for(i = 0; i < argc; i++) 223 type(argv[i], maxlen); 224 } 225 exits(0); 226 } 227 228 void 229 type(char *file, int nlen) 230 { 231 Rune r; 232 int i; 233 char *p; 234 235 if(nlen > 0){ 236 slash = 0; 237 for (i = 0, p = file; *p; i++) { 238 if (*p == '/') /* find rightmost slash */ 239 slash = p; 240 p += chartorune(&r, p); /* count runes */ 241 } 242 print("%s:%*s",file, nlen-i+1, ""); 243 } 244 fname = file; 245 if ((fd = open(file, OREAD)) < 0) { 246 print("cannot open\n"); 247 return; 248 } 249 filetype(fd); 250 close(fd); 251 } 252 253 void 254 filetype(int fd) 255 { 256 Rune r; 257 int i, f, n; 258 char *p, *eob; 259 260 free(mbuf); 261 mbuf = dirfstat(fd); 262 if(mbuf == nil){ 263 print("cannot stat: %r\n"); 264 return; 265 } 266 if(mbuf->mode & DMDIR) { 267 print(mime ? "text/directory\n" : "directory\n"); 268 return; 269 } 270 if(mbuf->type != 'M' && mbuf->type != '|') { 271 print(mime ? OCTET : "special file #%c/%s\n", 272 mbuf->type, mbuf->name); 273 return; 274 } 275 nbuf = read(fd, buf, sizeof(buf)-1); 276 277 if(nbuf < 0) { 278 print("cannot read\n"); 279 return; 280 } 281 if(nbuf == 0) { 282 print(mime ? PLAIN : "empty file\n"); 283 return; 284 } 285 buf[nbuf] = 0; 286 287 /* 288 * build histogram table 289 */ 290 memset(cfreq, 0, sizeof(cfreq)); 291 for (i = 0; language[i].name; i++) 292 language[i].count = 0; 293 eob = (char *)buf+nbuf; 294 for(n = 0, p = (char *)buf; p < eob; n++) { 295 if (!fullrune(p, eob-p) && eob-p < UTFmax) 296 break; 297 p += chartorune(&r, p); 298 if (r == 0) 299 f = Cnull; 300 else if (r <= 0x7f) { 301 if (!isprint(r) && !isspace(r)) 302 f = Ceascii; /* ASCII control char */ 303 else f = r; 304 } else if (r == 0x080) { 305 bump_utf_count(r); 306 f = Cutf; 307 } else if (r < 0xA0) 308 f = Cbinary; /* Invalid Runes */ 309 else if (r <= 0xff) 310 f = Clatin; /* Latin 1 */ 311 else { 312 bump_utf_count(r); 313 f = Cutf; /* UTF extension */ 314 } 315 cfreq[f]++; /* ASCII chars peg directly */ 316 } 317 /* 318 * gross classify 319 */ 320 if (cfreq[Cbinary]) 321 guess = Fbinary; 322 else if (cfreq[Cutf]) 323 guess = Futf; 324 else if (cfreq[Clatin]) 325 guess = Flatin; 326 else if (cfreq[Ceascii]) 327 guess = Feascii; 328 else if (cfreq[Cnull] == n) { 329 print(mime ? OCTET : "first block all null bytes\n"); 330 return; 331 } 332 else guess = Fascii; 333 /* 334 * lookup dictionary words 335 */ 336 memset(wfreq, 0, sizeof(wfreq)); 337 if(guess == Fascii || guess == Flatin || guess == Futf) 338 wordfreq(); 339 /* 340 * call individual classify routines 341 */ 342 for(i=0; call[i]; i++) 343 if((*call[i])()) 344 return; 345 346 /* 347 * if all else fails, 348 * print out gross classification 349 */ 350 if (nbuf < 100 && !mime) 351 print(mime ? PLAIN : "short "); 352 if (guess == Fascii) 353 print(mime ? PLAIN : "Ascii\n"); 354 else if (guess == Feascii) 355 print(mime ? PLAIN : "extended ascii\n"); 356 else if (guess == Flatin) 357 print(mime ? PLAIN : "latin ascii\n"); 358 else if (guess == Futf && utf_count() < 4) 359 print_utf(); 360 else print(mime ? OCTET : "binary\n"); 361 } 362 363 void 364 bump_utf_count(Rune r) 365 { 366 int low, high, mid; 367 368 high = sizeof(language)/sizeof(language[0])-1; 369 for (low = 0; low < high;) { 370 mid = (low+high)/2; 371 if (r >=language[mid].low) { 372 if (r <= language[mid].high) { 373 language[mid].count++; 374 break; 375 } else low = mid+1; 376 } else high = mid; 377 } 378 } 379 380 int 381 utf_count(void) 382 { 383 int i, count; 384 385 count = 0; 386 for (i = 0; language[i].name; i++) 387 if (language[i].count > 0) 388 switch (language[i].mode) { 389 case Normal: 390 case First: 391 count++; 392 break; 393 default: 394 break; 395 } 396 return count; 397 } 398 399 int 400 chkascii(void) 401 { 402 int i; 403 404 for (i = 'a'; i < 'z'; i++) 405 if (cfreq[i]) 406 return 1; 407 for (i = 'A'; i < 'Z'; i++) 408 if (cfreq[i]) 409 return 1; 410 return 0; 411 } 412 413 int 414 find_first(char *name) 415 { 416 int i; 417 418 for (i = 0; language[i].name != 0; i++) 419 if (language[i].mode == First 420 && strcmp(language[i].name, name) == 0) 421 return i; 422 return -1; 423 } 424 425 void 426 print_utf(void) 427 { 428 int i, printed, j; 429 430 if(mime){ 431 print(PLAIN); 432 return; 433 } 434 if (chkascii()) { 435 printed = 1; 436 print("Ascii"); 437 } else 438 printed = 0; 439 for (i = 0; language[i].name; i++) 440 if (language[i].count) { 441 switch(language[i].mode) { 442 case Multi: 443 j = find_first(language[i].name); 444 if (j < 0) 445 break; 446 if (language[j].count > 0) 447 break; 448 /* Fall through */ 449 case Normal: 450 case First: 451 if (printed) 452 print(" & "); 453 else printed = 1; 454 print("%s", language[i].name); 455 break; 456 case Shared: 457 default: 458 break; 459 } 460 } 461 if(!printed) 462 print("UTF"); 463 print(" text\n"); 464 } 465 466 void 467 wordfreq(void) 468 { 469 int low, high, mid, r; 470 uchar *p, *p2, c; 471 472 p = buf; 473 for(;;) { 474 while (p < buf+nbuf && !isalpha(*p)) 475 p++; 476 if (p >= buf+nbuf) 477 return; 478 p2 = p; 479 while(p < buf+nbuf && isalpha(*p)) 480 p++; 481 c = *p; 482 *p = 0; 483 high = sizeof(dict)/sizeof(dict[0]); 484 for(low = 0;low < high;) { 485 mid = (low+high)/2; 486 r = strcmp(dict[mid].word, (char*)p2); 487 if(r == 0) { 488 wfreq[dict[mid].class]++; 489 break; 490 } 491 if(r < 0) 492 low = mid+1; 493 else 494 high = mid; 495 } 496 *p++ = c; 497 } 498 } 499 500 typedef struct Filemagic Filemagic; 501 struct Filemagic { 502 ulong x; 503 ulong mask; 504 char *desc; 505 char *mime; 506 }; 507 508 Filemagic long0tab[] = { 509 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, 510 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, 511 0x32636170, 0xFFFF00FF, "pac4 audio file\n", OCTET, 512 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, 513 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, 514 0x04034B50, 0xFFFFFFFF, "zip archive\n", OCTET, 515 070707, 0xFFFF, "cpio archive\n", OCTET, 516 0x2F7, 0xFFFF, "tex dvi\n", OCTET, 517 }; 518 519 int 520 filemagic(Filemagic *tab, int ntab, ulong x) 521 { 522 int i; 523 524 for(i=0; i<ntab; i++) 525 if((x&tab[i].mask) == tab[i].x){ 526 print(mime ? tab[i].mime : tab[i].desc); 527 return 1; 528 } 529 return 0; 530 } 531 532 int 533 long0(void) 534 { 535 Fhdr f; 536 long x; 537 538 seek(fd, 0, 0); /* reposition to start of file */ 539 if(crackhdr(fd, &f)) { 540 print(mime ? OCTET : "%s\n", f.name); 541 return 1; 542 } 543 x = LENDIAN(buf); 544 if(filemagic(long0tab, nelem(long0tab), x)) 545 return 1; 546 return 0; 547 } 548 549 /* 550 * initial words to classify file 551 */ 552 struct FILE_STRING 553 { 554 char *key; 555 char *filetype; 556 int length; 557 char *mime; 558 } file_string[] = 559 { 560 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", 561 "!<arch>\n", "archive", 8, "application/octet-stream", 562 "070707", "cpio archive - ascii header", 6, "application/octet-stream", 563 "#!/bin/rc", "rc executable file", 9, "text/plain", 564 "#!/bin/sh", "sh executable file", 9, "text/plain", 565 "%!", "postscript", 2, "application/postscript", 566 "\004%!", "postscript", 3, "application/postscript", 567 "x T post", "troff output for post", 8, "application/troff", 568 "x T Latin1", "troff output for Latin1", 10, "application/troff", 569 "x T utf", "troff output for UTF", 7, "application/troff", 570 "x T 202", "troff output for 202", 7, "application/troff", 571 "x T aps", "troff output for aps", 7, "application/troff", 572 "GIF", "GIF image", 3, "image/gif", 573 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", 574 "%PDF", "PDF", 4, "application/pdf", 575 "<html>\n", "HTML file", 7, "text/html", 576 "<HTML>\n", "HTML file", 7, "text/html", 577 "compressed\n", "Compressed image or subfont", 11, "application/octet-stream", 578 "\111\111\052\000", "tiff", 4, "image/tiff", 579 "\115\115\000\052", "tiff", 4, "image/tiff", 580 "\377\330\377\340", "jpeg", 4, "image/jpeg", 581 "\377\330\377\341", "jpeg", 4, "image/jpeg", 582 "\377\330\377\333", "jpeg", 4, "image/jpeg", 583 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", 584 0,0,0,0 585 }; 586 587 int 588 istring(void) 589 { 590 int i; 591 struct FILE_STRING *p; 592 593 for(p = file_string; p->key; p++) { 594 if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { 595 if(mime) 596 print("%s\n", p->mime); 597 else 598 print("%s\n", p->filetype); 599 return 1; 600 } 601 } 602 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ 603 for(i = 5; i < nbuf; i++) 604 if(buf[i] == '\n') 605 break; 606 if(mime) 607 print(OCTET); 608 else 609 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); 610 return 1; 611 } 612 return 0; 613 } 614 615 char* html_string[] = 616 { 617 "title", 618 "body", 619 "head", 620 "strong", 621 "h1", 622 "h2", 623 "h3", 624 "h4", 625 "h5", 626 "h6", 627 "ul", 628 "li", 629 "dl", 630 "br", 631 "em", 632 0, 633 }; 634 635 int 636 ishtml(void) 637 { 638 uchar *p, *q; 639 int i, count; 640 641 /* compare strings between '<' and '>' to html table */ 642 count = 0; 643 p = buf; 644 for(;;) { 645 while (p < buf+nbuf && *p != '<') 646 p++; 647 p++; 648 if (p >= buf+nbuf) 649 break; 650 if(*p == '/') 651 p++; 652 q = p; 653 while(p < buf+nbuf && *p != '>') 654 p++; 655 if (p >= buf+nbuf) 656 break; 657 for(i = 0; html_string[i]; i++) { 658 if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { 659 if(count++ > 4) { 660 print(mime ? "text/html\n" : "HTML file\n"); 661 return 1; 662 } 663 break; 664 } 665 } 666 p++; 667 } 668 return 0; 669 } 670 671 char* rfc822_string[] = 672 { 673 "from:", 674 "date:", 675 "to:", 676 "subject:", 677 "received:", 678 "reply to:", 679 "sender:", 680 0, 681 }; 682 683 int 684 isrfc822(void) 685 { 686 687 char *p, *q, *r; 688 int i, count; 689 690 count = 0; 691 p = (char*)buf; 692 for(;;) { 693 q = strchr(p, '\n'); 694 if(q == nil) 695 break; 696 *q = 0; 697 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ 698 count++; 699 *q = '\n'; 700 p = q+1; 701 continue; 702 } 703 *q = '\n'; 704 if(*p != '\t' && *p != ' '){ 705 r = strchr(p, ':'); 706 if(r == 0 || r > q) 707 break; 708 for(i = 0; rfc822_string[i]; i++) { 709 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ 710 count++; 711 break; 712 } 713 } 714 } 715 p = q+1; 716 } 717 if(count >= 3){ 718 print(mime ? "message/rfc822\n" : "email file\n"); 719 return 1; 720 } 721 return 0; 722 } 723 724 int 725 ismbox(void) 726 { 727 char *p, *q; 728 729 p = (char*)buf; 730 q = strchr(p, '\n'); 731 if(q == nil) 732 return 0; 733 *q = 0; 734 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ 735 print(mime ? "text/plain\n" : "mail box\n"); 736 return 1; 737 } 738 *q = '\n'; 739 return 0; 740 } 741 742 int 743 iscint(void) 744 { 745 int type; 746 char *name; 747 Biobuf b; 748 749 if(Binit(&b, fd, OREAD) == Beof) 750 return 0; 751 seek(fd, 0, 0); 752 type = objtype(&b, &name); 753 if(type < 0) 754 return 0; 755 if(mime) 756 print(OCTET); 757 else 758 print("%s intermediate\n", name); 759 return 1; 760 } 761 762 int 763 isc(void) 764 { 765 int n; 766 767 n = wfreq[I1]; 768 /* 769 * includes 770 */ 771 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 772 goto yes; 773 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 774 goto yes; 775 /* 776 * declarations 777 */ 778 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) 779 goto yes; 780 /* 781 * assignments 782 */ 783 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) 784 goto yes; 785 return 0; 786 787 yes: 788 if(mime){ 789 print(PLAIN); 790 return 1; 791 } 792 if(wfreq[Alword] > 0) 793 print("alef program\n"); 794 else 795 print("c program\n"); 796 return 1; 797 } 798 799 int 800 islimbo(void) 801 { 802 803 /* 804 * includes 805 */ 806 if(wfreq[Lword] < 4) 807 return 0; 808 print(mime ? PLAIN : "limbo program\n"); 809 return 1; 810 } 811 812 int 813 isas(void) 814 { 815 816 /* 817 * includes 818 */ 819 if(wfreq[Aword] < 2) 820 return 0; 821 print(mime ? PLAIN : "as program\n"); 822 return 1; 823 } 824 825 /* 826 * low entropy means encrypted 827 */ 828 int 829 ismung(void) 830 { 831 int i, bucket[8]; 832 float cs; 833 834 if(nbuf < 64) 835 return 0; 836 memset(bucket, 0, sizeof(bucket)); 837 for(i=0; i<64; i++) 838 bucket[(buf[i]>>5)&07] += 1; 839 840 cs = 0.; 841 for(i=0; i<8; i++) 842 cs += (bucket[i]-8)*(bucket[i]-8); 843 cs /= 8.; 844 if(cs <= 24.322) { 845 if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) 846 print(mime ? OCTET : "compressed\n"); 847 else 848 print(mime ? OCTET : "encrypted\n"); 849 return 1; 850 } 851 return 0; 852 } 853 854 /* 855 * english by punctuation and frequencies 856 */ 857 int 858 isenglish(void) 859 { 860 int vow, comm, rare, badpun, punct; 861 char *p; 862 863 if(guess != Fascii && guess != Feascii) 864 return 0; 865 badpun = 0; 866 punct = 0; 867 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) 868 switch(*p) { 869 case '.': 870 case ',': 871 case ')': 872 case '%': 873 case ';': 874 case ':': 875 case '?': 876 punct++; 877 if(p[1] != ' ' && p[1] != '\n') 878 badpun++; 879 } 880 if(badpun*5 > punct) 881 return 0; 882 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ 883 return 0; 884 if(2*cfreq[';'] > cfreq['e']) 885 return 0; 886 887 vow = 0; 888 for(p="AEIOU"; *p; p++) { 889 vow += cfreq[*p]; 890 vow += cfreq[tolower(*p)]; 891 } 892 comm = 0; 893 for(p="ETAION"; *p; p++) { 894 comm += cfreq[*p]; 895 comm += cfreq[tolower(*p)]; 896 } 897 rare = 0; 898 for(p="VJKQXZ"; *p; p++) { 899 rare += cfreq[*p]; 900 rare += cfreq[tolower(*p)]; 901 } 902 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { 903 print(mime ? PLAIN : "English text\n"); 904 return 1; 905 } 906 return 0; 907 } 908 909 /* 910 * pick up a number with 911 * syntax _*[0-9]+_ 912 */ 913 #define P9BITLEN 12 914 int 915 p9bitnum(uchar *bp) 916 { 917 int n, c, len; 918 919 len = P9BITLEN; 920 while(*bp == ' ') { 921 bp++; 922 len--; 923 if(len <= 0) 924 return -1; 925 } 926 n = 0; 927 while(len > 1) { 928 c = *bp++; 929 if(!isdigit(c)) 930 return -1; 931 n = n*10 + c-'0'; 932 len--; 933 } 934 if(*bp != ' ') 935 return -1; 936 return n; 937 } 938 939 int 940 depthof(char *s, int *newp) 941 { 942 char *es; 943 int d; 944 945 *newp = 0; 946 es = s+12; 947 while(s<es && *s==' ') 948 s++; 949 if(s == es) 950 return -1; 951 if('0'<=*s && *s<='9') 952 return 1<<atoi(s); 953 954 *newp = 1; 955 d = 0; 956 while(s<es && *s!=' '){ 957 s++; /* skip letter */ 958 d += strtoul(s, &s, 10); 959 } 960 961 switch(d){ 962 case 32: 963 case 24: 964 case 16: 965 case 8: 966 return d; 967 } 968 return -1; 969 } 970 971 int 972 isp9bit(void) 973 { 974 int dep, lox, loy, hix, hiy, px, new; 975 ulong t; 976 long len; 977 char *newlabel; 978 979 newlabel = "old "; 980 981 dep = depthof((char*)buf + 0*P9BITLEN, &new); 982 if(new) 983 newlabel = ""; 984 lox = p9bitnum(buf + 1*P9BITLEN); 985 loy = p9bitnum(buf + 2*P9BITLEN); 986 hix = p9bitnum(buf + 3*P9BITLEN); 987 hiy = p9bitnum(buf + 4*P9BITLEN); 988 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) 989 return 0; 990 991 if(dep < 8){ 992 px = 8/dep; /* pixels per byte */ 993 /* set l to number of bytes of data per scan line */ 994 if(lox >= 0) 995 len = (hix+px-1)/px - lox/px; 996 else{ /* make positive before divide */ 997 t = (-lox)+px-1; 998 t = (t/px)*px; 999 len = (t+hix+px-1)/px; 1000 } 1001 }else 1002 len = (hix-lox)*dep/8; 1003 len *= (hiy-loy); /* col length */ 1004 len += 5*P9BITLEN; /* size of initial ascii */ 1005 1006 /* 1007 * for image file, length is non-zero and must match calculation above 1008 * for /dev/window and /dev/screen the length is always zero 1009 * for subfont, the subfont header should follow immediately. 1010 */ 1011 if (len != 0 && mbuf->length == 0) { 1012 print("%splan 9 image\n", newlabel); 1013 return 1; 1014 } 1015 if (mbuf->length == len) { 1016 print("%splan 9 image\n", newlabel); 1017 return 1; 1018 } 1019 /* Ghostscript sometimes produces a little extra on the end */ 1020 if (mbuf->length < len+P9BITLEN) { 1021 print("%splan 9 image\n", newlabel); 1022 return 1; 1023 } 1024 if (p9subfont(buf+len)) { 1025 print("%ssubfont file\n", newlabel); 1026 return 1; 1027 } 1028 return 0; 1029 } 1030 1031 int 1032 p9subfont(uchar *p) 1033 { 1034 int n, h, a; 1035 1036 /* if image too big, assume it's a subfont */ 1037 if (p+3*P9BITLEN > buf+sizeof(buf)) 1038 return 1; 1039 1040 n = p9bitnum(p + 0*P9BITLEN); /* char count */ 1041 if (n < 0) 1042 return 0; 1043 h = p9bitnum(p + 1*P9BITLEN); /* height */ 1044 if (h < 0) 1045 return 0; 1046 a = p9bitnum(p + 2*P9BITLEN); /* ascent */ 1047 if (a < 0) 1048 return 0; 1049 return 1; 1050 } 1051 1052 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1053 1054 int 1055 isp9font(void) 1056 { 1057 uchar *cp, *p; 1058 int i, n; 1059 char pathname[1024]; 1060 1061 cp = buf; 1062 if (!getfontnum(cp, &cp)) /* height */ 1063 return 0; 1064 if (!getfontnum(cp, &cp)) /* ascent */ 1065 return 0; 1066 for (i = 0; 1; i++) { 1067 if (!getfontnum(cp, &cp)) /* min */ 1068 break; 1069 if (!getfontnum(cp, &cp)) /* max */ 1070 return 0; 1071 while (WHITESPACE(*cp)) 1072 cp++; 1073 for (p = cp; *cp && !WHITESPACE(*cp); cp++) 1074 ; 1075 /* construct a path name, if needed */ 1076 n = 0; 1077 if (*p != '/' && slash) { 1078 n = slash-fname+1; 1079 if (n < sizeof(pathname)) 1080 memcpy(pathname, fname, n); 1081 else n = 0; 1082 } 1083 if (n+cp-p < sizeof(pathname)) { 1084 memcpy(pathname+n, p, cp-p); 1085 n += cp-p; 1086 pathname[n] = 0; 1087 if (access(pathname, AEXIST) < 0) 1088 return 0; 1089 } 1090 } 1091 if (i) { 1092 print("font file\n"); 1093 return 1; 1094 } 1095 return 0; 1096 } 1097 1098 int 1099 getfontnum(uchar *cp, uchar **rp) 1100 { 1101 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ 1102 cp++; 1103 if (*cp < '0' || *cp > '9') 1104 return 0; 1105 strtoul((char *)cp, (char **)rp, 0); 1106 if (!WHITESPACE(**rp)) 1107 return 0; 1108 return 1; 1109 } 1110 1111 int 1112 ishp(void) 1113 { 1114 if (strncmp("\033%-12345X", (char *)buf, 9)==0) { 1115 print("HPJCL file\n"); 1116 return 1; 1117 } 1118 return 0; 1119 } 1120