1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 #include <mach.h> 6 7 /* 8 * file - determine type of file 9 */ 10 #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) 11 12 uchar buf[6000]; 13 short cfreq[140]; 14 short wfreq[50]; 15 int nbuf; 16 Dir mbuf; 17 int fd; 18 char *fname; 19 char *slash; 20 21 enum 22 { 23 Cword, 24 Fword, 25 Aword, 26 Alword, 27 Lword, 28 I1, 29 I2, 30 I3, 31 Clatin = 128, 32 Cbinary, 33 Cnull, 34 Ceascii, 35 Cutf, 36 }; 37 struct 38 { 39 char* word; 40 int class; 41 } dict[] = 42 { 43 "PATH", Lword, 44 "TEXT", Aword, 45 "adt", Alword, 46 "aggr", Alword, 47 "alef", Alword, 48 "array", Lword, 49 "block", Fword, 50 "chan", Alword, 51 "char", Cword, 52 "common", Fword, 53 "con", Lword, 54 "data", Fword, 55 "dimension", Fword, 56 "double", Cword, 57 "extern", Cword, 58 "bio", I2, 59 "float", Cword, 60 "fn", Lword, 61 "function", Fword, 62 "h", I3, 63 "implement", Lword, 64 "import", Lword, 65 "include", I1, 66 "int", Cword, 67 "integer", Fword, 68 "iota", Lword, 69 "libc", I2, 70 "long", Cword, 71 "module", Lword, 72 "real", Fword, 73 "ref", Lword, 74 "register", Cword, 75 "self", Lword, 76 "short", Cword, 77 "static", Cword, 78 "stdio", I2, 79 "struct", Cword, 80 "subroutine", Fword, 81 "u", I2, 82 "void", Cword, 83 }; 84 85 /* codes for 'mode' field in language structure */ 86 enum { 87 Normal = 0, 88 First, /* first entry for language spanning several ranges */ 89 Multi, /* later entries " " " ... */ 90 Shared, /* codes used in several languages */ 91 }; 92 93 struct 94 { 95 int mode; /* see enum above */ 96 int count; 97 int low; 98 int high; 99 char *name; 100 101 } language[] = 102 { 103 Normal, 0, 0x0080, 0x0080, "Extended Latin", 104 Normal, 0, 0x0100, 0x01FF, "Extended Latin", 105 Normal, 0, 0x0370, 0x03FF, "Greek", 106 Normal, 0, 0x0400, 0x04FF, "Cyrillic", 107 Normal, 0, 0x0530, 0x058F, "Armenian", 108 Normal, 0, 0x0590, 0x05FF, "Hebrew", 109 Normal, 0, 0x0600, 0x06FF, "Arabic", 110 Normal, 0, 0x0900, 0x097F, "Devanagari", 111 Normal, 0, 0x0980, 0x09FF, "Bengali", 112 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", 113 Normal, 0, 0x0A80, 0x0AFF, "Gujarati", 114 Normal, 0, 0x0B00, 0x0B7F, "Oriya", 115 Normal, 0, 0x0B80, 0x0BFF, "Tamil", 116 Normal, 0, 0x0C00, 0x0C7F, "Telugu", 117 Normal, 0, 0x0C80, 0x0CFF, "Kannada", 118 Normal, 0, 0x0D00, 0x0D7F, "Malayalam", 119 Normal, 0, 0x0E00, 0x0E7F, "Thai", 120 Normal, 0, 0x0E80, 0x0EFF, "Lao", 121 Normal, 0, 0x1000, 0x105F, "Tibetan", 122 Normal, 0, 0x10A0, 0x10FF, "Georgian", 123 Normal, 0, 0x3040, 0x30FF, "Japanese", 124 Normal, 0, 0x3100, 0x312F, "Chinese", 125 First, 0, 0x3130, 0x318F, "Korean", 126 Multi, 0, 0x3400, 0x3D2F, "Korean", 127 Shared, 0, 0x4e00, 0x9fff, "CJK", 128 Normal, 0, 0, 0, 0, /* terminal entry */ 129 }; 130 131 132 enum 133 { 134 Fascii, /* printable ascii */ 135 Flatin, /* latin 1*/ 136 Futf, /* UTf character set */ 137 Fbinary, /* binary */ 138 Feascii, /* ASCII with control chars */ 139 Fnull, /* NULL in file */ 140 } guess; 141 142 void bump_utf_count(Rune); 143 int cistrncmp(char*, char*, int); 144 void filetype(int); 145 int getfontnum(uchar*, uchar**); 146 int isas(void); 147 int isc(void); 148 int iscint(void); 149 int isenglish(void); 150 int ishp(void); 151 int ishtml(void); 152 int islimbo(void); 153 int ismung(void); 154 int isp9bit(void); 155 int isp9font(void); 156 int istring(void); 157 int long0(void); 158 int p9bitnum(uchar*); 159 int p9subfont(uchar*); 160 void print_utf(void); 161 int short0(void); 162 void type(char*, int); 163 int utf_count(void); 164 void wordfreq(void); 165 166 int (*call[])(void) = 167 { 168 long0, /* recognizable by first 4 bytes */ 169 short0, /* recognizable by first 2 bytes */ 170 istring, /* recognizable by first string */ 171 ishtml, /* html keywords */ 172 iscint, /* compiler/assembler intermediate */ 173 islimbo, /* limbo source */ 174 isc, /* c & alef compiler key words */ 175 isas, /* assembler key words */ 176 ismung, /* entropy compressed/encrypted */ 177 isp9font, /* plan 9 font */ 178 isp9bit, /* plan 9 image (as from /dev/window) */ 179 isenglish, /* char frequency English */ 180 ishp, /* HP Job Control Language - Postscript */ 181 0 182 }; 183 184 int mime; 185 186 #define OCTET "application/octet-stream\n" 187 #define PLAIN "text/plain\n" 188 189 void 190 main(int argc, char *argv[]) 191 { 192 int i, j, maxlen; 193 char *cp; 194 Rune r; 195 196 ARGBEGIN{ 197 case 'm': 198 mime = 1; 199 break; 200 default: 201 fprint(2, "usage: file [-m] [file...]\n"); 202 exits("usage"); 203 }ARGEND; 204 205 maxlen = 0; 206 if(mime == 0 || argc > 1){ 207 for(i = 0; i < argc; i++) { 208 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) 209 ; 210 if(j > maxlen) 211 maxlen = j; 212 } 213 } 214 if (argc <= 0) { 215 if(!mime) 216 print ("stdin: "); 217 filetype(0); 218 } 219 else { 220 for(i = 0; i < argc; i++) 221 type(argv[i], maxlen); 222 } 223 exits(0); 224 } 225 226 void 227 type(char *file, int nlen) 228 { 229 Rune r; 230 int i; 231 char *p; 232 233 if(nlen > 0){ 234 slash = 0; 235 for (i = 0, p = file; *p; i++) { 236 if (*p == '/') /* find rightmost slash */ 237 slash = p; 238 p += chartorune(&r, p); /* count runes */ 239 } 240 print("%s:%*s",file, nlen-i+1, ""); 241 } 242 fname = file; 243 if ((fd = open(file, OREAD)) < 0) { 244 print("cannot open\n"); 245 return; 246 } 247 filetype(fd); 248 close(fd); 249 } 250 251 void 252 filetype(int fd) 253 { 254 Rune r; 255 int i, f, n; 256 char *p, *eob; 257 258 if(dirfstat(fd, &mbuf) < 0) { 259 print("cannot stat\n"); 260 return; 261 } 262 if(mbuf.mode & CHDIR) { 263 print(mime ? "text/directory\n" : "directory\n"); 264 return; 265 } 266 if(mbuf.type != 'M' && mbuf.type != '|') { 267 print(mime ? OCTET : "special file #%c/%s\n", 268 mbuf.type, mbuf.name); 269 return; 270 } 271 nbuf = read(fd, buf, sizeof(buf)); 272 273 if(nbuf < 0) { 274 print("cannot read\n"); 275 return; 276 } 277 if(nbuf == 0) { 278 print(mime ? PLAIN : "empty file\n"); 279 return; 280 } 281 282 /* 283 * build histogram table 284 */ 285 memset(cfreq, 0, sizeof(cfreq)); 286 for (i = 0; language[i].name; i++) 287 language[i].count = 0; 288 eob = (char *)buf+nbuf; 289 for(n = 0, p = (char *)buf; p < eob; n++) { 290 if (!fullrune(p, eob-p) && eob-p < UTFmax) 291 break; 292 p += chartorune(&r, p); 293 if (r == 0) 294 f = Cnull; 295 else if (r <= 0x7f) { 296 if (!isprint(r) && !isspace(r)) 297 f = Ceascii; /* ASCII control char */ 298 else f = r; 299 } else if (r == 0x080) { 300 bump_utf_count(r); 301 f = Cutf; 302 } else if (r < 0xA0) 303 f = Cbinary; /* Invalid Runes */ 304 else if (r <= 0xff) 305 f = Clatin; /* Latin 1 */ 306 else { 307 bump_utf_count(r); 308 f = Cutf; /* UTF extension */ 309 } 310 cfreq[f]++; /* ASCII chars peg directly */ 311 } 312 /* 313 * gross classify 314 */ 315 if (cfreq[Cbinary]) 316 guess = Fbinary; 317 else if (cfreq[Cutf]) 318 guess = Futf; 319 else if (cfreq[Clatin]) 320 guess = Flatin; 321 else if (cfreq[Ceascii]) 322 guess = Feascii; 323 else if (cfreq[Cnull] == n) { 324 print(mime ? OCTET : "all null bytes\n"); 325 return; 326 } 327 else guess = Fascii; 328 /* 329 * lookup dictionary words 330 */ 331 memset(wfreq, 0, sizeof(wfreq)); 332 if(guess == Fascii || guess == Flatin || guess == Futf) 333 wordfreq(); 334 /* 335 * call individual classify routines 336 */ 337 for(i=0; call[i]; i++) 338 if((*call[i])()) 339 return; 340 341 /* 342 * if all else fails, 343 * print out gross classification 344 */ 345 if (nbuf < 100) 346 print(mime ? PLAIN : "short "); 347 if (guess == Fascii) 348 print(mime ? PLAIN : "Ascii\n"); 349 else if (guess == Feascii) 350 print(mime ? PLAIN : "extended ascii\n"); 351 else if (guess == Flatin) 352 print(mime ? PLAIN : "latin ascii\n"); 353 else if (guess == Futf && utf_count() < 4) 354 print_utf(); 355 else print(mime ? OCTET : "binary\n"); 356 } 357 358 void 359 bump_utf_count(Rune r) 360 { 361 int low, high, mid; 362 363 high = sizeof(language)/sizeof(language[0])-1; 364 for (low = 0; low < high;) { 365 mid = (low+high)/2; 366 if (r >=language[mid].low) { 367 if (r <= language[mid].high) { 368 language[mid].count++; 369 break; 370 } else low = mid+1; 371 } else high = mid; 372 } 373 } 374 375 int 376 utf_count(void) 377 { 378 int i, count; 379 380 count = 0; 381 for (i = 0; language[i].name; i++) 382 if (language[i].count > 0) 383 switch (language[i].mode) { 384 case Normal: 385 case First: 386 count++; 387 break; 388 default: 389 break; 390 } 391 return count; 392 } 393 394 int 395 chkascii(void) 396 { 397 int i; 398 399 for (i = 'a'; i < 'z'; i++) 400 if (cfreq[i]) 401 return 1; 402 for (i = 'A'; i < 'Z'; i++) 403 if (cfreq[i]) 404 return 1; 405 return 0; 406 } 407 408 int 409 find_first(char *name) 410 { 411 int i; 412 413 for (i = 0; language[i].name != 0; i++) 414 if (language[i].mode == First 415 && strcmp(language[i].name, name) == 0) 416 return i; 417 return -1; 418 } 419 420 void 421 print_utf(void) 422 { 423 int i, printed, j; 424 425 if(mime){ 426 print(PLAIN); 427 return; 428 } 429 if (chkascii()) { 430 printed = 1; 431 print("Ascii"); 432 } else 433 printed = 0; 434 for (i = 0; language[i].name; i++) 435 if (language[i].count) { 436 switch(language[i].mode) { 437 case Multi: 438 j = find_first(language[i].name); 439 if (j < 0) 440 break; 441 if (language[j].count > 0) 442 break; 443 /* Fall through */ 444 case Normal: 445 case First: 446 if (printed) 447 print(" & "); 448 else printed = 1; 449 print("%s", language[i].name); 450 break; 451 case Shared: 452 default: 453 break; 454 } 455 } 456 if(!printed) 457 print("UTF"); 458 print(" text\n"); 459 } 460 461 void 462 wordfreq(void) 463 { 464 int low, high, mid, r; 465 uchar *p, *p2, c; 466 467 p = buf; 468 for(;;) { 469 while (p < buf+nbuf && !isalpha(*p)) 470 p++; 471 if (p >= buf+nbuf) 472 return; 473 p2 = p; 474 while(p < buf+nbuf && isalpha(*p)) 475 p++; 476 c = *p; 477 *p = 0; 478 high = sizeof(dict)/sizeof(dict[0]); 479 for(low = 0;low < high;) { 480 mid = (low+high)/2; 481 r = strcmp(dict[mid].word, (char*)p2); 482 if(r == 0) { 483 wfreq[dict[mid].class]++; 484 break; 485 } 486 if(r < 0) 487 low = mid+1; 488 else 489 high = mid; 490 } 491 *p++ = c; 492 } 493 } 494 495 int 496 long0(void) 497 { 498 Fhdr f; 499 long x; 500 501 seek(fd, 0, 0); /* reposition to start of file */ 502 if(crackhdr(fd, &f)) { 503 print(mime ? OCTET : "%s\n", f.name); 504 return 1; 505 } 506 x = LENDIAN(buf); 507 switch(x) { 508 case 0xf16df16d: 509 print(mime ? OCTET : "pac1 audio file\n"); 510 return 1; 511 case 0x31636170: 512 print(mime ? OCTET : "pac3 audio file\n"); 513 return 1; 514 case 0xba010000: 515 print(mime ? OCTET : "mpeg system stream\n"); 516 return 1; 517 case 0x30800cc0: 518 print(mime ? OCTET : "inferno .dis executable\n"); 519 return 1; 520 } 521 if(((x ^ 0x32636170) & 0xffff00ff) == 0) { 522 print(mime ? OCTET : "pac4 audio file\n"); 523 return 1; 524 } 525 return 0; 526 } 527 528 int 529 short0(void) 530 { 531 532 switch(LENDIAN(buf) & 0xffff) { 533 case 070707: 534 print(mime ? OCTET : "cpio archive\n"); 535 break; 536 537 case 0x02f7: 538 print(mime ? OCTET : "tex dvi\n"); 539 break; 540 default: 541 return 0; 542 } 543 return 1; 544 } 545 546 /* 547 * initial words to classify file 548 */ 549 struct FILE_STRING 550 { 551 char *key; 552 char *filetype; 553 int length; 554 char *mime; 555 } file_string[] = 556 { 557 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", 558 "!<arch>\n", "archive", 8, "application/octet-stream", 559 "070707", "cpio archive - ascii header", 6, "application/octet-stream", 560 "#!/bin/rc", "rc executable file", 9, "text/plain", 561 "#!/bin/sh", "sh executable file", 9, "text/plain", 562 "%!", "postscript", 2, "application/postscript", 563 "\004%!", "postscript", 3, "application/postscript", 564 "x T post", "troff output for post", 8, "application/troff", 565 "x T Latin1", "troff output for Latin1", 10, "application/troff", 566 "x T utf", "troff output for UTF", 7, "application/troff", 567 "x T 202", "troff output for 202", 7, "application/troff", 568 "x T aps", "troff output for aps", 7, "application/troff", 569 "GIF", "GIF image", 3, "image/gif", 570 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", 571 "%PDF", "PDF", 4, "image/pdf", 572 "<html>\n", "HTML file", 7, "text/html", 573 "<HTML>\n", "HTML file", 7, "text/html", 574 "compressed\n", "Compressed image or subfont", 11, "application/octet-stream", 575 "\111\111\052\000", "tiff", 4, "image/tiff", 576 "\115\115\000\052", "tiff", 4, "image/tiff", 577 "\377\330\377\340", "jpeg", 4, "image/jpeg", 578 "\377\330\377\341", "jpeg", 4, "image/jpeg", 579 "\377\330\377\333", "jpeg", 4, "image/jpeg", 580 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", 581 582 0,0,0,0 583 }; 584 585 int 586 istring(void) 587 { 588 int i; 589 struct FILE_STRING *p; 590 591 for(p = file_string; p->key; p++) { 592 if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { 593 if(mime) 594 print("%s\n", p->mime); 595 else 596 print("%s\n", p->filetype); 597 return 1; 598 } 599 } 600 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ 601 for(i = 5; i < nbuf; i++) 602 if(buf[i] == '\n') 603 break; 604 if(mime) 605 print(OCTET); 606 else 607 print("%.*s picture\n", i-5, (char*)buf+5); 608 return 1; 609 } 610 return 0; 611 } 612 613 char* html_string[] = 614 { 615 "title", 616 "body", 617 "head", 618 "strong", 619 "h1", 620 "h2", 621 "h3", 622 "h4", 623 "h5", 624 "h6", 625 "ul", 626 "li", 627 "dl", 628 "br", 629 "em", 630 0, 631 }; 632 633 int 634 ishtml(void) 635 { 636 uchar *p, *q; 637 int i, count; 638 639 /* compare strings between '<' and '>' to html table */ 640 count = 0; 641 p = buf; 642 for(;;) { 643 while (p < buf+nbuf && *p != '<') 644 p++; 645 p++; 646 if (p >= buf+nbuf) 647 break; 648 if(*p == '/') 649 p++; 650 q = p; 651 while(p < buf+nbuf && *p != '>') 652 p++; 653 if (p >= buf+nbuf) 654 break; 655 for(i = 0; html_string[i]; i++) { 656 if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { 657 if(count++ > 4) { 658 print(mime ? "text/html\n" : "HTML file\n"); 659 return 1; 660 } 661 break; 662 } 663 } 664 p++; 665 } 666 return 0; 667 } 668 669 /* 670 * case independent string compare 671 */ 672 int 673 cistrncmp(char *s1, char *s2, int n) 674 { 675 int c1, c2; 676 677 for(; n > 0; n--){ 678 c1 = *s1++; 679 c2 = *s2++; 680 if(isupper(c1)) 681 c1 = tolower(c1); 682 if(isupper(c2)) 683 c2 = tolower(c2); 684 if(c2 != c1) 685 break; 686 if(c1 == 0) 687 return 0; 688 } 689 return 1; 690 } 691 692 int 693 iscint(void) 694 { 695 int type; 696 char *name; 697 Biobuf b; 698 699 if(Binit(&b, fd, OREAD) == Beof) 700 return 0; 701 seek(fd, 0, 0); 702 type = objtype(&b, &name); 703 if(type < 0) 704 return 0; 705 if(mime) 706 print(OCTET); 707 else 708 print("%s intermediate\n", name); 709 return 1; 710 } 711 712 int 713 isc(void) 714 { 715 int n; 716 717 n = wfreq[I1]; 718 /* 719 * includes 720 */ 721 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 722 goto yes; 723 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 724 goto yes; 725 /* 726 * declarations 727 */ 728 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) 729 goto yes; 730 /* 731 * assignments 732 */ 733 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) 734 goto yes; 735 return 0; 736 737 yes: 738 if(mime){ 739 print(PLAIN); 740 return 1; 741 } 742 if(wfreq[Alword] > 0) 743 print("alef program\n"); 744 else 745 print("c program\n"); 746 return 1; 747 } 748 749 int 750 islimbo(void) 751 { 752 753 /* 754 * includes 755 */ 756 if(wfreq[Lword] < 4) 757 return 0; 758 print(mime ? PLAIN : "limbo program\n"); 759 return 1; 760 } 761 762 int 763 isas(void) 764 { 765 766 /* 767 * includes 768 */ 769 if(wfreq[Aword] < 2) 770 return 0; 771 print(mime ? PLAIN : "as program\n"); 772 return 1; 773 } 774 775 /* 776 * low entropy means encrypted 777 */ 778 int 779 ismung(void) 780 { 781 int i, bucket[8]; 782 float cs; 783 784 if(nbuf < 64) 785 return 0; 786 memset(bucket, 0, sizeof(bucket)); 787 for(i=0; i<64; i++) 788 bucket[(buf[i]>>5)&07] += 1; 789 790 cs = 0.; 791 for(i=0; i<8; i++) 792 cs += (bucket[i]-8)*(bucket[i]-8); 793 cs /= 8.; 794 if(cs <= 24.322) { 795 if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) 796 print(mime ? OCTET : "compressed\n"); 797 else 798 print(mime ? OCTET : "encrypted\n"); 799 return 1; 800 } 801 return 0; 802 } 803 804 /* 805 * english by punctuation and frequencies 806 */ 807 int 808 isenglish(void) 809 { 810 int vow, comm, rare, badpun, punct; 811 char *p; 812 813 if(guess != Fascii && guess != Feascii) 814 return 0; 815 badpun = 0; 816 punct = 0; 817 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) 818 switch(*p) { 819 case '.': 820 case ',': 821 case ')': 822 case '%': 823 case ';': 824 case ':': 825 case '?': 826 punct++; 827 if(p[1] != ' ' && p[1] != '\n') 828 badpun++; 829 } 830 if(badpun*5 > punct) 831 return 0; 832 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ 833 return 0; 834 if(2*cfreq[';'] > cfreq['e']) 835 return 0; 836 837 vow = 0; 838 for(p="AEIOU"; *p; p++) { 839 vow += cfreq[*p]; 840 vow += cfreq[tolower(*p)]; 841 } 842 comm = 0; 843 for(p="ETAION"; *p; p++) { 844 comm += cfreq[*p]; 845 comm += cfreq[tolower(*p)]; 846 } 847 rare = 0; 848 for(p="VJKQXZ"; *p; p++) { 849 rare += cfreq[*p]; 850 rare += cfreq[tolower(*p)]; 851 } 852 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { 853 print(mime ? PLAIN : "English text\n"); 854 return 1; 855 } 856 return 0; 857 } 858 859 /* 860 * pick up a number with 861 * syntax _*[0-9]+_ 862 */ 863 #define P9BITLEN 12 864 int 865 p9bitnum(uchar *bp) 866 { 867 int n, c, len; 868 869 len = P9BITLEN; 870 while(*bp == ' ') { 871 bp++; 872 len--; 873 if(len <= 0) 874 return -1; 875 } 876 n = 0; 877 while(len > 1) { 878 c = *bp++; 879 if(!isdigit(c)) 880 return -1; 881 n = n*10 + c-'0'; 882 len--; 883 } 884 if(*bp != ' ') 885 return -1; 886 return n; 887 } 888 889 int 890 depthof(char *s, int *newp) 891 { 892 char *es; 893 int d; 894 895 *newp = 0; 896 es = s+12; 897 while(s<es && *s==' ') 898 s++; 899 if(s == es) 900 return -1; 901 if('0'<=*s && *s<='9') 902 return 1<<atoi(s); 903 904 *newp = 1; 905 d = 0; 906 while(s<es && *s!=' '){ 907 s++; /* skip letter */ 908 d += strtoul(s, &s, 10); 909 } 910 911 switch(d){ 912 case 32: 913 case 24: 914 case 16: 915 case 8: 916 return d; 917 } 918 return -1; 919 } 920 921 int 922 isp9bit(void) 923 { 924 int dep, lox, loy, hix, hiy, px, new; 925 ulong t; 926 long len; 927 char *newlabel; 928 929 newlabel = "old "; 930 931 dep = depthof((char*)buf + 0*P9BITLEN, &new); 932 if(new) 933 newlabel = ""; 934 lox = p9bitnum(buf + 1*P9BITLEN); 935 loy = p9bitnum(buf + 2*P9BITLEN); 936 hix = p9bitnum(buf + 3*P9BITLEN); 937 hiy = p9bitnum(buf + 4*P9BITLEN); 938 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) 939 return 0; 940 941 if(dep < 8){ 942 px = 8/dep; /* pixels per byte */ 943 /* set l to number of bytes of data per scan line */ 944 if(lox >= 0) 945 len = (hix+px-1)/px - lox/px; 946 else{ /* make positive before divide */ 947 t = (-lox)+px-1; 948 t = (t/px)*px; 949 len = (t+hix+px-1)/px; 950 } 951 }else 952 len = (hix-lox)*dep/8; 953 len *= (hiy-loy); /* col length */ 954 len += 5*P9BITLEN; /* size of initial ascii */ 955 956 /* 957 * for image file, length is non-zero and must match calculation above 958 * for /dev/window and /dev/screen the length is always zero 959 * for subfont, the subfont header should follow immediately. 960 */ 961 if (len != 0 && mbuf.length == 0) { 962 print("%splan 9 image\n", newlabel); 963 return 1; 964 } 965 if (mbuf.length == len) { 966 print("%splan 9 image\n", newlabel); 967 return 1; 968 } 969 /* Ghostscript sometimes produces a little extra on the end */ 970 if (mbuf.length < len+P9BITLEN) { 971 print("%splan 9 image\n", newlabel); 972 return 1; 973 } 974 if (p9subfont(buf+len)) { 975 print("%ssubfont file\n", newlabel); 976 return 1; 977 } 978 return 0; 979 } 980 981 int 982 p9subfont(uchar *p) 983 { 984 int n, h, a; 985 986 /* if image too big, assume it's a subfont */ 987 if (p+3*P9BITLEN > buf+sizeof(buf)) 988 return 1; 989 990 n = p9bitnum(p + 0*P9BITLEN); /* char count */ 991 if (n < 0) 992 return 0; 993 h = p9bitnum(p + 1*P9BITLEN); /* height */ 994 if (h < 0) 995 return 0; 996 a = p9bitnum(p + 2*P9BITLEN); /* ascent */ 997 if (a < 0) 998 return 0; 999 return 1; 1000 } 1001 1002 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1003 1004 int 1005 isp9font(void) 1006 { 1007 uchar *cp, *p; 1008 int i, n; 1009 char dbuf[DIRLEN]; 1010 char pathname[1024]; 1011 1012 cp = buf; 1013 if (!getfontnum(cp, &cp)) /* height */ 1014 return 0; 1015 if (!getfontnum(cp, &cp)) /* ascent */ 1016 return 0; 1017 for (i = 0; 1; i++) { 1018 if (!getfontnum(cp, &cp)) /* min */ 1019 break; 1020 if (!getfontnum(cp, &cp)) /* max */ 1021 return 0; 1022 while (WHITESPACE(*cp)) 1023 cp++; 1024 for (p = cp; *cp && !WHITESPACE(*cp); cp++) 1025 ; 1026 /* construct a path name, if needed */ 1027 n = 0; 1028 if (*p != '/' && slash) { 1029 n = slash-fname+1; 1030 if (n < sizeof(pathname)) 1031 memcpy(pathname, fname, n); 1032 else n = 0; 1033 } 1034 if (n+cp-p < sizeof(pathname)) { 1035 memcpy(pathname+n, p, cp-p); 1036 n += cp-p; 1037 pathname[n] = 0; 1038 if (stat(pathname, dbuf) < 0) 1039 return 0; 1040 } 1041 } 1042 if (i) { 1043 print("font file\n"); 1044 return 1; 1045 } 1046 return 0; 1047 } 1048 1049 int 1050 getfontnum(uchar *cp, uchar **rp) 1051 { 1052 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ 1053 cp++; 1054 if (*cp < '0' || *cp > '9') 1055 return 0; 1056 strtoul((char *)cp, (char **)rp, 0); 1057 if (!WHITESPACE(**rp)) 1058 return 0; 1059 return 1; 1060 } 1061 1062 int 1063 ishp(void) 1064 { 1065 if (strncmp("\033%-12345X", (char *)buf, 9)==0) { 1066 print("HPJCL file\n"); 1067 return 1; 1068 } 1069 return 0; 1070 } 1071