1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 #include <mach.h> 6 7 /* 8 * file - determine type of file 9 */ 10 #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) 11 12 uchar buf[6001]; 13 short cfreq[140]; 14 short wfreq[50]; 15 int nbuf; 16 Dir* mbuf; 17 int fd; 18 char *fname; 19 char *slash; 20 21 enum 22 { 23 Cword, 24 Fword, 25 Aword, 26 Alword, 27 Lword, 28 I1, 29 I2, 30 I3, 31 Clatin = 128, 32 Cbinary, 33 Cnull, 34 Ceascii, 35 Cutf, 36 }; 37 struct 38 { 39 char* word; 40 int class; 41 } dict[] = 42 { 43 "PATH", Lword, 44 "TEXT", Aword, 45 "adt", Alword, 46 "aggr", Alword, 47 "alef", Alword, 48 "array", Lword, 49 "block", Fword, 50 "chan", Alword, 51 "char", Cword, 52 "common", Fword, 53 "con", Lword, 54 "data", Fword, 55 "dimension", Fword, 56 "double", Cword, 57 "extern", Cword, 58 "bio", I2, 59 "float", Cword, 60 "fn", Lword, 61 "function", Fword, 62 "h", I3, 63 "implement", Lword, 64 "import", Lword, 65 "include", I1, 66 "int", Cword, 67 "integer", Fword, 68 "iota", Lword, 69 "libc", I2, 70 "long", Cword, 71 "module", Lword, 72 "real", Fword, 73 "ref", Lword, 74 "register", Cword, 75 "self", Lword, 76 "short", Cword, 77 "static", Cword, 78 "stdio", I2, 79 "struct", Cword, 80 "subroutine", Fword, 81 "u", I2, 82 "void", Cword, 83 }; 84 85 /* codes for 'mode' field in language structure */ 86 enum { 87 Normal = 0, 88 First, /* first entry for language spanning several ranges */ 89 Multi, /* later entries " " " ... */ 90 Shared, /* codes used in several languages */ 91 }; 92 93 struct 94 { 95 int mode; /* see enum above */ 96 int count; 97 int low; 98 int high; 99 char *name; 100 101 } language[] = 102 { 103 Normal, 0, 0x0080, 0x0080, "Extended Latin", 104 Normal, 0, 0x0100, 0x01FF, "Extended Latin", 105 Normal, 0, 0x0370, 0x03FF, "Greek", 106 Normal, 0, 0x0400, 0x04FF, "Cyrillic", 107 Normal, 0, 0x0530, 0x058F, "Armenian", 108 Normal, 0, 0x0590, 0x05FF, "Hebrew", 109 Normal, 0, 0x0600, 0x06FF, "Arabic", 110 Normal, 0, 0x0900, 0x097F, "Devanagari", 111 Normal, 0, 0x0980, 0x09FF, "Bengali", 112 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", 113 Normal, 0, 0x0A80, 0x0AFF, "Gujarati", 114 Normal, 0, 0x0B00, 0x0B7F, "Oriya", 115 Normal, 0, 0x0B80, 0x0BFF, "Tamil", 116 Normal, 0, 0x0C00, 0x0C7F, "Telugu", 117 Normal, 0, 0x0C80, 0x0CFF, "Kannada", 118 Normal, 0, 0x0D00, 0x0D7F, "Malayalam", 119 Normal, 0, 0x0E00, 0x0E7F, "Thai", 120 Normal, 0, 0x0E80, 0x0EFF, "Lao", 121 Normal, 0, 0x1000, 0x105F, "Tibetan", 122 Normal, 0, 0x10A0, 0x10FF, "Georgian", 123 Normal, 0, 0x3040, 0x30FF, "Japanese", 124 Normal, 0, 0x3100, 0x312F, "Chinese", 125 First, 0, 0x3130, 0x318F, "Korean", 126 Multi, 0, 0x3400, 0x3D2F, "Korean", 127 Shared, 0, 0x4e00, 0x9fff, "CJK", 128 Normal, 0, 0, 0, 0, /* terminal entry */ 129 }; 130 131 132 enum 133 { 134 Fascii, /* printable ascii */ 135 Flatin, /* latin 1*/ 136 Futf, /* UTf character set */ 137 Fbinary, /* binary */ 138 Feascii, /* ASCII with control chars */ 139 Fnull, /* NULL in file */ 140 } guess; 141 142 void bump_utf_count(Rune); 143 int cistrncmp(char*, char*, int); 144 void filetype(int); 145 int getfontnum(uchar*, uchar**); 146 int isas(void); 147 int isc(void); 148 int iscint(void); 149 int isenglish(void); 150 int ishp(void); 151 int ishtml(void); 152 int isrfc822(void); 153 int islimbo(void); 154 int ismung(void); 155 int isp9bit(void); 156 int isp9font(void); 157 int istring(void); 158 int long0(void); 159 int p9bitnum(uchar*); 160 int p9subfont(uchar*); 161 void print_utf(void); 162 void type(char*, int); 163 int utf_count(void); 164 void wordfreq(void); 165 166 int (*call[])(void) = 167 { 168 long0, /* recognizable by first 4 bytes */ 169 istring, /* recognizable by first string */ 170 ishtml, /* html keywords */ 171 isrfc822, /* email file */ 172 iscint, /* compiler/assembler intermediate */ 173 islimbo, /* limbo source */ 174 isc, /* c & alef compiler key words */ 175 isas, /* assembler key words */ 176 ismung, /* entropy compressed/encrypted */ 177 isp9font, /* plan 9 font */ 178 isp9bit, /* plan 9 image (as from /dev/window) */ 179 isenglish, /* char frequency English */ 180 ishp, /* HP Job Control Language - Postscript */ 181 0 182 }; 183 184 int mime; 185 186 #define OCTET "application/octet-stream\n" 187 #define PLAIN "text/plain\n" 188 189 void 190 main(int argc, char *argv[]) 191 { 192 int i, j, maxlen; 193 char *cp; 194 Rune r; 195 196 ARGBEGIN{ 197 case 'm': 198 mime = 1; 199 break; 200 default: 201 fprint(2, "usage: file [-m] [file...]\n"); 202 exits("usage"); 203 }ARGEND; 204 205 maxlen = 0; 206 if(mime == 0 || argc > 1){ 207 for(i = 0; i < argc; i++) { 208 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) 209 ; 210 if(j > maxlen) 211 maxlen = j; 212 } 213 } 214 if (argc <= 0) { 215 if(!mime) 216 print ("stdin: "); 217 filetype(0); 218 } 219 else { 220 for(i = 0; i < argc; i++) 221 type(argv[i], maxlen); 222 } 223 exits(0); 224 } 225 226 void 227 type(char *file, int nlen) 228 { 229 Rune r; 230 int i; 231 char *p; 232 233 if(nlen > 0){ 234 slash = 0; 235 for (i = 0, p = file; *p; i++) { 236 if (*p == '/') /* find rightmost slash */ 237 slash = p; 238 p += chartorune(&r, p); /* count runes */ 239 } 240 print("%s:%*s",file, nlen-i+1, ""); 241 } 242 fname = file; 243 if ((fd = open(file, OREAD)) < 0) { 244 print("cannot open\n"); 245 return; 246 } 247 filetype(fd); 248 close(fd); 249 } 250 251 void 252 filetype(int fd) 253 { 254 Rune r; 255 int i, f, n; 256 char *p, *eob; 257 258 free(mbuf); 259 mbuf = dirfstat(fd); 260 if(mbuf == nil){ 261 print("cannot stat: %r\n"); 262 return; 263 } 264 if(mbuf->mode & DMDIR) { 265 print(mime ? "text/directory\n" : "directory\n"); 266 return; 267 } 268 if(mbuf->type != 'M' && mbuf->type != '|') { 269 print(mime ? OCTET : "special file #%c/%s\n", 270 mbuf->type, mbuf->name); 271 return; 272 } 273 nbuf = read(fd, buf, sizeof(buf)-1); 274 275 if(nbuf < 0) { 276 print("cannot read\n"); 277 return; 278 } 279 if(nbuf == 0) { 280 print(mime ? PLAIN : "empty file\n"); 281 return; 282 } 283 buf[nbuf] = 0; 284 285 /* 286 * build histogram table 287 */ 288 memset(cfreq, 0, sizeof(cfreq)); 289 for (i = 0; language[i].name; i++) 290 language[i].count = 0; 291 eob = (char *)buf+nbuf; 292 for(n = 0, p = (char *)buf; p < eob; n++) { 293 if (!fullrune(p, eob-p) && eob-p < UTFmax) 294 break; 295 p += chartorune(&r, p); 296 if (r == 0) 297 f = Cnull; 298 else if (r <= 0x7f) { 299 if (!isprint(r) && !isspace(r)) 300 f = Ceascii; /* ASCII control char */ 301 else f = r; 302 } else if (r == 0x080) { 303 bump_utf_count(r); 304 f = Cutf; 305 } else if (r < 0xA0) 306 f = Cbinary; /* Invalid Runes */ 307 else if (r <= 0xff) 308 f = Clatin; /* Latin 1 */ 309 else { 310 bump_utf_count(r); 311 f = Cutf; /* UTF extension */ 312 } 313 cfreq[f]++; /* ASCII chars peg directly */ 314 } 315 /* 316 * gross classify 317 */ 318 if (cfreq[Cbinary]) 319 guess = Fbinary; 320 else if (cfreq[Cutf]) 321 guess = Futf; 322 else if (cfreq[Clatin]) 323 guess = Flatin; 324 else if (cfreq[Ceascii]) 325 guess = Feascii; 326 else if (cfreq[Cnull] == n) { 327 print(mime ? OCTET : "first block all null bytes\n"); 328 return; 329 } 330 else guess = Fascii; 331 /* 332 * lookup dictionary words 333 */ 334 memset(wfreq, 0, sizeof(wfreq)); 335 if(guess == Fascii || guess == Flatin || guess == Futf) 336 wordfreq(); 337 /* 338 * call individual classify routines 339 */ 340 for(i=0; call[i]; i++) 341 if((*call[i])()) 342 return; 343 344 /* 345 * if all else fails, 346 * print out gross classification 347 */ 348 if (nbuf < 100 && !mime) 349 print(mime ? PLAIN : "short "); 350 if (guess == Fascii) 351 print(mime ? PLAIN : "Ascii\n"); 352 else if (guess == Feascii) 353 print(mime ? PLAIN : "extended ascii\n"); 354 else if (guess == Flatin) 355 print(mime ? PLAIN : "latin ascii\n"); 356 else if (guess == Futf && utf_count() < 4) 357 print_utf(); 358 else print(mime ? OCTET : "binary\n"); 359 } 360 361 void 362 bump_utf_count(Rune r) 363 { 364 int low, high, mid; 365 366 high = sizeof(language)/sizeof(language[0])-1; 367 for (low = 0; low < high;) { 368 mid = (low+high)/2; 369 if (r >=language[mid].low) { 370 if (r <= language[mid].high) { 371 language[mid].count++; 372 break; 373 } else low = mid+1; 374 } else high = mid; 375 } 376 } 377 378 int 379 utf_count(void) 380 { 381 int i, count; 382 383 count = 0; 384 for (i = 0; language[i].name; i++) 385 if (language[i].count > 0) 386 switch (language[i].mode) { 387 case Normal: 388 case First: 389 count++; 390 break; 391 default: 392 break; 393 } 394 return count; 395 } 396 397 int 398 chkascii(void) 399 { 400 int i; 401 402 for (i = 'a'; i < 'z'; i++) 403 if (cfreq[i]) 404 return 1; 405 for (i = 'A'; i < 'Z'; i++) 406 if (cfreq[i]) 407 return 1; 408 return 0; 409 } 410 411 int 412 find_first(char *name) 413 { 414 int i; 415 416 for (i = 0; language[i].name != 0; i++) 417 if (language[i].mode == First 418 && strcmp(language[i].name, name) == 0) 419 return i; 420 return -1; 421 } 422 423 void 424 print_utf(void) 425 { 426 int i, printed, j; 427 428 if(mime){ 429 print(PLAIN); 430 return; 431 } 432 if (chkascii()) { 433 printed = 1; 434 print("Ascii"); 435 } else 436 printed = 0; 437 for (i = 0; language[i].name; i++) 438 if (language[i].count) { 439 switch(language[i].mode) { 440 case Multi: 441 j = find_first(language[i].name); 442 if (j < 0) 443 break; 444 if (language[j].count > 0) 445 break; 446 /* Fall through */ 447 case Normal: 448 case First: 449 if (printed) 450 print(" & "); 451 else printed = 1; 452 print("%s", language[i].name); 453 break; 454 case Shared: 455 default: 456 break; 457 } 458 } 459 if(!printed) 460 print("UTF"); 461 print(" text\n"); 462 } 463 464 void 465 wordfreq(void) 466 { 467 int low, high, mid, r; 468 uchar *p, *p2, c; 469 470 p = buf; 471 for(;;) { 472 while (p < buf+nbuf && !isalpha(*p)) 473 p++; 474 if (p >= buf+nbuf) 475 return; 476 p2 = p; 477 while(p < buf+nbuf && isalpha(*p)) 478 p++; 479 c = *p; 480 *p = 0; 481 high = sizeof(dict)/sizeof(dict[0]); 482 for(low = 0;low < high;) { 483 mid = (low+high)/2; 484 r = strcmp(dict[mid].word, (char*)p2); 485 if(r == 0) { 486 wfreq[dict[mid].class]++; 487 break; 488 } 489 if(r < 0) 490 low = mid+1; 491 else 492 high = mid; 493 } 494 *p++ = c; 495 } 496 } 497 498 typedef struct Filemagic Filemagic; 499 struct Filemagic { 500 ulong x; 501 ulong mask; 502 char *desc; 503 char *mime; 504 }; 505 506 Filemagic long0tab[] = { 507 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, 508 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, 509 0x32636170, 0xFFFF00FF, "pac4 audio file\n", OCTET, 510 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, 511 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, 512 0x04034B50, 0xFFFFFFFF, "zip archive\n", OCTET, 513 070707, 0xFFFF, "cpio archive\n", OCTET, 514 0x2F7, 0xFFFF, "tex dvi\n", OCTET, 515 }; 516 517 int 518 filemagic(Filemagic *tab, int ntab, ulong x) 519 { 520 int i; 521 522 for(i=0; i<ntab; i++) 523 if((x&tab[i].mask) == tab[i].x){ 524 print(mime ? tab[i].mime : tab[i].desc); 525 return 1; 526 } 527 return 0; 528 } 529 530 int 531 long0(void) 532 { 533 Fhdr f; 534 long x; 535 536 seek(fd, 0, 0); /* reposition to start of file */ 537 if(crackhdr(fd, &f)) { 538 print(mime ? OCTET : "%s\n", f.name); 539 return 1; 540 } 541 x = LENDIAN(buf); 542 if(filemagic(long0tab, nelem(long0tab), x)) 543 return 1; 544 return 0; 545 } 546 547 /* 548 * initial words to classify file 549 */ 550 struct FILE_STRING 551 { 552 char *key; 553 char *filetype; 554 int length; 555 char *mime; 556 } file_string[] = 557 { 558 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", 559 "!<arch>\n", "archive", 8, "application/octet-stream", 560 "070707", "cpio archive - ascii header", 6, "application/octet-stream", 561 "#!/bin/rc", "rc executable file", 9, "text/plain", 562 "#!/bin/sh", "sh executable file", 9, "text/plain", 563 "%!", "postscript", 2, "application/postscript", 564 "\004%!", "postscript", 3, "application/postscript", 565 "x T post", "troff output for post", 8, "application/troff", 566 "x T Latin1", "troff output for Latin1", 10, "application/troff", 567 "x T utf", "troff output for UTF", 7, "application/troff", 568 "x T 202", "troff output for 202", 7, "application/troff", 569 "x T aps", "troff output for aps", 7, "application/troff", 570 "GIF", "GIF image", 3, "image/gif", 571 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", 572 "%PDF", "PDF", 4, "application/pdf", 573 "From ", "mail box", 5, "text/plain", 574 "<html>\n", "HTML file", 7, "text/html", 575 "<HTML>\n", "HTML file", 7, "text/html", 576 "compressed\n", "Compressed image or subfont", 11, "application/octet-stream", 577 "\111\111\052\000", "tiff", 4, "image/tiff", 578 "\115\115\000\052", "tiff", 4, "image/tiff", 579 "\377\330\377\340", "jpeg", 4, "image/jpeg", 580 "\377\330\377\341", "jpeg", 4, "image/jpeg", 581 "\377\330\377\333", "jpeg", 4, "image/jpeg", 582 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", 583 0,0,0,0 584 }; 585 586 int 587 istring(void) 588 { 589 int i; 590 struct FILE_STRING *p; 591 592 for(p = file_string; p->key; p++) { 593 if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { 594 if(mime) 595 print("%s\n", p->mime); 596 else 597 print("%s\n", p->filetype); 598 return 1; 599 } 600 } 601 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ 602 for(i = 5; i < nbuf; i++) 603 if(buf[i] == '\n') 604 break; 605 if(mime) 606 print(OCTET); 607 else 608 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); 609 return 1; 610 } 611 return 0; 612 } 613 614 char* html_string[] = 615 { 616 "title", 617 "body", 618 "head", 619 "strong", 620 "h1", 621 "h2", 622 "h3", 623 "h4", 624 "h5", 625 "h6", 626 "ul", 627 "li", 628 "dl", 629 "br", 630 "em", 631 0, 632 }; 633 634 int 635 ishtml(void) 636 { 637 uchar *p, *q; 638 int i, count; 639 640 /* compare strings between '<' and '>' to html table */ 641 count = 0; 642 p = buf; 643 for(;;) { 644 while (p < buf+nbuf && *p != '<') 645 p++; 646 p++; 647 if (p >= buf+nbuf) 648 break; 649 if(*p == '/') 650 p++; 651 q = p; 652 while(p < buf+nbuf && *p != '>') 653 p++; 654 if (p >= buf+nbuf) 655 break; 656 for(i = 0; html_string[i]; i++) { 657 if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { 658 if(count++ > 4) { 659 print(mime ? "text/html\n" : "HTML file\n"); 660 return 1; 661 } 662 break; 663 } 664 } 665 p++; 666 } 667 return 0; 668 } 669 670 char* rfc822_string[] = 671 { 672 "from:", 673 "date:", 674 "to:", 675 "subject:", 676 "received:", 677 0, 678 }; 679 680 int 681 isrfc822(void) 682 { 683 684 char *p, *q, *r; 685 int i, count; 686 687 count = 0; 688 p = (char*)buf; 689 for(;;) { 690 q = strchr(p, '\n'); 691 if(q == nil) 692 break; 693 if(*p != '\t' && *p != ' '){ 694 r = strchr(p, ':'); 695 if(r == 0 || r > q) 696 break; 697 for(i = 0; rfc822_string[i]; i++) { 698 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ 699 count++; 700 break; 701 } 702 } 703 } 704 p = q+1; 705 } 706 if(count >= 3){ 707 print(mime ? "message/rfc822\n" : "email file\n"); 708 return 1; 709 } 710 return 0; 711 } 712 713 int 714 iscint(void) 715 { 716 int type; 717 char *name; 718 Biobuf b; 719 720 if(Binit(&b, fd, OREAD) == Beof) 721 return 0; 722 seek(fd, 0, 0); 723 type = objtype(&b, &name); 724 if(type < 0) 725 return 0; 726 if(mime) 727 print(OCTET); 728 else 729 print("%s intermediate\n", name); 730 return 1; 731 } 732 733 int 734 isc(void) 735 { 736 int n; 737 738 n = wfreq[I1]; 739 /* 740 * includes 741 */ 742 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 743 goto yes; 744 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) 745 goto yes; 746 /* 747 * declarations 748 */ 749 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) 750 goto yes; 751 /* 752 * assignments 753 */ 754 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) 755 goto yes; 756 return 0; 757 758 yes: 759 if(mime){ 760 print(PLAIN); 761 return 1; 762 } 763 if(wfreq[Alword] > 0) 764 print("alef program\n"); 765 else 766 print("c program\n"); 767 return 1; 768 } 769 770 int 771 islimbo(void) 772 { 773 774 /* 775 * includes 776 */ 777 if(wfreq[Lword] < 4) 778 return 0; 779 print(mime ? PLAIN : "limbo program\n"); 780 return 1; 781 } 782 783 int 784 isas(void) 785 { 786 787 /* 788 * includes 789 */ 790 if(wfreq[Aword] < 2) 791 return 0; 792 print(mime ? PLAIN : "as program\n"); 793 return 1; 794 } 795 796 /* 797 * low entropy means encrypted 798 */ 799 int 800 ismung(void) 801 { 802 int i, bucket[8]; 803 float cs; 804 805 if(nbuf < 64) 806 return 0; 807 memset(bucket, 0, sizeof(bucket)); 808 for(i=0; i<64; i++) 809 bucket[(buf[i]>>5)&07] += 1; 810 811 cs = 0.; 812 for(i=0; i<8; i++) 813 cs += (bucket[i]-8)*(bucket[i]-8); 814 cs /= 8.; 815 if(cs <= 24.322) { 816 if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) 817 print(mime ? OCTET : "compressed\n"); 818 else 819 print(mime ? OCTET : "encrypted\n"); 820 return 1; 821 } 822 return 0; 823 } 824 825 /* 826 * english by punctuation and frequencies 827 */ 828 int 829 isenglish(void) 830 { 831 int vow, comm, rare, badpun, punct; 832 char *p; 833 834 if(guess != Fascii && guess != Feascii) 835 return 0; 836 badpun = 0; 837 punct = 0; 838 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) 839 switch(*p) { 840 case '.': 841 case ',': 842 case ')': 843 case '%': 844 case ';': 845 case ':': 846 case '?': 847 punct++; 848 if(p[1] != ' ' && p[1] != '\n') 849 badpun++; 850 } 851 if(badpun*5 > punct) 852 return 0; 853 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ 854 return 0; 855 if(2*cfreq[';'] > cfreq['e']) 856 return 0; 857 858 vow = 0; 859 for(p="AEIOU"; *p; p++) { 860 vow += cfreq[*p]; 861 vow += cfreq[tolower(*p)]; 862 } 863 comm = 0; 864 for(p="ETAION"; *p; p++) { 865 comm += cfreq[*p]; 866 comm += cfreq[tolower(*p)]; 867 } 868 rare = 0; 869 for(p="VJKQXZ"; *p; p++) { 870 rare += cfreq[*p]; 871 rare += cfreq[tolower(*p)]; 872 } 873 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { 874 print(mime ? PLAIN : "English text\n"); 875 return 1; 876 } 877 return 0; 878 } 879 880 /* 881 * pick up a number with 882 * syntax _*[0-9]+_ 883 */ 884 #define P9BITLEN 12 885 int 886 p9bitnum(uchar *bp) 887 { 888 int n, c, len; 889 890 len = P9BITLEN; 891 while(*bp == ' ') { 892 bp++; 893 len--; 894 if(len <= 0) 895 return -1; 896 } 897 n = 0; 898 while(len > 1) { 899 c = *bp++; 900 if(!isdigit(c)) 901 return -1; 902 n = n*10 + c-'0'; 903 len--; 904 } 905 if(*bp != ' ') 906 return -1; 907 return n; 908 } 909 910 int 911 depthof(char *s, int *newp) 912 { 913 char *es; 914 int d; 915 916 *newp = 0; 917 es = s+12; 918 while(s<es && *s==' ') 919 s++; 920 if(s == es) 921 return -1; 922 if('0'<=*s && *s<='9') 923 return 1<<atoi(s); 924 925 *newp = 1; 926 d = 0; 927 while(s<es && *s!=' '){ 928 s++; /* skip letter */ 929 d += strtoul(s, &s, 10); 930 } 931 932 switch(d){ 933 case 32: 934 case 24: 935 case 16: 936 case 8: 937 return d; 938 } 939 return -1; 940 } 941 942 int 943 isp9bit(void) 944 { 945 int dep, lox, loy, hix, hiy, px, new; 946 ulong t; 947 long len; 948 char *newlabel; 949 950 newlabel = "old "; 951 952 dep = depthof((char*)buf + 0*P9BITLEN, &new); 953 if(new) 954 newlabel = ""; 955 lox = p9bitnum(buf + 1*P9BITLEN); 956 loy = p9bitnum(buf + 2*P9BITLEN); 957 hix = p9bitnum(buf + 3*P9BITLEN); 958 hiy = p9bitnum(buf + 4*P9BITLEN); 959 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) 960 return 0; 961 962 if(dep < 8){ 963 px = 8/dep; /* pixels per byte */ 964 /* set l to number of bytes of data per scan line */ 965 if(lox >= 0) 966 len = (hix+px-1)/px - lox/px; 967 else{ /* make positive before divide */ 968 t = (-lox)+px-1; 969 t = (t/px)*px; 970 len = (t+hix+px-1)/px; 971 } 972 }else 973 len = (hix-lox)*dep/8; 974 len *= (hiy-loy); /* col length */ 975 len += 5*P9BITLEN; /* size of initial ascii */ 976 977 /* 978 * for image file, length is non-zero and must match calculation above 979 * for /dev/window and /dev/screen the length is always zero 980 * for subfont, the subfont header should follow immediately. 981 */ 982 if (len != 0 && mbuf->length == 0) { 983 print("%splan 9 image\n", newlabel); 984 return 1; 985 } 986 if (mbuf->length == len) { 987 print("%splan 9 image\n", newlabel); 988 return 1; 989 } 990 /* Ghostscript sometimes produces a little extra on the end */ 991 if (mbuf->length < len+P9BITLEN) { 992 print("%splan 9 image\n", newlabel); 993 return 1; 994 } 995 if (p9subfont(buf+len)) { 996 print("%ssubfont file\n", newlabel); 997 return 1; 998 } 999 return 0; 1000 } 1001 1002 int 1003 p9subfont(uchar *p) 1004 { 1005 int n, h, a; 1006 1007 /* if image too big, assume it's a subfont */ 1008 if (p+3*P9BITLEN > buf+sizeof(buf)) 1009 return 1; 1010 1011 n = p9bitnum(p + 0*P9BITLEN); /* char count */ 1012 if (n < 0) 1013 return 0; 1014 h = p9bitnum(p + 1*P9BITLEN); /* height */ 1015 if (h < 0) 1016 return 0; 1017 a = p9bitnum(p + 2*P9BITLEN); /* ascent */ 1018 if (a < 0) 1019 return 0; 1020 return 1; 1021 } 1022 1023 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1024 1025 int 1026 isp9font(void) 1027 { 1028 uchar *cp, *p; 1029 int i, n; 1030 char pathname[1024]; 1031 1032 cp = buf; 1033 if (!getfontnum(cp, &cp)) /* height */ 1034 return 0; 1035 if (!getfontnum(cp, &cp)) /* ascent */ 1036 return 0; 1037 for (i = 0; 1; i++) { 1038 if (!getfontnum(cp, &cp)) /* min */ 1039 break; 1040 if (!getfontnum(cp, &cp)) /* max */ 1041 return 0; 1042 while (WHITESPACE(*cp)) 1043 cp++; 1044 for (p = cp; *cp && !WHITESPACE(*cp); cp++) 1045 ; 1046 /* construct a path name, if needed */ 1047 n = 0; 1048 if (*p != '/' && slash) { 1049 n = slash-fname+1; 1050 if (n < sizeof(pathname)) 1051 memcpy(pathname, fname, n); 1052 else n = 0; 1053 } 1054 if (n+cp-p < sizeof(pathname)) { 1055 memcpy(pathname+n, p, cp-p); 1056 n += cp-p; 1057 pathname[n] = 0; 1058 if (access(pathname, AEXIST) < 0) 1059 return 0; 1060 } 1061 } 1062 if (i) { 1063 print("font file\n"); 1064 return 1; 1065 } 1066 return 0; 1067 } 1068 1069 int 1070 getfontnum(uchar *cp, uchar **rp) 1071 { 1072 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ 1073 cp++; 1074 if (*cp < '0' || *cp > '9') 1075 return 0; 1076 strtoul((char *)cp, (char **)rp, 0); 1077 if (!WHITESPACE(**rp)) 1078 return 0; 1079 return 1; 1080 } 1081 1082 int 1083 ishp(void) 1084 { 1085 if (strncmp("\033%-12345X", (char *)buf, 9)==0) { 1086 print("HPJCL file\n"); 1087 return 1; 1088 } 1089 return 0; 1090 } 1091