1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 5 /* 6 * Deroff command -- strip troff, eqn, and tbl sequences from 7 * a file. Has three flags argument, -w, to cause output one word per line 8 * rather than in the original format. 9 * -mm (or -ms) causes the corresponding macro's to be interpreted 10 * so that just sentences are output 11 * -ml also gets rid of lists. 12 * -i causes deroff to ignore .so and .nx commands. 13 * Deroff follows .so and .nx commands, removes contents of macro 14 * definitions, equations (both .EQ ... .EN and $...$), 15 * Tbl command sequences, and Troff backslash vconstructions. 16 * 17 * All input is through the C macro; the most recently read character is in c. 18 */ 19 20 #define C ((c = Bgetrune(infile)) < 0?\ 21 eof():\ 22 ((c == ldelim) && (filesp == files)?\ 23 skeqn():\ 24 (c == '\n'?\ 25 (linect++,c):\ 26 c))) 27 #define C1 ((c = Bgetrune(infile)) == Beof?\ 28 eof():\ 29 (c == '\n'?\ 30 (linect++,c):\ 31 c)) 32 #define SKIP while(C != '\n') 33 #define SKIP1 while(C1 != '\n') 34 #define SKIP_TO_COM SKIP;\ 35 SKIP;\ 36 pc=c;\ 37 while(C != '.' || pc != '\n' || C > 'Z')\ 38 pc=c 39 40 #define YES 1 41 #define NO 0 42 #define MS 0 43 #define MM 1 44 #define ONE 1 45 #define TWO 2 46 47 #define NOCHAR -2 48 #define EXTENDED -1 /* All runes above 0x7F */ 49 #define SPECIAL 0 50 #define APOS 1 51 #define PUNCT 2 52 #define DIGIT 3 53 #define LETTER 4 54 55 56 int linect = 0; 57 int wordflag= NO; 58 int msflag = NO; 59 int iflag = NO; 60 int mac = MM; 61 int disp = 0; 62 int inmacro = NO; 63 int intable = NO; 64 int eqnflag = 0; 65 66 #define MAX_ASCII 0X80 67 #define CHARCLASS(c) ((c) >= MAX_ASCII ? EXTENDED : chars[(c)]) 68 69 char chars[MAX_ASCII]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ 70 71 Rune line[4096]; 72 Rune* lp; 73 74 long c; 75 long pc; 76 int ldelim = NOCHAR; 77 int rdelim = NOCHAR; 78 79 80 char** argv; 81 82 char fname[50]; 83 Biobuf* files[15]; 84 Biobuf**filesp; 85 Biobuf* infile; 86 char* devnull = "/dev/null"; 87 Biobuf *infile; 88 Biobuf bout; 89 90 long skeqn(void); 91 Biobuf* opn(char *p); 92 int eof(void); 93 void getfname(void); 94 void fatal(char *s, char *p); 95 void usage(void); 96 void work(void); 97 void putmac(Rune *rp, int vconst); 98 void regline(int macline, int vconst); 99 void putwords(void); 100 void comline(void); 101 void macro(void); 102 void eqn(void); 103 void tbl(void); 104 void stbl(void); 105 void sdis(char a1, char a2); 106 void sce(void); 107 void backsl(void); 108 char* copys(char *s); 109 void refer(int c1); 110 void inpic(void); 111 112 void 113 main(int argc, char *av[]) 114 { 115 int i; 116 char *f; 117 118 argv = av; 119 Binit(&bout, 1, OWRITE); 120 ARGBEGIN{ 121 case 'w': 122 wordflag = YES; 123 break; 124 case 'm': 125 msflag = YES; 126 if(f = ARGF()) 127 switch(*f) 128 { 129 case 'm': mac = MM; break; 130 case 's': mac = MS; break; 131 case 'l': disp = 1; break; 132 default: usage(); 133 } 134 else 135 usage(); 136 break; 137 case 'i': 138 iflag = YES; 139 break; 140 default: 141 usage(); 142 }ARGEND 143 if(*argv) 144 infile = opn(*argv++); 145 else{ 146 infile = malloc(sizeof(Biobuf)); 147 Binit(infile, 0, OREAD); 148 } 149 files[0] = infile; 150 filesp = &files[0]; 151 152 for(i='a'; i<='z' ; ++i) 153 chars[i] = LETTER; 154 for(i='A'; i<='Z'; ++i) 155 chars[i] = LETTER; 156 for(i='0'; i<='9'; ++i) 157 chars[i] = DIGIT; 158 chars['\''] = APOS; 159 chars['&'] = APOS; 160 chars['\b'] = APOS; 161 chars['.'] = PUNCT; 162 chars[','] = PUNCT; 163 chars[';'] = PUNCT; 164 chars['?'] = PUNCT; 165 chars[':'] = PUNCT; 166 work(); 167 } 168 169 long 170 skeqn(void) 171 { 172 while(C1 != rdelim) 173 if(c == '\\') 174 c = C1; 175 else if(c == '"') 176 while(C1 != '"') 177 if(c == '\\') 178 C1; 179 if (msflag) 180 eqnflag = 1; 181 return(c = ' '); 182 } 183 184 Biobuf* 185 opn(char *p) 186 { 187 Biobuf *fd; 188 189 while ((fd = Bopen(p, OREAD)) == 0) { 190 if(msflag || p == devnull) 191 fatal("Cannot open file %s - quitting\n", p); 192 else { 193 fprint(2, "Deroff: Cannot open file %s - continuing\n", p); 194 p = devnull; 195 } 196 } 197 linect = 0; 198 return(fd); 199 } 200 201 int 202 eof(void) 203 { 204 if(Bfildes(infile) != 0) 205 Bterm(infile); 206 if(filesp > files) 207 infile = *--filesp; 208 else 209 if(*argv) 210 infile = opn(*argv++); 211 else 212 exits(0); 213 return(C); 214 } 215 216 void 217 getfname(void) 218 { 219 char *p; 220 Rune r; 221 Dir dir; 222 struct chain 223 { 224 struct chain* nextp; 225 char* datap; 226 } *q; 227 228 static struct chain *namechain= 0; 229 230 while(C == ' ') 231 ; 232 for(p = fname; (r=c) != '\n' && r != ' ' && r != '\t' && r != '\\'; C) 233 p += runetochar(p, &r); 234 *p = '\0'; 235 while(c != '\n') 236 C; 237 if(!strcmp(fname, "/sys/lib/tmac/tmac.cs") 238 || !strcmp(fname, "/sys/lib/tmac/tmac.s")) { 239 fname[0] = '\0'; 240 return; 241 } 242 if(dirstat(fname, &dir) >= 0 && ((dir.mode & CHDIR) || dir.type != 'M')) { 243 fname[0] = '\0'; 244 return; 245 } 246 /* 247 * see if this name has already been used 248 */ 249 250 for(q = namechain; q; q = q->nextp) 251 if( !strcmp(fname, q->datap)) { 252 fname[0] = '\0'; 253 return; 254 } 255 q = (struct chain*)malloc(sizeof(struct chain)); 256 q->nextp = namechain; 257 q->datap = copys(fname); 258 namechain = q; 259 } 260 261 void 262 usage(void) 263 { 264 fprint(2,"usage: deroff [-nwpi] [-m (m s l)] [file ...] \n"); 265 exits("usage"); 266 } 267 268 void 269 fatal(char *s, char *p) 270 { 271 fprint(2, "deroff: "); 272 fprint(2, s, p); 273 exits(s); 274 } 275 276 void 277 work(void) 278 { 279 280 for(;;) { 281 eqnflag = 0; 282 if(C == '.' || c == '\'') 283 comline(); 284 else 285 regline(NO, TWO); 286 } 287 } 288 289 void 290 regline(int macline, int vconst) 291 { 292 line[0] = c; 293 lp = line; 294 for(;;) { 295 if(c == '\\') { 296 *lp = ' '; 297 backsl(); 298 if(c == '%') /* no blank for hyphenation char */ 299 lp--; 300 } 301 if(c == '\n') 302 break; 303 if(intable && c=='T') { 304 *++lp = C; 305 if(c=='{' || c=='}') { 306 lp[-1] = ' '; 307 *lp = C; 308 } 309 } else { 310 if(msflag == 1 && eqnflag == 1) { 311 eqnflag = 0; 312 *++lp = 'x'; 313 } 314 *++lp = C; 315 } 316 } 317 *lp = '\0'; 318 if(lp != line) { 319 if(wordflag) 320 putwords(); 321 else 322 if(macline) 323 putmac(line,vconst); 324 else 325 Bprint(&bout, "%S\n", line); 326 } 327 } 328 329 void 330 putmac(Rune *rp, int vconst) 331 { 332 Rune *t; 333 int found; 334 Rune last; 335 336 found = 0; 337 last = 0; 338 while(*rp) { 339 while(*rp == ' ' || *rp == '\t') 340 Bputrune(&bout, *rp++); 341 for(t = rp; *t != ' ' && *t != '\t' && *t != '\0'; t++) 342 ; 343 if(*rp == '\"') 344 rp++; 345 if(t > rp+vconst && CHARCLASS(*rp) == LETTER 346 && CHARCLASS(rp[1]) == LETTER) { 347 while(rp < t) 348 if(*rp == '\"') 349 rp++; 350 else 351 Bputrune(&bout, *rp++); 352 last = t[-1]; 353 found++; 354 } else 355 if(found && CHARCLASS(*rp) == PUNCT && rp[1] == '\0') 356 Bputrune(&bout, *rp++); 357 else { 358 last = t[-1]; 359 rp = t; 360 } 361 } 362 Bputc(&bout, '\n'); 363 if(msflag && CHARCLASS(last) == PUNCT) 364 Bprint(&bout, " %C\n", last); 365 } 366 367 /* 368 * break into words for -w option 369 */ 370 void 371 putwords(void) 372 { 373 Rune *p, *p1; 374 int i, nlet; 375 376 377 for(p1 = line;;) { 378 /* 379 * skip initial specials ampersands and apostrophes 380 */ 381 while((i = CHARCLASS(*p1)) != EXTENDED && i < DIGIT) 382 if(*p1++ == '\0') 383 return; 384 nlet = 0; 385 for(p = p1; (i = CHARCLASS(*p)) != SPECIAL; p++) 386 if(i == LETTER) 387 nlet++; 388 /* 389 * MDM definition of word 390 */ 391 if(nlet > 1) { 392 /* 393 * delete trailing ampersands and apostrophes 394 */ 395 while(*--p == '\'' || *p == '&' 396 || CHARCLASS(*p) == PUNCT) 397 ; 398 while(p1 <= p) 399 Bputrune(&bout, *p1++); 400 Bputc(&bout, '\n'); 401 } else 402 p1 = p; 403 } 404 } 405 406 void 407 comline(void) 408 { 409 long c1, c2; 410 411 com: 412 while(C==' ' || c=='\t') 413 ; 414 comx: 415 if((c1=c) == '\n') 416 return; 417 c2 = C; 418 if(c1=='.' && c2!='.') 419 inmacro = NO; 420 if(msflag && c1 == '['){ 421 refer(c2); 422 return; 423 } 424 if(c2 == '\n') 425 return; 426 if(c1 == '\\' && c2 == '\"') 427 SKIP; 428 else 429 if (filesp==files && c1=='E' && c2=='Q') 430 eqn(); 431 else 432 if(filesp==files && c1=='T' && (c2=='S' || c2=='C' || c2=='&')) { 433 if(msflag) 434 stbl(); 435 else 436 tbl(); 437 } 438 else 439 if(c1=='T' && c2=='E') 440 intable = NO; 441 else if (!inmacro && 442 ((c1 == 'd' && c2 == 'e') || 443 (c1 == 'i' && c2 == 'g') || 444 (c1 == 'a' && c2 == 'm'))) 445 macro(); 446 else 447 if(c1=='s' && c2=='o') { 448 if(iflag) 449 SKIP; 450 else { 451 getfname(); 452 if(fname[0]) { 453 if(infile = opn(fname)) 454 *++filesp = infile; 455 else infile = *filesp; 456 } 457 } 458 } 459 else 460 if(c1=='n' && c2=='x') 461 if(iflag) 462 SKIP; 463 else { 464 getfname(); 465 if(fname[0] == '\0') 466 exits(0); 467 if(Bfildes(infile) != 0) 468 Bterm(infile); 469 infile = *filesp = opn(fname); 470 } 471 else 472 if(c1 == 't' && c2 == 'm') 473 SKIP; 474 else 475 if(c1=='h' && c2=='w') 476 SKIP; 477 else 478 if(msflag && c1 == 'T' && c2 == 'L') { 479 SKIP_TO_COM; 480 goto comx; 481 } 482 else 483 if(msflag && c1=='N' && c2 == 'R') 484 SKIP; 485 else 486 if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){ 487 if(mac==MM)SKIP; 488 else { 489 SKIP_TO_COM; 490 goto comx; 491 } 492 } else 493 if(msflag && c1=='F' && c2=='S') { 494 SKIP_TO_COM; 495 goto comx; 496 } 497 else 498 if(msflag && (c1=='S' || c1=='N') && c2=='H') { 499 SKIP_TO_COM; 500 goto comx; 501 } else 502 if(c1 == 'U' && c2 == 'X') { 503 if(wordflag) 504 Bprint(&bout, "UNIX\n"); 505 else 506 Bprint(&bout, "UNIX "); 507 } else 508 if(msflag && c1=='O' && c2=='K') { 509 SKIP_TO_COM; 510 goto comx; 511 } else 512 if(msflag && c1=='N' && c2=='D') 513 SKIP; 514 else 515 if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U')) 516 SKIP; 517 else 518 if(msflag && mac==MM && c2=='L') { 519 if(disp || c1=='R') 520 sdis('L', 'E'); 521 else { 522 SKIP; 523 Bprint(&bout, " ."); 524 } 525 } else 526 if(!msflag && c1=='P' && c2=='S') { 527 inpic(); 528 } else 529 if(msflag && (c1=='D' || c1=='N' || c1=='K'|| c1=='P') && c2=='S') { 530 sdis(c1, 'E'); 531 } else 532 if(msflag && (c1 == 'K' && c2 == 'F')) { 533 sdis(c1,'E'); 534 } else 535 if(msflag && c1=='n' && c2=='f') 536 sdis('f','i'); 537 else 538 if(msflag && c1=='c' && c2=='e') 539 sce(); 540 else { 541 if(c1=='.' && c2=='.') { 542 if(msflag) { 543 SKIP; 544 return; 545 } 546 while(C == '.') 547 ; 548 } 549 inmacro++; 550 if(c1 <= 'Z' && msflag) 551 regline(YES,ONE); 552 else { 553 if(wordflag) 554 C; 555 regline(YES,TWO); 556 } 557 inmacro--; 558 } 559 } 560 561 void 562 macro(void) 563 { 564 if(msflag) { 565 do { 566 SKIP1; 567 } while(C1 != '.' || C1 != '.' || C1 == '.'); 568 if(c != '\n') 569 SKIP; 570 return; 571 } 572 SKIP; 573 inmacro = YES; 574 } 575 576 void 577 sdis(char a1, char a2) 578 { 579 int c1, c2; 580 int eqnf; 581 int lct; 582 583 if(a1 == 'P'){ 584 while(C1 == ' ') 585 ; 586 if(c == '<') { 587 SKIP1; 588 return; 589 } 590 } 591 lct = 0; 592 eqnf = 1; 593 if(c != '\n') 594 SKIP1; 595 for(;;) { 596 while(C1 != '.') 597 if(c == '\n') 598 continue; 599 else 600 SKIP1; 601 if((c1=C1) == '\n') 602 continue; 603 if((c2=C1) == '\n') { 604 if(a1 == 'f' && (c1 == 'P' || c1 == 'H')) 605 return; 606 continue; 607 } 608 if(c1==a1 && c2 == a2) { 609 SKIP1; 610 if(lct != 0){ 611 lct--; 612 continue; 613 } 614 if(eqnf) 615 Bprint(&bout, " ."); 616 Bputc(&bout, '\n'); 617 return; 618 } else 619 if(a1 == 'L' && c2 == 'L') { 620 lct++; 621 SKIP1; 622 } else 623 if(a1 == 'D' && c1 == 'E' && c2 == 'Q') { 624 eqn(); 625 eqnf = 0; 626 } else 627 if(a1 == 'f') { 628 if((mac == MS && c2 == 'P') || 629 (mac == MM && c1 == 'H' && c2 == 'U')){ 630 SKIP1; 631 return; 632 } 633 SKIP1; 634 } 635 else 636 SKIP1; 637 } 638 } 639 640 void 641 tbl(void) 642 { 643 while(C != '.') 644 ; 645 SKIP; 646 intable = YES; 647 } 648 649 void 650 stbl(void) 651 { 652 while(C != '.') 653 ; 654 SKIP_TO_COM; 655 if(c != 'T' || C != 'E') { 656 SKIP; 657 pc = c; 658 while(C != '.' || pc != '\n' || C != 'T' || C != 'E') 659 pc = c; 660 } 661 } 662 663 void 664 eqn(void) 665 { 666 long c1, c2; 667 int dflg; 668 char last; 669 670 last = 0; 671 dflg = 1; 672 SKIP; 673 674 for(;;) { 675 if(C1 == '.' || c == '\'') { 676 while(C1==' ' || c=='\t') 677 ; 678 if(c=='E' && C1=='N') { 679 SKIP; 680 if(msflag && dflg) { 681 Bputc(&bout, 'x'); 682 Bputc(&bout, ' '); 683 if(last) { 684 Bputc(&bout, last); 685 Bputc(&bout, '\n'); 686 } 687 } 688 return; 689 } 690 } else 691 if(c == 'd') { 692 if(C1=='e' && C1=='l') 693 if(C1=='i' && C1=='m') { 694 while(C1 == ' ') 695 ; 696 if((c1=c)=='\n' || (c2=C1)=='\n' || 697 (c1=='o' && c2=='f' && C1=='f')) { 698 ldelim = NOCHAR; 699 rdelim = NOCHAR; 700 } else { 701 ldelim = c1; 702 rdelim = c2; 703 } 704 } 705 dflg = 0; 706 } 707 if(c != '\n') 708 while(C1 != '\n') { 709 if(chars[c] == PUNCT) 710 last = c; 711 else 712 if(c != ' ') 713 last = 0; 714 } 715 } 716 } 717 718 /* 719 * skip over a complete backslash vconstruction 720 */ 721 void 722 backsl(void) 723 { 724 int bdelim; 725 726 sw: 727 switch(C1) 728 { 729 case '"': 730 SKIP1; 731 return; 732 733 case 's': 734 if(C1 == '\\') 735 backsl(); 736 else { 737 while(C1>='0' && c<='9') 738 ; 739 Bungetrune(infile); 740 c = '0'; 741 } 742 lp--; 743 return; 744 745 case 'f': 746 case 'n': 747 case '*': 748 if(C1 != '(') 749 return; 750 751 case '(': 752 if(msflag) { 753 if(C == 'e') { 754 if(C1 == 'm') { 755 *lp = '-'; 756 return; 757 } 758 } else 759 if(c != '\n') 760 C1; 761 return; 762 } 763 if(C1 != '\n') 764 C1; 765 return; 766 767 case '$': 768 C1; /* discard argument number */ 769 return; 770 771 case 'b': 772 case 'x': 773 case 'v': 774 case 'h': 775 case 'w': 776 case 'o': 777 case 'l': 778 case 'L': 779 if((bdelim=C1) == '\n') 780 return; 781 while(C1!='\n' && c!=bdelim) 782 if(c == '\\') 783 backsl(); 784 return; 785 786 case '\\': 787 if(inmacro) 788 goto sw; 789 default: 790 return; 791 } 792 } 793 794 char* 795 copys(char *s) 796 { 797 char *t, *t0; 798 799 if((t0 = t = malloc((strlen(s)+1))) == 0) 800 fatal("Cannot allocate memory", (char*)0); 801 while(*t++ = *s++) 802 ; 803 return(t0); 804 } 805 806 void 807 sce(void) 808 { 809 int n = 1; 810 811 while (C != L'\n' && !(L'0' <= c && c <= L'9')) 812 ; 813 if (c != L'\n') { 814 for (n = c-L'0';'0' <= C && c <= L'9';) 815 n = n*10 + c-L'0'; 816 } 817 while(n) { 818 if(C == '.') { 819 if(C == 'c') { 820 if(C == 'e') { 821 while(C == ' ') 822 ; 823 if(c == '0') { 824 SKIP; 825 break; 826 } else 827 SKIP; 828 } else 829 SKIP; 830 } else 831 if(c == 'P' || C == 'P') { 832 if(c != '\n') 833 SKIP; 834 break; 835 } else 836 if(c != '\n') 837 SKIP; 838 } else { 839 SKIP; 840 n--; 841 } 842 } 843 } 844 845 void 846 refer(int c1) 847 { 848 int c2; 849 850 if(c1 != '\n') 851 SKIP; 852 c2 = 0; 853 for(;;) { 854 if(C != '.') 855 SKIP; 856 else { 857 if(C != ']') 858 SKIP; 859 else { 860 while(C != '\n') 861 c2 = c; 862 if(CHARCLASS(c2) == PUNCT) 863 Bprint(&bout, " %C",c2); 864 return; 865 } 866 } 867 } 868 } 869 870 void 871 inpic(void) 872 { 873 int c1; 874 Rune *p1; 875 876 /* SKIP1;*/ 877 while(C1 != '\n') 878 if(c == '<'){ 879 SKIP1; 880 return; 881 } 882 p1 = line; 883 c = '\n'; 884 for(;;) { 885 c1 = c; 886 if(C1 == '.' && c1 == '\n') { 887 if(C1 != 'P' || C1 != 'E') { 888 if(c != '\n'){ 889 SKIP1; 890 c = '\n'; 891 } 892 continue; 893 } 894 SKIP1; 895 return; 896 } else 897 if(c == '\"') { 898 while(C1 != '\"') { 899 if(c == '\\') { 900 if(C1 == '\"') 901 continue; 902 Bungetrune(infile); 903 backsl(); 904 } else 905 *p1++ = c; 906 } 907 *p1++ = ' '; 908 } else 909 if(c == '\n' && p1 != line) { 910 *p1 = '\0'; 911 if(wordflag) 912 putwords(); 913 else 914 Bprint(&bout, "%S\n\n", line); 915 p1 = line; 916 } 917 } 918 } 919