1 /* $OpenBSD: deroff.c,v 1.8 2009/10/27 23:59:37 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 1988, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 /* 32 * Copyright (C) Caldera International Inc. 2001-2002. 33 * All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code and documentation must retain the above 39 * copyright notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgement: 45 * This product includes software developed or owned by Caldera 46 * International, Inc. 47 * 4. Neither the name of Caldera International, Inc. nor the names of other 48 * contributors may be used to endorse or promote products derived from 49 * this software without specific prior written permission. 50 * 51 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 52 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 53 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 54 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 55 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 56 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 57 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 58 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 60 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 61 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 62 * POSSIBILITY OF SUCH DAMAGE. 63 */ 64 65 #include <err.h> 66 #include <limits.h> 67 #include <stdio.h> 68 #include <stdlib.h> 69 #include <string.h> 70 #include <unistd.h> 71 72 /* 73 * Deroff command -- strip troff, eqn, and Tbl sequences from 74 * a file. Has two flags argument, -w, to cause output one word per line 75 * rather than in the original format. 76 * -mm (or -ms) causes the corresponding macro's to be interpreted 77 * so that just sentences are output 78 * -ml also gets rid of lists. 79 * Deroff follows .so and .nx commands, removes contents of macro 80 * definitions, equations (both .EQ ... .EN and $...$), 81 * Tbl command sequences, and Troff backslash constructions. 82 * 83 * All input is through the Cget macro; 84 * the most recently read character is in c. 85 * 86 * Modified by Robert Henry to process -me and -man macros. 87 */ 88 89 #define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) ) 90 #define C1get ( (c=getc(infile)) == EOF ? eof() : c) 91 92 #ifdef DEBUG 93 # define C _C() 94 # define C1 _C1() 95 #else /* not DEBUG */ 96 # define C Cget 97 # define C1 C1get 98 #endif /* not DEBUG */ 99 100 #define SKIP while (C != '\n') 101 #define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c 102 103 #define YES 1 104 #define NO 0 105 #define MS 0 /* -ms */ 106 #define MM 1 /* -mm */ 107 #define ME 2 /* -me */ 108 #define MA 3 /* -man */ 109 110 #ifdef DEBUG 111 char *mactab[] = { "-ms", "-mm", "-me", "-ma" }; 112 #endif /* DEBUG */ 113 114 #define ONE 1 115 #define TWO 2 116 117 #define NOCHAR -2 118 #define SPECIAL 0 119 #define APOS 1 120 #define PUNCT 2 121 #define DIGIT 3 122 #define LETTER 4 123 124 #define MAXFILES 20 125 126 int iflag; 127 int wordflag; 128 int msflag; /* processing a source written using a mac package */ 129 int mac; /* which package */ 130 int disp; 131 int parag; 132 int inmacro; 133 int intable; 134 int keepblock; /* keep blocks of text; normally false when msflag */ 135 136 char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ 137 138 char line[LINE_MAX]; 139 char *lp; 140 141 int c; 142 int pc; 143 int ldelim; 144 int rdelim; 145 146 char fname[PATH_MAX]; 147 FILE *files[MAXFILES]; 148 FILE **filesp; 149 FILE *infile; 150 151 int argc; 152 char **argv; 153 154 /* 155 * Macro processing 156 * 157 * Macro table definitions 158 */ 159 typedef int pacmac; /* compressed macro name */ 160 int argconcat = 0; /* concat arguments together (-me only) */ 161 162 #define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF)) 163 #define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF)) 164 165 struct mactab{ 166 int condition; 167 pacmac macname; 168 int (*func)(); /* XXX - args */ 169 }; 170 171 struct mactab troffmactab[]; 172 struct mactab ppmactab[]; 173 struct mactab msmactab[]; 174 struct mactab mmmactab[]; 175 struct mactab memactab[]; 176 struct mactab manmactab[]; 177 178 /* 179 * Macro table initialization 180 */ 181 #define M(cond, c1, c2, func) {cond, tomac(c1, c2), func} 182 183 /* 184 * Flags for matching conditions other than 185 * the macro name 186 */ 187 #define NONE 0 188 #define FNEST 1 /* no nested files */ 189 #define NOMAC 2 /* no macro */ 190 #define MAC 3 /* macro */ 191 #define PARAG 4 /* in a paragraph */ 192 #define MSF 5 /* msflag is on */ 193 #define NBLK 6 /* set if no blocks to be kept */ 194 195 /* 196 * Return codes from macro minions, determine where to jump, 197 * how to repeat/reprocess text 198 */ 199 #define COMX 1 /* goto comx */ 200 #define COM 2 /* goto com */ 201 202 int skeqn(void); 203 int eof(void); 204 int _C1(void); 205 int _C(void); 206 int EQ(void); 207 int domacro(void); 208 int PS(void); 209 int skip(void); 210 int intbl(void); 211 int outtbl(void); 212 int so(void); 213 int nx(void); 214 int skiptocom(void); 215 int PP(pacmac); 216 int AU(void); 217 int SH(pacmac); 218 int UX(void); 219 int MMHU(pacmac); 220 int mesnblock(pacmac); 221 int mssnblock(pacmac); 222 int nf(void); 223 int ce(void); 224 int meip(pacmac); 225 int mepp(pacmac); 226 int mesh(pacmac); 227 int mefont(pacmac); 228 int manfont(pacmac); 229 int manpp(pacmac); 230 int macsort(const void *, const void *); 231 int sizetab(struct mactab *); 232 void getfname(void); 233 void textline(char *, int); 234 void work(void); 235 void regline(void (*)(char *, int), int); 236 void macro(void); 237 void tbl(void); 238 void stbl(void); 239 void eqn(void); 240 void backsl(void); 241 void sce(void); 242 void refer(int); 243 void inpic(void); 244 void msputmac(char *, int); 245 void msputwords(int); 246 void meputmac(char *, int); 247 void meputwords(int); 248 void noblock(char, char); 249 void defcomline(pacmac); 250 void comline(void); 251 void buildtab(struct mactab **, int *); 252 FILE *opn(char *); 253 struct mactab *macfill(struct mactab *, struct mactab *); 254 __dead void usage(void); 255 256 int 257 main(int ac, char **av) 258 { 259 int i, ch; 260 int errflg = 0; 261 int kflag = NO; 262 263 iflag = NO; 264 wordflag = NO; 265 msflag = NO; 266 mac = ME; 267 disp = NO; 268 parag = NO; 269 inmacro = NO; 270 intable = NO; 271 ldelim = NOCHAR; 272 rdelim = NOCHAR; 273 keepblock = YES; 274 275 while ((ch = getopt(ac, av, "ikpwm:")) != -1) { 276 switch (ch) { 277 case 'i': 278 iflag = YES; 279 break; 280 case 'k': 281 kflag = YES; 282 break; 283 case 'm': 284 msflag = YES; 285 keepblock = NO; 286 switch (optarg[0]) { 287 case 'm': 288 mac = MM; 289 break; 290 case 's': 291 mac = MS; 292 break; 293 case 'e': 294 mac = ME; 295 break; 296 case 'a': 297 mac = MA; 298 break; 299 case 'l': 300 disp = YES; 301 break; 302 default: 303 errflg++; 304 break; 305 } 306 if (errflg == 0 && optarg[1] != '\0') 307 errflg++; 308 break; 309 case 'p': 310 parag = YES; 311 break; 312 case 'w': 313 wordflag = YES; 314 kflag = YES; 315 break; 316 default: 317 errflg++; 318 } 319 } 320 argc = ac - optind; 321 argv = av + optind; 322 323 if (kflag) 324 keepblock = YES; 325 if (errflg) 326 usage(); 327 328 #ifdef DEBUG 329 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n", 330 msflag, mactab[mac], keepblock, disp); 331 #endif /* DEBUG */ 332 if (argc == 0) { 333 infile = stdin; 334 } else { 335 infile = opn(argv[0]); 336 --argc; 337 ++argv; 338 } 339 files[0] = infile; 340 filesp = &files[0]; 341 342 for (i = 'a'; i <= 'z' ; ++i) 343 chars[i] = LETTER; 344 for (i = 'A'; i <= 'Z'; ++i) 345 chars[i] = LETTER; 346 for (i = '0'; i <= '9'; ++i) 347 chars[i] = DIGIT; 348 chars['\''] = APOS; 349 chars['&'] = APOS; 350 chars['.'] = PUNCT; 351 chars[','] = PUNCT; 352 chars[';'] = PUNCT; 353 chars['?'] = PUNCT; 354 chars[':'] = PUNCT; 355 work(); 356 exit(0); 357 } 358 359 int 360 skeqn(void) 361 { 362 363 while ((c = getc(infile)) != rdelim) { 364 if (c == EOF) 365 c = eof(); 366 else if (c == '"') { 367 while ((c = getc(infile)) != '"') { 368 if (c == EOF || 369 (c == '\\' && (c = getc(infile)) == EOF)) 370 c = eof(); 371 } 372 } 373 } 374 if (msflag) 375 return((c = 'x')); 376 return((c = ' ')); 377 } 378 379 FILE * 380 opn(char *p) 381 { 382 FILE *fd; 383 384 if ((fd = fopen(p, "r")) == NULL) 385 err(1, "fopen %s", p); 386 387 return(fd); 388 } 389 390 int 391 eof(void) 392 { 393 394 if (infile != stdin) 395 fclose(infile); 396 if (filesp > files) 397 infile = *--filesp; 398 else if (argc > 0) { 399 infile = opn(argv[0]); 400 --argc; 401 ++argv; 402 } else 403 exit(0); 404 return(C); 405 } 406 407 void 408 getfname(void) 409 { 410 char *p; 411 struct chain { 412 struct chain *nextp; 413 char *datap; 414 } *q; 415 static struct chain *namechain= NULL; 416 417 while (C == ' ') 418 ; /* nothing */ 419 420 for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' && 421 c != ' ' && c != '\t' && c != '\\'; ++p) 422 C; 423 *p = '\0'; 424 while (c != '\n') 425 C; 426 427 /* see if this name has already been used */ 428 for (q = namechain ; q; q = q->nextp) 429 if (strcmp(fname, q->datap) == 0) { 430 fname[0] = '\0'; 431 return; 432 } 433 434 q = (struct chain *) malloc(sizeof(struct chain)); 435 if (q == NULL) 436 err(1, NULL); 437 q->nextp = namechain; 438 q->datap = strdup(fname); 439 if (q->datap == NULL) 440 err(1, NULL); 441 namechain = q; 442 } 443 444 /*ARGSUSED*/ 445 void 446 textline(char *str, int constant) 447 { 448 449 if (wordflag) { 450 msputwords(0); 451 return; 452 } 453 puts(str); 454 } 455 456 void 457 work(void) 458 { 459 460 for (;;) { 461 C; 462 #ifdef FULLDEBUG 463 printf("Starting work with `%c'\n", c); 464 #endif /* FULLDEBUG */ 465 if (c == '.' || c == '\'') 466 comline(); 467 else 468 regline(textline, TWO); 469 } 470 } 471 472 void 473 regline(void (*pfunc)(char *, int), int constant) 474 { 475 476 line[0] = c; 477 lp = line; 478 while (lp - line < sizeof(line)) { 479 if (c == '\\') { 480 *lp = ' '; 481 backsl(); 482 } 483 if (c == '\n') 484 break; 485 if (intable && c == 'T') { 486 *++lp = C; 487 if (c == '{' || c == '}') { 488 lp[-1] = ' '; 489 *lp = C; 490 } 491 } else { 492 *++lp = C; 493 } 494 } 495 *lp = '\0'; 496 497 if (line[0] != '\0') 498 (*pfunc)(line, constant); 499 } 500 501 void 502 macro(void) 503 { 504 505 if (msflag) { 506 do { 507 SKIP; 508 } while (C!='.' || C!='.' || C=='.'); /* look for .. */ 509 if (c != '\n') 510 SKIP; 511 return; 512 } 513 SKIP; 514 inmacro = YES; 515 } 516 517 void 518 tbl(void) 519 { 520 521 while (C != '.') 522 ; /* nothing */ 523 SKIP; 524 intable = YES; 525 } 526 527 void 528 stbl(void) 529 { 530 531 while (C != '.') 532 ; /* nothing */ 533 SKIP_TO_COM; 534 if (c != 'T' || C != 'E') { 535 SKIP; 536 pc = c; 537 while (C != '.' || pc != '\n' || C != 'T' || C != 'E') 538 pc = c; 539 } 540 } 541 542 void 543 eqn(void) 544 { 545 int c1, c2; 546 int dflg; 547 char last; 548 549 last=0; 550 dflg = 1; 551 SKIP; 552 553 for (;;) { 554 if (C1 == '.' || c == '\'') { 555 while (C1 == ' ' || c == '\t') 556 ; 557 if (c == 'E' && C1 == 'N') { 558 SKIP; 559 if (msflag && dflg) { 560 putchar('x'); 561 putchar(' '); 562 if (last) { 563 putchar(last); 564 putchar('\n'); 565 } 566 } 567 return; 568 } 569 } else if (c == 'd') { 570 /* look for delim */ 571 if (C1 == 'e' && C1 == 'l') 572 if (C1 == 'i' && C1 == 'm') { 573 while (C1 == ' ') 574 ; /* nothing */ 575 576 if ((c1 = c) == '\n' || 577 (c2 = C1) == '\n' || 578 (c1 == 'o' && c2 == 'f' && C1=='f')) { 579 ldelim = NOCHAR; 580 rdelim = NOCHAR; 581 } else { 582 ldelim = c1; 583 rdelim = c2; 584 } 585 } 586 dflg = 0; 587 } 588 589 if (c != '\n') 590 while (C1 != '\n') { 591 if (chars[c] == PUNCT) 592 last = c; 593 else if (c != ' ') 594 last = 0; 595 } 596 } 597 } 598 599 /* skip over a complete backslash construction */ 600 void 601 backsl(void) 602 { 603 int bdelim; 604 605 sw: 606 switch (C) { 607 case '"': 608 SKIP; 609 return; 610 611 case 's': 612 if (C == '\\') 613 backsl(); 614 else { 615 while (C >= '0' && c <= '9') 616 ; /* nothing */ 617 ungetc(c, infile); 618 c = '0'; 619 } 620 --lp; 621 return; 622 623 case 'f': 624 case 'n': 625 case '*': 626 if (C != '(') 627 return; 628 629 case '(': 630 if (msflag) { 631 if (C == 'e') { 632 if (C == 'm') { 633 *lp = '-'; 634 return; 635 } 636 } 637 else if (c != '\n') 638 C; 639 return; 640 } 641 if (C != '\n') 642 C; 643 return; 644 645 case '$': 646 C; /* discard argument number */ 647 return; 648 649 case 'b': 650 case 'x': 651 case 'v': 652 case 'h': 653 case 'w': 654 case 'o': 655 case 'l': 656 case 'L': 657 if ((bdelim = C) == '\n') 658 return; 659 while (C != '\n' && c != bdelim) 660 if (c == '\\') 661 backsl(); 662 return; 663 664 case '\\': 665 if (inmacro) 666 goto sw; 667 668 default: 669 return; 670 } 671 } 672 673 void 674 sce(void) 675 { 676 char *ap; 677 int n, i; 678 char a[10]; 679 680 for (ap = a; C != '\n'; ap++) { 681 *ap = c; 682 if (ap == &a[9]) { 683 SKIP; 684 ap = a; 685 break; 686 } 687 } 688 if (ap != a) 689 n = atoi(a); 690 else 691 n = 1; 692 for (i = 0; i < n;) { 693 if (C == '.') { 694 if (C == 'c') { 695 if (C == 'e') { 696 while (C == ' ') 697 ; /* nothing */ 698 if (c == '0') { 699 SKIP; 700 break; 701 } else 702 SKIP; 703 } 704 else 705 SKIP; 706 } else if (c == 'P' || C == 'P') { 707 if (c != '\n') 708 SKIP; 709 break; 710 } else if (c != '\n') 711 SKIP; 712 } else { 713 SKIP; 714 i++; 715 } 716 } 717 } 718 719 void 720 refer(int c1) 721 { 722 int c2; 723 724 if (c1 != '\n') 725 SKIP; 726 727 for (c2 = -1;;) { 728 if (C != '.') 729 SKIP; 730 else { 731 if (C != ']') 732 SKIP; 733 else { 734 while (C != '\n') 735 c2 = c; 736 if (c2 != -1 && chars[c2] == PUNCT) 737 putchar(c2); 738 return; 739 } 740 } 741 } 742 } 743 744 void 745 inpic(void) 746 { 747 int c1; 748 char *p1; 749 750 SKIP; 751 p1 = line; 752 c = '\n'; 753 for (;;) { 754 c1 = c; 755 if (C == '.' && c1 == '\n') { 756 if (C != 'P') { 757 if (c == '\n') 758 continue; 759 else { 760 SKIP; 761 c = '\n'; 762 continue; 763 } 764 } 765 if (C != 'E') { 766 if (c == '\n') 767 continue; 768 else { 769 SKIP; 770 c = '\n'; 771 continue; 772 } 773 } 774 SKIP; 775 return; 776 } 777 else if (c == '\"') { 778 while (C != '\"') { 779 if (c == '\\') { 780 if (C == '\"') 781 continue; 782 ungetc(c, infile); 783 backsl(); 784 } else 785 *p1++ = c; 786 } 787 *p1++ = ' '; 788 } 789 else if (c == '\n' && p1 != line) { 790 *p1 = '\0'; 791 if (wordflag) 792 msputwords(NO); 793 else { 794 puts(line); 795 putchar('\n'); 796 } 797 p1 = line; 798 } 799 } 800 } 801 802 #ifdef DEBUG 803 int 804 _C1(void) 805 { 806 807 return(C1get); 808 } 809 810 int 811 _C(void) 812 { 813 814 return(Cget); 815 } 816 #endif /* DEBUG */ 817 818 /* 819 * Put out a macro line, using ms and mm conventions. 820 */ 821 void 822 msputmac(char *s, int constant) 823 { 824 char *t; 825 int found; 826 int last; 827 828 last = 0; 829 found = 0; 830 if (wordflag) { 831 msputwords(YES); 832 return; 833 } 834 while (*s) { 835 while (*s == ' ' || *s == '\t') 836 putchar(*s++); 837 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t) 838 ; /* nothing */ 839 if (*s == '\"') 840 s++; 841 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER && 842 chars[(unsigned char)s[1]] == LETTER) { 843 while (s < t) 844 if (*s == '\"') 845 s++; 846 else 847 putchar(*s++); 848 last = *(t-1); 849 found++; 850 } else if (found && chars[(unsigned char)s[0]] == PUNCT && 851 s[1] == '\0') { 852 putchar(*s++); 853 } else { 854 last = *(t - 1); 855 s = t; 856 } 857 } 858 putchar('\n'); 859 if (msflag && chars[last] == PUNCT) { 860 putchar(last); 861 putchar('\n'); 862 } 863 } 864 865 /* 866 * put out words (for the -w option) with ms and mm conventions 867 */ 868 void 869 msputwords(int macline) 870 { 871 char *p, *p1; 872 int i, nlet; 873 874 for (p1 = line;;) { 875 /* 876 * skip initial specials ampersands and apostrophes 877 */ 878 while (chars[(unsigned char)*p1] < DIGIT) 879 if (*p1++ == '\0') 880 return; 881 nlet = 0; 882 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p) 883 if (i == LETTER) 884 ++nlet; 885 886 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) { 887 /* 888 * delete trailing ampersands and apostrophes 889 */ 890 while ((i = chars[(unsigned char)p[-1]]) == PUNCT || 891 i == APOS ) 892 --p; 893 while (p1 < p) 894 putchar(*p1++); 895 putchar('\n'); 896 } else { 897 p1 = p; 898 } 899 } 900 } 901 902 /* 903 * put out a macro using the me conventions 904 */ 905 #define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; } 906 #define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\cp' && *cp !='\0') { cp++; } 907 908 void 909 meputmac(char *cp, int constant) 910 { 911 char *np; 912 int found; 913 int argno; 914 int last; 915 int inquote; 916 917 last = 0; 918 found = 0; 919 if (wordflag) { 920 meputwords(YES); 921 return; 922 } 923 for (argno = 0; *cp; argno++) { 924 SKIPBLANK(cp); 925 inquote = (*cp == '"'); 926 if (inquote) 927 cp++; 928 for (np = cp; *np; np++) { 929 switch (*np) { 930 case '\n': 931 case '\0': 932 break; 933 934 case '\t': 935 case ' ': 936 if (inquote) 937 continue; 938 else 939 goto endarg; 940 941 case '"': 942 if (inquote && np[1] == '"') { 943 memmove(np, np + 1, strlen(np)); 944 np++; 945 continue; 946 } else { 947 *np = ' '; /* bye bye " */ 948 goto endarg; 949 } 950 951 default: 952 continue; 953 } 954 } 955 endarg: ; 956 /* 957 * cp points at the first char in the arg 958 * np points one beyond the last char in the arg 959 */ 960 if ((argconcat == 0) || (argconcat != argno)) 961 putchar(' '); 962 #ifdef FULLDEBUG 963 { 964 char *p; 965 printf("[%d,%d: ", argno, np - cp); 966 for (p = cp; p < np; p++) { 967 putchar(*p); 968 } 969 printf("]"); 970 } 971 #endif /* FULLDEBUG */ 972 /* 973 * Determine if the argument merits being printed 974 * 975 * constant is the cut off point below which something 976 * is not a word. 977 */ 978 if (((np - cp) > constant) && 979 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) { 980 for (cp = cp; cp < np; cp++) 981 putchar(*cp); 982 last = np[-1]; 983 found++; 984 } else if (found && (np - cp == 1) && 985 chars[(unsigned char)*cp] == PUNCT) { 986 putchar(*cp); 987 } else { 988 last = np[-1]; 989 } 990 cp = np; 991 } 992 if (msflag && chars[last] == PUNCT) 993 putchar(last); 994 putchar('\n'); 995 } 996 997 /* 998 * put out words (for the -w option) with ms and mm conventions 999 */ 1000 void 1001 meputwords(int macline) 1002 { 1003 1004 msputwords(macline); 1005 } 1006 1007 /* 1008 * 1009 * Skip over a nested set of macros 1010 * 1011 * Possible arguments to noblock are: 1012 * 1013 * fi end of unfilled text 1014 * PE pic ending 1015 * DE display ending 1016 * 1017 * for ms and mm only: 1018 * KE keep ending 1019 * 1020 * NE undocumented match to NS (for mm?) 1021 * LE mm only: matches RL or *L (for lists) 1022 * 1023 * for me: 1024 * ([lqbzcdf] 1025 */ 1026 void 1027 noblock(char a1, char a2) 1028 { 1029 int c1,c2; 1030 int eqnf; 1031 int lct; 1032 1033 lct = 0; 1034 eqnf = 1; 1035 SKIP; 1036 for (;;) { 1037 while (C != '.') 1038 if (c == '\n') 1039 continue; 1040 else 1041 SKIP; 1042 if ((c1 = C) == '\n') 1043 continue; 1044 if ((c2 = C) == '\n') 1045 continue; 1046 if (c1 == a1 && c2 == a2) { 1047 SKIP; 1048 if (lct != 0) { 1049 lct--; 1050 continue; 1051 } 1052 if (eqnf) 1053 putchar('.'); 1054 putchar('\n'); 1055 return; 1056 } else if (a1 == 'L' && c2 == 'L') { 1057 lct++; 1058 SKIP; 1059 } 1060 /* 1061 * equations (EQ) nested within a display 1062 */ 1063 else if (c1 == 'E' && c2 == 'Q') { 1064 if ((mac == ME && a1 == ')') 1065 || (mac != ME && a1 == 'D')) { 1066 eqn(); 1067 eqnf=0; 1068 } 1069 } 1070 /* 1071 * turning on filling is done by the paragraphing 1072 * macros 1073 */ 1074 else if (a1 == 'f') { /* .fi */ 1075 if ((mac == ME && (c2 == 'h' || c2 == 'p')) 1076 || (mac != ME && (c1 == 'P' || c2 == 'P'))) { 1077 SKIP; 1078 return; 1079 } 1080 } else { 1081 SKIP; 1082 } 1083 } 1084 } 1085 1086 int 1087 EQ(void) 1088 { 1089 1090 eqn(); 1091 return(0); 1092 } 1093 1094 int 1095 domacro(void) 1096 { 1097 1098 macro(); 1099 return(0); 1100 } 1101 1102 int 1103 PS(void) 1104 { 1105 1106 for (C; c == ' ' || c == '\t'; C) 1107 ; /* nothing */ 1108 1109 if (c == '<') { /* ".PS < file" -- don't expect a .PE */ 1110 SKIP; 1111 return(0); 1112 } 1113 if (!msflag) 1114 inpic(); 1115 else 1116 noblock('P', 'E'); 1117 return(0); 1118 } 1119 1120 int 1121 skip(void) 1122 { 1123 1124 SKIP; 1125 return(0); 1126 } 1127 1128 int 1129 intbl(void) 1130 { 1131 1132 if (msflag) 1133 stbl(); 1134 else 1135 tbl(); 1136 return(0); 1137 } 1138 1139 int 1140 outtbl(void) 1141 { 1142 1143 intable = NO; 1144 return(0); 1145 } 1146 1147 int 1148 so(void) 1149 { 1150 1151 if (!iflag) { 1152 getfname(); 1153 if (fname[0]) { 1154 if (++filesp - &files[0] > MAXFILES) 1155 err(1, "too many nested files (max %d)", 1156 MAXFILES); 1157 infile = *filesp = opn(fname); 1158 } 1159 } 1160 return(0); 1161 } 1162 1163 int 1164 nx(void) 1165 { 1166 1167 if (!iflag) { 1168 getfname(); 1169 if (fname[0] == '\0') 1170 exit(0); 1171 if (infile != stdin) 1172 fclose(infile); 1173 infile = *filesp = opn(fname); 1174 } 1175 return(0); 1176 } 1177 1178 int 1179 skiptocom(void) 1180 { 1181 1182 SKIP_TO_COM; 1183 return(COMX); 1184 } 1185 1186 int 1187 PP(pacmac c12) 1188 { 1189 int c1, c2; 1190 1191 frommac(c12, c1, c2); 1192 printf(".%c%c", c1, c2); 1193 while (C != '\n') 1194 putchar(c); 1195 putchar('\n'); 1196 return(0); 1197 } 1198 1199 int 1200 AU(void) 1201 { 1202 1203 if (mac == MM) 1204 return(0); 1205 SKIP_TO_COM; 1206 return(COMX); 1207 } 1208 1209 int 1210 SH(pacmac c12) 1211 { 1212 int c1, c2; 1213 1214 frommac(c12, c1, c2); 1215 1216 if (parag) { 1217 printf(".%c%c", c1, c2); 1218 while (C != '\n') 1219 putchar(c); 1220 putchar(c); 1221 putchar('!'); 1222 for (;;) { 1223 while (C != '\n') 1224 putchar(c); 1225 putchar('\n'); 1226 if (C == '.') 1227 return(COM); 1228 putchar('!'); 1229 putchar(c); 1230 } 1231 /*NOTREACHED*/ 1232 } else { 1233 SKIP_TO_COM; 1234 return(COMX); 1235 } 1236 } 1237 1238 int 1239 UX(void) 1240 { 1241 1242 if (wordflag) 1243 printf("UNIX\n"); 1244 else 1245 printf("UNIX "); 1246 return(0); 1247 } 1248 1249 int 1250 MMHU(pacmac c12) 1251 { 1252 int c1, c2; 1253 1254 frommac(c12, c1, c2); 1255 if (parag) { 1256 printf(".%c%c", c1, c2); 1257 while (C != '\n') 1258 putchar(c); 1259 putchar('\n'); 1260 } else { 1261 SKIP; 1262 } 1263 return(0); 1264 } 1265 1266 int 1267 mesnblock(pacmac c12) 1268 { 1269 int c1, c2; 1270 1271 frommac(c12, c1, c2); 1272 noblock(')', c2); 1273 return(0); 1274 } 1275 1276 int 1277 mssnblock(pacmac c12) 1278 { 1279 int c1, c2; 1280 1281 frommac(c12, c1, c2); 1282 noblock(c1, 'E'); 1283 return(0); 1284 } 1285 1286 int 1287 nf(void) 1288 { 1289 1290 noblock('f', 'i'); 1291 return(0); 1292 } 1293 1294 int 1295 ce(void) 1296 { 1297 1298 sce(); 1299 return(0); 1300 } 1301 1302 int 1303 meip(pacmac c12) 1304 { 1305 1306 if (parag) 1307 mepp(c12); 1308 else if (wordflag) /* save the tag */ 1309 regline(meputmac, ONE); 1310 else 1311 SKIP; 1312 return(0); 1313 } 1314 1315 /* 1316 * only called for -me .pp or .sh, when parag is on 1317 */ 1318 int 1319 mepp(pacmac c12) 1320 { 1321 1322 PP(c12); /* eats the line */ 1323 return(0); 1324 } 1325 1326 /* 1327 * Start of a section heading; output the section name if doing words 1328 */ 1329 int 1330 mesh(pacmac c12) 1331 { 1332 1333 if (parag) 1334 mepp(c12); 1335 else if (wordflag) 1336 defcomline(c12); 1337 else 1338 SKIP; 1339 return(0); 1340 } 1341 1342 /* 1343 * process a font setting 1344 */ 1345 int 1346 mefont(pacmac c12) 1347 { 1348 1349 argconcat = 1; 1350 defcomline(c12); 1351 argconcat = 0; 1352 return(0); 1353 } 1354 1355 int 1356 manfont(pacmac c12) 1357 { 1358 1359 return(mefont(c12)); 1360 } 1361 1362 int 1363 manpp(pacmac c12) 1364 { 1365 1366 return(mepp(c12)); 1367 } 1368 1369 void 1370 defcomline(pacmac c12) 1371 { 1372 int c1, c2; 1373 1374 frommac(c12, c1, c2); 1375 if (msflag && mac == MM && c2 == 'L') { 1376 if (disp || c1 == 'R') { 1377 noblock('L', 'E'); 1378 } else { 1379 SKIP; 1380 putchar('.'); 1381 } 1382 } 1383 else if (c1 == '.' && c2 == '.') { 1384 if (msflag) { 1385 SKIP; 1386 return; 1387 } 1388 while (C == '.') 1389 /*VOID*/; 1390 } 1391 ++inmacro; 1392 /* 1393 * Process the arguments to the macro 1394 */ 1395 switch (mac) { 1396 default: 1397 case MM: 1398 case MS: 1399 if (c1 <= 'Z' && msflag) 1400 regline(msputmac, ONE); 1401 else 1402 regline(msputmac, TWO); 1403 break; 1404 case ME: 1405 regline(meputmac, ONE); 1406 break; 1407 } 1408 --inmacro; 1409 } 1410 1411 void 1412 comline(void) 1413 { 1414 int c1; 1415 int c2; 1416 pacmac c12; 1417 int mid; 1418 int lb, ub; 1419 int hit; 1420 static int tabsize = 0; 1421 static struct mactab *mactab = (struct mactab *)0; 1422 struct mactab *mp; 1423 1424 if (mactab == 0) 1425 buildtab(&mactab, &tabsize); 1426 com: 1427 while (C == ' ' || c == '\t') 1428 ; 1429 comx: 1430 if ((c1 = c) == '\n') 1431 return; 1432 c2 = C; 1433 if (c1 == '.' && c2 != '.') 1434 inmacro = NO; 1435 if (msflag && c1 == '[') { 1436 refer(c2); 1437 return; 1438 } 1439 if (parag && mac==MM && c1 == 'P' && c2 == '\n') { 1440 printf(".P\n"); 1441 return; 1442 } 1443 if (c2 == '\n') 1444 return; 1445 /* 1446 * Single letter macro 1447 */ 1448 if (mac == ME && (c2 == ' ' || c2 == '\t') ) 1449 c2 = ' '; 1450 c12 = tomac(c1, c2); 1451 /* 1452 * binary search through the table of macros 1453 */ 1454 lb = 0; 1455 ub = tabsize - 1; 1456 while (lb <= ub) { 1457 mid = (ub + lb) / 2; 1458 mp = &mactab[mid]; 1459 if (mp->macname < c12) 1460 lb = mid + 1; 1461 else if (mp->macname > c12) 1462 ub = mid - 1; 1463 else { 1464 hit = 1; 1465 #ifdef FULLDEBUG 1466 printf("preliminary hit macro %c%c ", c1, c2); 1467 #endif /* FULLDEBUG */ 1468 switch (mp->condition) { 1469 case NONE: 1470 hit = YES; 1471 break; 1472 case FNEST: 1473 hit = (filesp == files); 1474 break; 1475 case NOMAC: 1476 hit = !inmacro; 1477 break; 1478 case MAC: 1479 hit = inmacro; 1480 break; 1481 case PARAG: 1482 hit = parag; 1483 break; 1484 case NBLK: 1485 hit = !keepblock; 1486 break; 1487 default: 1488 hit = 0; 1489 } 1490 1491 if (hit) { 1492 #ifdef FULLDEBUG 1493 printf("MATCH\n"); 1494 #endif /* FULLDEBUG */ 1495 switch ((*(mp->func))(c12)) { 1496 default: 1497 return; 1498 case COMX: 1499 goto comx; 1500 case COM: 1501 goto com; 1502 } 1503 } 1504 #ifdef FULLDEBUG 1505 printf("FAIL\n"); 1506 #endif /* FULLDEBUG */ 1507 break; 1508 } 1509 } 1510 defcomline(c12); 1511 } 1512 1513 int 1514 macsort(const void *p1, const void *p2) 1515 { 1516 struct mactab *t1 = (struct mactab *)p1; 1517 struct mactab *t2 = (struct mactab *)p2; 1518 1519 return(t1->macname - t2->macname); 1520 } 1521 1522 int 1523 sizetab(struct mactab *mp) 1524 { 1525 int i; 1526 1527 i = 0; 1528 if (mp) { 1529 for (; mp->macname; mp++, i++) 1530 /*VOID*/ ; 1531 } 1532 return(i); 1533 } 1534 1535 struct mactab * 1536 macfill(struct mactab *dst, struct mactab *src) 1537 { 1538 1539 if (src) { 1540 while (src->macname) 1541 *dst++ = *src++; 1542 } 1543 return(dst); 1544 } 1545 1546 __dead void 1547 usage(void) 1548 { 1549 extern char *__progname; 1550 1551 fprintf(stderr, "usage: %s [-ikpw] [-m a | e | l | m | s] [file ...]\n", __progname); 1552 exit(1); 1553 } 1554 1555 void 1556 buildtab(struct mactab **r_back, int *r_size) 1557 { 1558 int size; 1559 struct mactab *p, *p1, *p2; 1560 struct mactab *back; 1561 1562 size = sizetab(troffmactab) + sizetab(ppmactab); 1563 p1 = p2 = NULL; 1564 if (msflag) { 1565 switch (mac) { 1566 case ME: 1567 p1 = memactab; 1568 break; 1569 case MM: 1570 p1 = msmactab; 1571 p2 = mmmactab; 1572 break; 1573 case MS: 1574 p1 = msmactab; 1575 break; 1576 case MA: 1577 p1 = manmactab; 1578 break; 1579 default: 1580 break; 1581 } 1582 } 1583 size += sizetab(p1); 1584 size += sizetab(p2); 1585 back = (struct mactab *)calloc(size+2, sizeof(struct mactab)); 1586 if (back == NULL) 1587 err(1, NULL); 1588 1589 p = macfill(back, troffmactab); 1590 p = macfill(p, ppmactab); 1591 p = macfill(p, p1); 1592 p = macfill(p, p2); 1593 1594 qsort(back, size, sizeof(struct mactab), macsort); 1595 *r_size = size; 1596 *r_back = back; 1597 } 1598 1599 /* 1600 * troff commands 1601 */ 1602 struct mactab troffmactab[] = { 1603 M(NONE, '\\','"', skip), /* comment */ 1604 M(NOMAC, 'd','e', domacro), /* define */ 1605 M(NOMAC, 'i','g', domacro), /* ignore till .. */ 1606 M(NOMAC, 'a','m', domacro), /* append macro */ 1607 M(NBLK, 'n','f', nf), /* filled */ 1608 M(NBLK, 'c','e', ce), /* centered */ 1609 1610 M(NONE, 's','o', so), /* source a file */ 1611 M(NONE, 'n','x', nx), /* go to next file */ 1612 1613 M(NONE, 't','m', skip), /* print string on tty */ 1614 M(NONE, 'h','w', skip), /* exception hyphen words */ 1615 M(NONE, 0,0, 0) 1616 }; 1617 1618 /* 1619 * Preprocessor output 1620 */ 1621 struct mactab ppmactab[] = { 1622 M(FNEST, 'E','Q', EQ), /* equation starting */ 1623 M(FNEST, 'T','S', intbl), /* table starting */ 1624 M(FNEST, 'T','C', intbl), /* alternative table? */ 1625 M(FNEST, 'T','&', intbl), /* table reformatting */ 1626 M(NONE, 'T','E', outtbl),/* table ending */ 1627 M(NONE, 'P','S', PS), /* picture starting */ 1628 M(NONE, 0,0, 0) 1629 }; 1630 1631 /* 1632 * Particular to ms and mm 1633 */ 1634 struct mactab msmactab[] = { 1635 M(NONE, 'T','L', skiptocom), /* title follows */ 1636 M(NONE, 'F','S', skiptocom), /* start footnote */ 1637 M(NONE, 'O','K', skiptocom), /* Other kws */ 1638 1639 M(NONE, 'N','R', skip), /* undocumented */ 1640 M(NONE, 'N','D', skip), /* use supplied date */ 1641 1642 M(PARAG, 'P','P', PP), /* begin parag */ 1643 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */ 1644 M(PARAG, 'L','P', PP), /* left blocked parag */ 1645 1646 M(NONE, 'A','U', AU), /* author */ 1647 M(NONE, 'A','I', AU), /* authors institution */ 1648 1649 M(NONE, 'S','H', SH), /* section heading */ 1650 M(NONE, 'S','N', SH), /* undocumented */ 1651 M(NONE, 'U','X', UX), /* unix */ 1652 1653 M(NBLK, 'D','S', mssnblock), /* start display text */ 1654 M(NBLK, 'K','S', mssnblock), /* start keep */ 1655 M(NBLK, 'K','F', mssnblock), /* start float keep */ 1656 M(NONE, 0,0, 0) 1657 }; 1658 1659 struct mactab mmmactab[] = { 1660 M(NONE, 'H',' ', MMHU), /* -mm ? */ 1661 M(NONE, 'H','U', MMHU), /* -mm ? */ 1662 M(PARAG, 'P',' ', PP), /* paragraph for -mm */ 1663 M(NBLK, 'N','S', mssnblock), /* undocumented */ 1664 M(NONE, 0,0, 0) 1665 }; 1666 1667 struct mactab memactab[] = { 1668 M(PARAG, 'p','p', mepp), 1669 M(PARAG, 'l','p', mepp), 1670 M(PARAG, 'n','p', mepp), 1671 M(NONE, 'i','p', meip), 1672 1673 M(NONE, 's','h', mesh), 1674 M(NONE, 'u','h', mesh), 1675 1676 M(NBLK, '(','l', mesnblock), 1677 M(NBLK, '(','q', mesnblock), 1678 M(NBLK, '(','b', mesnblock), 1679 M(NBLK, '(','z', mesnblock), 1680 M(NBLK, '(','c', mesnblock), 1681 1682 M(NBLK, '(','d', mesnblock), 1683 M(NBLK, '(','f', mesnblock), 1684 M(NBLK, '(','x', mesnblock), 1685 1686 M(NONE, 'r',' ', mefont), 1687 M(NONE, 'i',' ', mefont), 1688 M(NONE, 'b',' ', mefont), 1689 M(NONE, 'u',' ', mefont), 1690 M(NONE, 'q',' ', mefont), 1691 M(NONE, 'r','b', mefont), 1692 M(NONE, 'b','i', mefont), 1693 M(NONE, 'b','x', mefont), 1694 M(NONE, 0,0, 0) 1695 }; 1696 1697 struct mactab manmactab[] = { 1698 M(PARAG, 'B','I', manfont), 1699 M(PARAG, 'B','R', manfont), 1700 M(PARAG, 'I','B', manfont), 1701 M(PARAG, 'I','R', manfont), 1702 M(PARAG, 'R','B', manfont), 1703 M(PARAG, 'R','I', manfont), 1704 1705 M(PARAG, 'P','P', manpp), 1706 M(PARAG, 'L','P', manpp), 1707 M(PARAG, 'H','P', manpp), 1708 M(NONE, 0,0, 0) 1709 }; 1710