1 /* $NetBSD: deroff.c,v 1.2 2005/06/30 16:23:29 christos Exp $ */ 2 3 /* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */ 4 5 /*- 6 * Copyright (c) 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 /* 34 * Copyright (C) Caldera International Inc. 2001-2002. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code and documentation must retain the above 41 * copyright notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed or owned by Caldera 48 * International, Inc. 49 * 4. Neither the name of Caldera International, Inc. nor the names of other 50 * contributors may be used to endorse or promote products derived from 51 * this software without specific prior written permission. 52 * 53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 64 * POSSIBILITY OF SUCH DAMAGE. 65 */ 66 67 #ifndef lint 68 static const char copyright[] = 69 "@(#) Copyright (c) 1988, 1993\n\ 70 The Regents of the University of California. All rights reserved.\n"; 71 #endif /* not lint */ 72 73 #ifndef lint 74 #if 0 75 static const char sccsid[] = "@(#)deroff.c 8.1 (Berkeley) 6/6/93"; 76 #else 77 static const char rcsid[] = "$NetBSD: deroff.c,v 1.2 2005/06/30 16:23:29 christos Exp $"; 78 #endif 79 #endif /* not lint */ 80 81 #include <err.h> 82 #include <limits.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <unistd.h> 87 88 /* 89 * Deroff command -- strip troff, eqn, and Tbl sequences from 90 * a file. Has two flags argument, -w, to cause output one word per line 91 * rather than in the original format. 92 * -mm (or -ms) causes the corresponding macro's to be interpreted 93 * so that just sentences are output 94 * -ml also gets rid of lists. 95 * Deroff follows .so and .nx commands, removes contents of macro 96 * definitions, equations (both .EQ ... .EN and $...$), 97 * Tbl command sequences, and Troff backslash constructions. 98 * 99 * All input is through the Cget macro; 100 * the most recently read character is in c. 101 * 102 * Modified by Robert Henry to process -me and -man macros. 103 */ 104 105 #define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) ) 106 #define C1get ( (c=getc(infile)) == EOF ? eof() : c) 107 108 #ifdef DEBUG 109 # define C _C() 110 # define C1 _C1() 111 #else /* not DEBUG */ 112 # define C Cget 113 # define C1 C1get 114 #endif /* not DEBUG */ 115 116 #define SKIP while (C != '\n') 117 #define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c 118 119 #define YES 1 120 #define NO 0 121 #define MS 0 /* -ms */ 122 #define MM 1 /* -mm */ 123 #define ME 2 /* -me */ 124 #define MA 3 /* -man */ 125 126 #ifdef DEBUG 127 char *mactab[] = { "-ms", "-mm", "-me", "-ma" }; 128 #endif /* DEBUG */ 129 130 #define ONE 1 131 #define TWO 2 132 133 #define NOCHAR -2 134 #define SPECIAL 0 135 #define APOS 1 136 #define PUNCT 2 137 #define DIGIT 3 138 #define LETTER 4 139 140 #define MAXFILES 20 141 142 static int iflag; 143 static int wordflag; 144 static int msflag; /* processing a source written using a mac package */ 145 static int mac; /* which package */ 146 static int disp; 147 static int parag; 148 static int inmacro; 149 static int intable; 150 static int keepblock; /* keep blocks of text; normally false when msflag */ 151 152 static char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ 153 154 static char line[LINE_MAX]; 155 static char *lp; 156 157 static int c; 158 static int pc; 159 static int ldelim; 160 static int rdelim; 161 162 static char fname[PATH_MAX]; 163 static FILE *files[MAXFILES]; 164 static FILE **filesp; 165 static FILE *infile; 166 167 static int argc; 168 static char **argv; 169 170 /* 171 * Macro processing 172 * 173 * Macro table definitions 174 */ 175 typedef int pacmac; /* compressed macro name */ 176 static int argconcat = 0; /* concat arguments together (-me only) */ 177 178 #define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF)) 179 #define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF)) 180 181 struct mactab { 182 int condition; 183 pacmac macname; 184 int (*func)(pacmac); 185 }; 186 187 static const struct mactab troffmactab[]; 188 static const struct mactab ppmactab[]; 189 static const struct mactab msmactab[]; 190 static const struct mactab mmmactab[]; 191 static const struct mactab memactab[]; 192 static const struct mactab manmactab[]; 193 194 /* 195 * Macro table initialization 196 */ 197 #define M(cond, c1, c2, func) {cond, tomac(c1, c2), func} 198 199 /* 200 * Flags for matching conditions other than 201 * the macro name 202 */ 203 #define NONE 0 204 #define FNEST 1 /* no nested files */ 205 #define NOMAC 2 /* no macro */ 206 #define MAC 3 /* macro */ 207 #define PARAG 4 /* in a paragraph */ 208 #define MSF 5 /* msflag is on */ 209 #define NBLK 6 /* set if no blocks to be kept */ 210 211 /* 212 * Return codes from macro minions, determine where to jump, 213 * how to repeat/reprocess text 214 */ 215 #define COMX 1 /* goto comx */ 216 #define COM 2 /* goto com */ 217 218 static int skeqn(void); 219 static int eof(void); 220 #ifdef DEBUG 221 static int _C1(void); 222 static int _C(void); 223 #endif 224 static int EQ(pacmac); 225 static int domacro(pacmac); 226 static int PS(pacmac); 227 static int skip(pacmac); 228 static int intbl(pacmac); 229 static int outtbl(pacmac); 230 static int so(pacmac); 231 static int nx(pacmac); 232 static int skiptocom(pacmac); 233 static int PP(pacmac); 234 static int AU(pacmac); 235 static int SH(pacmac); 236 static int UX(pacmac); 237 static int MMHU(pacmac); 238 static int mesnblock(pacmac); 239 static int mssnblock(pacmac); 240 static int nf(pacmac); 241 static int ce(pacmac); 242 static int meip(pacmac); 243 static int mepp(pacmac); 244 static int mesh(pacmac); 245 static int mefont(pacmac); 246 static int manfont(pacmac); 247 static int manpp(pacmac); 248 static int macsort(const void *, const void *); 249 static int sizetab(const struct mactab *); 250 static void getfname(void); 251 static void textline(char *, int); 252 static void work(void); 253 static void regline(void (*)(char *, int), int); 254 static void macro(void); 255 static void tbl(void); 256 static void stbl(void); 257 static void eqn(void); 258 static void backsl(void); 259 static void sce(void); 260 static void refer(int); 261 static void inpic(void); 262 static void msputmac(char *, int); 263 static void msputwords(int); 264 static void meputmac(char *, int); 265 static void meputwords(int); 266 static void noblock(char, char); 267 static void defcomline(pacmac); 268 static void comline(void); 269 static void buildtab(const struct mactab **, int *); 270 static FILE *opn(char *); 271 static struct mactab *macfill(struct mactab *, const struct mactab *); 272 static void usage(void) __attribute__((__noreturn__)); 273 274 int 275 main(int ac, char **av) 276 { 277 int i, ch; 278 int errflg = 0; 279 int kflag = NO; 280 281 iflag = NO; 282 wordflag = NO; 283 msflag = NO; 284 mac = ME; 285 disp = NO; 286 parag = NO; 287 inmacro = NO; 288 intable = NO; 289 ldelim = NOCHAR; 290 rdelim = NOCHAR; 291 keepblock = YES; 292 293 while ((ch = getopt(ac, av, "ikpwm:")) != -1) { 294 switch (ch) { 295 case 'i': 296 iflag = YES; 297 break; 298 case 'k': 299 kflag = YES; 300 break; 301 case 'm': 302 msflag = YES; 303 keepblock = NO; 304 switch (optarg[0]) { 305 case 'm': 306 mac = MM; 307 break; 308 case 's': 309 mac = MS; 310 break; 311 case 'e': 312 mac = ME; 313 break; 314 case 'a': 315 mac = MA; 316 break; 317 case 'l': 318 disp = YES; 319 break; 320 default: 321 errflg++; 322 break; 323 } 324 if (errflg == 0 && optarg[1] != '\0') 325 errflg++; 326 break; 327 case 'p': 328 parag = YES; 329 break; 330 case 'w': 331 wordflag = YES; 332 kflag = YES; 333 break; 334 default: 335 errflg++; 336 } 337 } 338 argc = ac - optind; 339 argv = av + optind; 340 341 if (kflag) 342 keepblock = YES; 343 if (errflg) 344 usage(); 345 346 #ifdef DEBUG 347 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n", 348 msflag, mactab[mac], keepblock, disp); 349 #endif /* DEBUG */ 350 if (argc == 0) { 351 infile = stdin; 352 } else { 353 infile = opn(argv[0]); 354 --argc; 355 ++argv; 356 } 357 files[0] = infile; 358 filesp = &files[0]; 359 360 for (i = 'a'; i <= 'z' ; ++i) 361 chars[i] = LETTER; 362 for (i = 'A'; i <= 'Z'; ++i) 363 chars[i] = LETTER; 364 for (i = '0'; i <= '9'; ++i) 365 chars[i] = DIGIT; 366 chars['\''] = APOS; 367 chars['&'] = APOS; 368 chars['.'] = PUNCT; 369 chars[','] = PUNCT; 370 chars[';'] = PUNCT; 371 chars['?'] = PUNCT; 372 chars[':'] = PUNCT; 373 work(); 374 return 0; 375 } 376 377 static int 378 skeqn(void) 379 { 380 381 while ((c = getc(infile)) != rdelim) { 382 if (c == EOF) 383 c = eof(); 384 else if (c == '"') { 385 while ((c = getc(infile)) != '"') { 386 if (c == EOF || 387 (c == '\\' && (c = getc(infile)) == EOF)) 388 c = eof(); 389 } 390 } 391 } 392 if (msflag) 393 return c == 'x'; 394 return c == ' '; 395 } 396 397 static FILE * 398 opn(char *p) 399 { 400 FILE *fd; 401 402 if ((fd = fopen(p, "r")) == NULL) 403 err(1, "fopen %s", p); 404 405 return fd; 406 } 407 408 static int 409 eof(void) 410 { 411 412 if (infile != stdin) 413 fclose(infile); 414 if (filesp > files) 415 infile = *--filesp; 416 else if (argc > 0) { 417 infile = opn(argv[0]); 418 --argc; 419 ++argv; 420 } else 421 exit(0); 422 return C; 423 } 424 425 static void 426 getfname(void) 427 { 428 char *p; 429 struct chain { 430 struct chain *nextp; 431 char *datap; 432 } *q; 433 static struct chain *namechain= NULL; 434 435 while (C == ' ') 436 ; /* nothing */ 437 438 for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' && 439 c != ' ' && c != '\t' && c != '\\'; ++p) 440 C; 441 *p = '\0'; 442 while (c != '\n') 443 C; 444 445 /* see if this name has already been used */ 446 for (q = namechain ; q; q = q->nextp) 447 if (strcmp(fname, q->datap) == 0) { 448 fname[0] = '\0'; 449 return; 450 } 451 452 q = (struct chain *) malloc(sizeof(struct chain)); 453 if (q == NULL) 454 err(1, NULL); 455 q->nextp = namechain; 456 q->datap = strdup(fname); 457 if (q->datap == NULL) 458 err(1, NULL); 459 namechain = q; 460 } 461 462 /*ARGSUSED*/ 463 static void 464 textline(char *str, int constant) 465 { 466 467 if (wordflag) { 468 msputwords(0); 469 return; 470 } 471 puts(str); 472 } 473 474 void 475 work(void) 476 { 477 478 for (;;) { 479 C; 480 #ifdef FULLDEBUG 481 printf("Starting work with `%c'\n", c); 482 #endif /* FULLDEBUG */ 483 if (c == '.' || c == '\'') 484 comline(); 485 else 486 regline(textline, TWO); 487 } 488 } 489 490 static void 491 regline(void (*pfunc)(char *, int), int constant) 492 { 493 494 line[0] = c; 495 lp = line; 496 while (lp - line < sizeof(line)) { 497 if (c == '\\') { 498 *lp = ' '; 499 backsl(); 500 } 501 if (c == '\n') 502 break; 503 if (intable && c == 'T') { 504 *++lp = C; 505 if (c == '{' || c == '}') { 506 lp[-1] = ' '; 507 *lp = C; 508 } 509 } else { 510 *++lp = C; 511 } 512 } 513 *lp = '\0'; 514 515 if (line[0] != '\0') 516 (*pfunc)(line, constant); 517 } 518 519 static void 520 macro(void) 521 { 522 523 if (msflag) { 524 do { 525 SKIP; 526 } while (C!='.' || C!='.' || C=='.'); /* look for .. */ 527 if (c != '\n') 528 SKIP; 529 return; 530 } 531 SKIP; 532 inmacro = YES; 533 } 534 535 static void 536 tbl(void) 537 { 538 539 while (C != '.') 540 ; /* nothing */ 541 SKIP; 542 intable = YES; 543 } 544 545 static void 546 stbl(void) 547 { 548 549 while (C != '.') 550 ; /* nothing */ 551 SKIP_TO_COM; 552 if (c != 'T' || C != 'E') { 553 SKIP; 554 pc = c; 555 while (C != '.' || pc != '\n' || C != 'T' || C != 'E') 556 pc = c; 557 } 558 } 559 560 static void 561 eqn(void) 562 { 563 int c1, c2; 564 int dflg; 565 char last; 566 567 last=0; 568 dflg = 1; 569 SKIP; 570 571 for (;;) { 572 if (C1 == '.' || c == '\'') { 573 while (C1 == ' ' || c == '\t') 574 ; 575 if (c == 'E' && C1 == 'N') { 576 SKIP; 577 if (msflag && dflg) { 578 putchar('x'); 579 putchar(' '); 580 if (last) { 581 putchar(last); 582 putchar('\n'); 583 } 584 } 585 return; 586 } 587 } else if (c == 'd') { 588 /* look for delim */ 589 if (C1 == 'e' && C1 == 'l') 590 if (C1 == 'i' && C1 == 'm') { 591 while (C1 == ' ') 592 ; /* nothing */ 593 594 if ((c1 = c) == '\n' || 595 (c2 = C1) == '\n' || 596 (c1 == 'o' && c2 == 'f' && C1=='f')) { 597 ldelim = NOCHAR; 598 rdelim = NOCHAR; 599 } else { 600 ldelim = c1; 601 rdelim = c2; 602 } 603 } 604 dflg = 0; 605 } 606 607 if (c != '\n') 608 while (C1 != '\n') { 609 if (chars[c] == PUNCT) 610 last = c; 611 else if (c != ' ') 612 last = 0; 613 } 614 } 615 } 616 617 /* skip over a complete backslash construction */ 618 static void 619 backsl(void) 620 { 621 int bdelim; 622 623 sw: 624 switch (C) { 625 case '"': 626 SKIP; 627 return; 628 629 case 's': 630 if (C == '\\') 631 backsl(); 632 else { 633 while (C >= '0' && c <= '9') 634 ; /* nothing */ 635 ungetc(c, infile); 636 c = '0'; 637 } 638 --lp; 639 return; 640 641 case 'f': 642 case 'n': 643 case '*': 644 if (C != '(') 645 return; 646 647 case '(': 648 if (msflag) { 649 if (C == 'e') { 650 if (C == 'm') { 651 *lp = '-'; 652 return; 653 } 654 } 655 else if (c != '\n') 656 C; 657 return; 658 } 659 if (C != '\n') 660 C; 661 return; 662 663 case '$': 664 C; /* discard argument number */ 665 return; 666 667 case 'b': 668 case 'x': 669 case 'v': 670 case 'h': 671 case 'w': 672 case 'o': 673 case 'l': 674 case 'L': 675 if ((bdelim = C) == '\n') 676 return; 677 while (C != '\n' && c != bdelim) 678 if (c == '\\') 679 backsl(); 680 return; 681 682 case '\\': 683 if (inmacro) 684 goto sw; 685 686 default: 687 return; 688 } 689 } 690 691 static void 692 sce(void) 693 { 694 char *ap; 695 int n, i; 696 char a[10]; 697 698 for (ap = a; C != '\n'; ap++) { 699 *ap = c; 700 if (ap == &a[9]) { 701 SKIP; 702 ap = a; 703 break; 704 } 705 } 706 if (ap != a) 707 n = atoi(a); 708 else 709 n = 1; 710 for (i = 0; i < n;) { 711 if (C == '.') { 712 if (C == 'c') { 713 if (C == 'e') { 714 while (C == ' ') 715 ; /* nothing */ 716 if (c == '0') { 717 SKIP; 718 break; 719 } else 720 SKIP; 721 } 722 else 723 SKIP; 724 } else if (c == 'P' || C == 'P') { 725 if (c != '\n') 726 SKIP; 727 break; 728 } else if (c != '\n') 729 SKIP; 730 } else { 731 SKIP; 732 i++; 733 } 734 } 735 } 736 737 static void 738 refer(int c1) 739 { 740 int c2; 741 742 if (c1 != '\n') 743 SKIP; 744 745 for (c2 = -1;;) { 746 if (C != '.') 747 SKIP; 748 else { 749 if (C != ']') 750 SKIP; 751 else { 752 while (C != '\n') 753 c2 = c; 754 if (c2 != -1 && chars[c2] == PUNCT) 755 putchar(c2); 756 return; 757 } 758 } 759 } 760 } 761 762 static void 763 inpic(void) 764 { 765 int c1; 766 char *p1; 767 768 SKIP; 769 p1 = line; 770 c = '\n'; 771 for (;;) { 772 c1 = c; 773 if (C == '.' && c1 == '\n') { 774 if (C != 'P') { 775 if (c == '\n') 776 continue; 777 else { 778 SKIP; 779 c = '\n'; 780 continue; 781 } 782 } 783 if (C != 'E') { 784 if (c == '\n') 785 continue; 786 else { 787 SKIP; 788 c = '\n'; 789 continue; 790 } 791 } 792 SKIP; 793 return; 794 } 795 else if (c == '\"') { 796 while (C != '\"') { 797 if (c == '\\') { 798 if (C == '\"') 799 continue; 800 ungetc(c, infile); 801 backsl(); 802 } else 803 *p1++ = c; 804 } 805 *p1++ = ' '; 806 } 807 else if (c == '\n' && p1 != line) { 808 *p1 = '\0'; 809 if (wordflag) 810 msputwords(NO); 811 else { 812 puts(line); 813 putchar('\n'); 814 } 815 p1 = line; 816 } 817 } 818 } 819 820 #ifdef DEBUG 821 static int 822 _C1(void) 823 { 824 825 return C1get); 826 } 827 828 static int 829 _C(void) 830 { 831 832 return Cget); 833 } 834 #endif /* DEBUG */ 835 836 /* 837 * Put out a macro line, using ms and mm conventions. 838 */ 839 static void 840 msputmac(char *s, int constant) 841 { 842 char *t; 843 int found; 844 int last; 845 846 last = 0; 847 found = 0; 848 if (wordflag) { 849 msputwords(YES); 850 return; 851 } 852 while (*s) { 853 while (*s == ' ' || *s == '\t') 854 putchar(*s++); 855 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t) 856 ; /* nothing */ 857 if (*s == '\"') 858 s++; 859 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER && 860 chars[(unsigned char)s[1]] == LETTER) { 861 while (s < t) 862 if (*s == '\"') 863 s++; 864 else 865 putchar(*s++); 866 last = *(t-1); 867 found++; 868 } else if (found && chars[(unsigned char)s[0]] == PUNCT && 869 s[1] == '\0') { 870 putchar(*s++); 871 } else { 872 last = *(t - 1); 873 s = t; 874 } 875 } 876 putchar('\n'); 877 if (msflag && chars[last] == PUNCT) { 878 putchar(last); 879 putchar('\n'); 880 } 881 } 882 883 /* 884 * put out words (for the -w option) with ms and mm conventions 885 */ 886 static void 887 msputwords(int macline) 888 { 889 char *p, *p1; 890 int i, nlet; 891 892 for (p1 = line;;) { 893 /* 894 * skip initial specials ampersands and apostrophes 895 */ 896 while (chars[(unsigned char)*p1] < DIGIT) 897 if (*p1++ == '\0') 898 return; 899 nlet = 0; 900 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p) 901 if (i == LETTER) 902 ++nlet; 903 904 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) { 905 /* 906 * delete trailing ampersands and apostrophes 907 */ 908 while ((i = chars[(unsigned char)p[-1]]) == PUNCT || 909 i == APOS ) 910 --p; 911 while (p1 < p) 912 putchar(*p1++); 913 putchar('\n'); 914 } else { 915 p1 = p; 916 } 917 } 918 } 919 920 /* 921 * put out a macro using the me conventions 922 */ 923 #define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; } 924 #define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\cp' && *cp !='\0') { cp++; } 925 926 static void 927 meputmac(char *cp, int constant) 928 { 929 char *np; 930 int found; 931 int argno; 932 int last; 933 int inquote; 934 935 last = 0; 936 found = 0; 937 if (wordflag) { 938 meputwords(YES); 939 return; 940 } 941 for (argno = 0; *cp; argno++) { 942 SKIPBLANK(cp); 943 inquote = (*cp == '"'); 944 if (inquote) 945 cp++; 946 for (np = cp; *np; np++) { 947 switch (*np) { 948 case '\n': 949 case '\0': 950 break; 951 952 case '\t': 953 case ' ': 954 if (inquote) 955 continue; 956 else 957 goto endarg; 958 959 case '"': 960 if (inquote && np[1] == '"') { 961 memmove(np, np + 1, strlen(np)); 962 np++; 963 continue; 964 } else { 965 *np = ' '; /* bye bye " */ 966 goto endarg; 967 } 968 969 default: 970 continue; 971 } 972 } 973 endarg: ; 974 /* 975 * cp points at the first char in the arg 976 * np points one beyond the last char in the arg 977 */ 978 if ((argconcat == 0) || (argconcat != argno)) 979 putchar(' '); 980 #ifdef FULLDEBUG 981 { 982 char *p; 983 printf("[%d,%d: ", argno, np - cp); 984 for (p = cp; p < np; p++) { 985 putchar(*p); 986 } 987 printf("]"); 988 } 989 #endif /* FULLDEBUG */ 990 /* 991 * Determine if the argument merits being printed 992 * 993 * constant is the cut off point below which something 994 * is not a word. 995 */ 996 if (((np - cp) > constant) && 997 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) { 998 for (cp = cp; cp < np; cp++) 999 putchar(*cp); 1000 last = np[-1]; 1001 found++; 1002 } else if (found && (np - cp == 1) && 1003 chars[(unsigned char)*cp] == PUNCT) { 1004 putchar(*cp); 1005 } else { 1006 last = np[-1]; 1007 } 1008 cp = np; 1009 } 1010 if (msflag && chars[last] == PUNCT) 1011 putchar(last); 1012 putchar('\n'); 1013 } 1014 1015 /* 1016 * put out words (for the -w option) with ms and mm conventions 1017 */ 1018 static void 1019 meputwords(int macline) 1020 { 1021 1022 msputwords(macline); 1023 } 1024 1025 /* 1026 * 1027 * Skip over a nested set of macros 1028 * 1029 * Possible arguments to noblock are: 1030 * 1031 * fi end of unfilled text 1032 * PE pic ending 1033 * DE display ending 1034 * 1035 * for ms and mm only: 1036 * KE keep ending 1037 * 1038 * NE undocumented match to NS (for mm?) 1039 * LE mm only: matches RL or *L (for lists) 1040 * 1041 * for me: 1042 * ([lqbzcdf] 1043 */ 1044 static void 1045 noblock(char a1, char a2) 1046 { 1047 int c1,c2; 1048 int eqnf; 1049 int lct; 1050 1051 lct = 0; 1052 eqnf = 1; 1053 SKIP; 1054 for (;;) { 1055 while (C != '.') 1056 if (c == '\n') 1057 continue; 1058 else 1059 SKIP; 1060 if ((c1 = C) == '\n') 1061 continue; 1062 if ((c2 = C) == '\n') 1063 continue; 1064 if (c1 == a1 && c2 == a2) { 1065 SKIP; 1066 if (lct != 0) { 1067 lct--; 1068 continue; 1069 } 1070 if (eqnf) 1071 putchar('.'); 1072 putchar('\n'); 1073 return; 1074 } else if (a1 == 'L' && c2 == 'L') { 1075 lct++; 1076 SKIP; 1077 } 1078 /* 1079 * equations (EQ) nested within a display 1080 */ 1081 else if (c1 == 'E' && c2 == 'Q') { 1082 if ((mac == ME && a1 == ')') 1083 || (mac != ME && a1 == 'D')) { 1084 eqn(); 1085 eqnf=0; 1086 } 1087 } 1088 /* 1089 * turning on filling is done by the paragraphing 1090 * macros 1091 */ 1092 else if (a1 == 'f') { /* .fi */ 1093 if ((mac == ME && (c2 == 'h' || c2 == 'p')) 1094 || (mac != ME && (c1 == 'P' || c2 == 'P'))) { 1095 SKIP; 1096 return; 1097 } 1098 } else { 1099 SKIP; 1100 } 1101 } 1102 } 1103 1104 static int 1105 /*ARGSUSED*/ 1106 EQ(pacmac unused) 1107 { 1108 1109 eqn(); 1110 return 0; 1111 } 1112 1113 static int 1114 /*ARGSUSED*/ 1115 domacro(pacmac unused) 1116 { 1117 1118 macro(); 1119 return 0; 1120 } 1121 1122 static int 1123 /*ARGSUSED*/ 1124 PS(pacmac unused) 1125 { 1126 1127 for (C; c == ' ' || c == '\t'; C) 1128 ; /* nothing */ 1129 1130 if (c == '<') { /* ".PS < file" -- don't expect a .PE */ 1131 SKIP; 1132 return 0; 1133 } 1134 if (!msflag) 1135 inpic(); 1136 else 1137 noblock('P', 'E'); 1138 return 0; 1139 } 1140 1141 static int 1142 /*ARGSUSED*/ 1143 skip(pacmac unused) 1144 { 1145 1146 SKIP; 1147 return 0; 1148 } 1149 1150 static int 1151 /*ARGSUSED*/ 1152 intbl(pacmac unused) 1153 { 1154 1155 if (msflag) 1156 stbl(); 1157 else 1158 tbl(); 1159 return 0; 1160 } 1161 1162 static int 1163 /*ARGSUSED*/ 1164 outtbl(pacmac unused) 1165 { 1166 1167 intable = NO; 1168 return 0; 1169 } 1170 1171 int 1172 /*ARGSUSED*/ 1173 so(pacmac unused) 1174 { 1175 1176 if (!iflag) { 1177 getfname(); 1178 if (fname[0]) { 1179 if (++filesp - &files[0] > MAXFILES) 1180 err(1, "too many nested files (max %d)", 1181 MAXFILES); 1182 infile = *filesp = opn(fname); 1183 } 1184 } 1185 return 0; 1186 } 1187 1188 static int 1189 /*ARGSUSED*/ 1190 nx(pacmac unused) 1191 { 1192 1193 if (!iflag) { 1194 getfname(); 1195 if (fname[0] == '\0') 1196 exit(0); 1197 if (infile != stdin) 1198 fclose(infile); 1199 infile = *filesp = opn(fname); 1200 } 1201 return 0; 1202 } 1203 1204 static int 1205 /*ARGSUSED*/ 1206 skiptocom(pacmac unused) 1207 { 1208 1209 SKIP_TO_COM; 1210 return COMX; 1211 } 1212 1213 static int 1214 PP(pacmac c12) 1215 { 1216 int c1, c2; 1217 1218 frommac(c12, c1, c2); 1219 printf(".%c%c", c1, c2); 1220 while (C != '\n') 1221 putchar(c); 1222 putchar('\n'); 1223 return 0; 1224 } 1225 1226 static int 1227 /*ARGSUSED*/ 1228 AU(pacmac unused) 1229 { 1230 1231 if (mac == MM) 1232 return 0; 1233 SKIP_TO_COM; 1234 return COMX; 1235 } 1236 1237 static int 1238 SH(pacmac c12) 1239 { 1240 int c1, c2; 1241 1242 frommac(c12, c1, c2); 1243 1244 if (parag) { 1245 printf(".%c%c", c1, c2); 1246 while (C != '\n') 1247 putchar(c); 1248 putchar(c); 1249 putchar('!'); 1250 for (;;) { 1251 while (C != '\n') 1252 putchar(c); 1253 putchar('\n'); 1254 if (C == '.') 1255 return COM; 1256 putchar('!'); 1257 putchar(c); 1258 } 1259 /*NOTREACHED*/ 1260 } else { 1261 SKIP_TO_COM; 1262 return COMX; 1263 } 1264 } 1265 1266 static int 1267 /*ARGSUSED*/ 1268 UX(pacmac unused) 1269 { 1270 1271 if (wordflag) 1272 printf("UNIX\n"); 1273 else 1274 printf("UNIX "); 1275 return 0; 1276 } 1277 1278 static int 1279 MMHU(pacmac c12) 1280 { 1281 int c1, c2; 1282 1283 frommac(c12, c1, c2); 1284 if (parag) { 1285 printf(".%c%c", c1, c2); 1286 while (C != '\n') 1287 putchar(c); 1288 putchar('\n'); 1289 } else { 1290 SKIP; 1291 } 1292 return 0; 1293 } 1294 1295 static int 1296 mesnblock(pacmac c12) 1297 { 1298 int c1, c2; 1299 1300 frommac(c12, c1, c2); 1301 noblock(')', c2); 1302 return 0; 1303 } 1304 1305 static int 1306 mssnblock(pacmac c12) 1307 { 1308 int c1, c2; 1309 1310 frommac(c12, c1, c2); 1311 noblock(c1, 'E'); 1312 return 0; 1313 } 1314 1315 static int 1316 /*ARGUSED*/ 1317 nf(pacmac unused) 1318 { 1319 1320 noblock('f', 'i'); 1321 return 0; 1322 } 1323 1324 static int 1325 /*ARGUSED*/ 1326 ce(pacmac unused) 1327 { 1328 1329 sce(); 1330 return 0; 1331 } 1332 1333 static int 1334 meip(pacmac c12) 1335 { 1336 1337 if (parag) 1338 mepp(c12); 1339 else if (wordflag) /* save the tag */ 1340 regline(meputmac, ONE); 1341 else 1342 SKIP; 1343 return 0; 1344 } 1345 1346 /* 1347 * only called for -me .pp or .sh, when parag is on 1348 */ 1349 static int 1350 mepp(pacmac c12) 1351 { 1352 1353 PP(c12); /* eats the line */ 1354 return 0; 1355 } 1356 1357 /* 1358 * Start of a section heading; output the section name if doing words 1359 */ 1360 static int 1361 mesh(pacmac c12) 1362 { 1363 1364 if (parag) 1365 mepp(c12); 1366 else if (wordflag) 1367 defcomline(c12); 1368 else 1369 SKIP; 1370 return 0; 1371 } 1372 1373 /* 1374 * process a font setting 1375 */ 1376 static int 1377 mefont(pacmac c12) 1378 { 1379 1380 argconcat = 1; 1381 defcomline(c12); 1382 argconcat = 0; 1383 return 0; 1384 } 1385 1386 static int 1387 manfont(pacmac c12) 1388 { 1389 1390 return mefont(c12); 1391 } 1392 1393 static int 1394 manpp(pacmac c12) 1395 { 1396 1397 return mepp(c12); 1398 } 1399 1400 static void 1401 defcomline(pacmac c12) 1402 { 1403 int c1, c2; 1404 1405 frommac(c12, c1, c2); 1406 if (msflag && mac == MM && c2 == 'L') { 1407 if (disp || c1 == 'R') { 1408 noblock('L', 'E'); 1409 } else { 1410 SKIP; 1411 putchar('.'); 1412 } 1413 } 1414 else if (c1 == '.' && c2 == '.') { 1415 if (msflag) { 1416 SKIP; 1417 return; 1418 } 1419 while (C == '.') 1420 /*VOID*/; 1421 } 1422 ++inmacro; 1423 /* 1424 * Process the arguments to the macro 1425 */ 1426 switch (mac) { 1427 default: 1428 case MM: 1429 case MS: 1430 if (c1 <= 'Z' && msflag) 1431 regline(msputmac, ONE); 1432 else 1433 regline(msputmac, TWO); 1434 break; 1435 case ME: 1436 regline(meputmac, ONE); 1437 break; 1438 } 1439 --inmacro; 1440 } 1441 1442 static void 1443 comline(void) 1444 { 1445 int c1; 1446 int c2; 1447 pacmac c12; 1448 int mid; 1449 int lb, ub; 1450 int hit; 1451 static int tabsize = 0; 1452 static const struct mactab *mactab = NULL; 1453 const struct mactab *mp; 1454 1455 if (mactab == 0) 1456 buildtab(&mactab, &tabsize); 1457 com: 1458 while (C == ' ' || c == '\t') 1459 ; 1460 comx: 1461 if ((c1 = c) == '\n') 1462 return; 1463 c2 = C; 1464 if (c1 == '.' && c2 != '.') 1465 inmacro = NO; 1466 if (msflag && c1 == '[') { 1467 refer(c2); 1468 return; 1469 } 1470 if (parag && mac==MM && c1 == 'P' && c2 == '\n') { 1471 printf(".P\n"); 1472 return; 1473 } 1474 if (c2 == '\n') 1475 return; 1476 /* 1477 * Single letter macro 1478 */ 1479 if (mac == ME && (c2 == ' ' || c2 == '\t') ) 1480 c2 = ' '; 1481 c12 = tomac(c1, c2); 1482 /* 1483 * binary search through the table of macros 1484 */ 1485 lb = 0; 1486 ub = tabsize - 1; 1487 while (lb <= ub) { 1488 mid = (ub + lb) / 2; 1489 mp = &mactab[mid]; 1490 if (mp->macname < c12) 1491 lb = mid + 1; 1492 else if (mp->macname > c12) 1493 ub = mid - 1; 1494 else { 1495 hit = 1; 1496 #ifdef FULLDEBUG 1497 printf("preliminary hit macro %c%c ", c1, c2); 1498 #endif /* FULLDEBUG */ 1499 switch (mp->condition) { 1500 case NONE: 1501 hit = YES; 1502 break; 1503 case FNEST: 1504 hit = (filesp == files); 1505 break; 1506 case NOMAC: 1507 hit = !inmacro; 1508 break; 1509 case MAC: 1510 hit = inmacro; 1511 break; 1512 case PARAG: 1513 hit = parag; 1514 break; 1515 case NBLK: 1516 hit = !keepblock; 1517 break; 1518 default: 1519 hit = 0; 1520 } 1521 1522 if (hit) { 1523 #ifdef FULLDEBUG 1524 printf("MATCH\n"); 1525 #endif /* FULLDEBUG */ 1526 switch ((*(mp->func))(c12)) { 1527 default: 1528 return; 1529 case COMX: 1530 goto comx; 1531 case COM: 1532 goto com; 1533 } 1534 } 1535 #ifdef FULLDEBUG 1536 printf("FAIL\n"); 1537 #endif /* FULLDEBUG */ 1538 break; 1539 } 1540 } 1541 defcomline(c12); 1542 } 1543 1544 static int 1545 macsort(const void *p1, const void *p2) 1546 { 1547 const struct mactab *t1 = p1; 1548 const struct mactab *t2 = p2; 1549 1550 return t1->macname - t2->macname; 1551 } 1552 1553 static int 1554 sizetab(const struct mactab *mp) 1555 { 1556 int i; 1557 1558 i = 0; 1559 if (mp) { 1560 for (; mp->macname; mp++, i++) 1561 /*VOID*/ ; 1562 } 1563 return i; 1564 } 1565 1566 static struct mactab * 1567 macfill(struct mactab *dst, const struct mactab *src) 1568 { 1569 1570 if (src) { 1571 while (src->macname) 1572 *dst++ = *src++; 1573 } 1574 return dst; 1575 } 1576 1577 static void 1578 usage(void) 1579 { 1580 extern char *__progname; 1581 1582 fprintf(stderr, "usage: %s [-ikpw ] [ -m ( a | e | m | s | l ) ] [ filename ] ... \n", __progname); 1583 exit(1); 1584 } 1585 1586 static void 1587 buildtab(const struct mactab **r_back, int *r_size) 1588 { 1589 size_t size; 1590 const struct mactab *p1, *p2; 1591 struct mactab *back, *p; 1592 1593 size = sizetab(troffmactab) + sizetab(ppmactab); 1594 p1 = p2 = NULL; 1595 if (msflag) { 1596 switch (mac) { 1597 case ME: 1598 p1 = memactab; 1599 break; 1600 case MM: 1601 p1 = msmactab; 1602 p2 = mmmactab; 1603 break; 1604 case MS: 1605 p1 = msmactab; 1606 break; 1607 case MA: 1608 p1 = manmactab; 1609 break; 1610 default: 1611 break; 1612 } 1613 } 1614 size += sizetab(p1); 1615 size += sizetab(p2); 1616 back = calloc(size + 2, sizeof(struct mactab)); 1617 if (back == NULL) 1618 err(1, NULL); 1619 1620 p = macfill(back, troffmactab); 1621 p = macfill(p, ppmactab); 1622 p = macfill(p, p1); 1623 p = macfill(p, p2); 1624 1625 qsort(back, size, sizeof(struct mactab), macsort); 1626 *r_size = size; 1627 *r_back = back; 1628 } 1629 1630 /* 1631 * troff commands 1632 */ 1633 static const struct mactab troffmactab[] = { 1634 M(NONE, '\\','"', skip), /* comment */ 1635 M(NOMAC, 'd','e', domacro), /* define */ 1636 M(NOMAC, 'i','g', domacro), /* ignore till .. */ 1637 M(NOMAC, 'a','m', domacro), /* append macro */ 1638 M(NBLK, 'n','f', nf), /* filled */ 1639 M(NBLK, 'c','e', ce), /* centered */ 1640 1641 M(NONE, 's','o', so), /* source a file */ 1642 M(NONE, 'n','x', nx), /* go to next file */ 1643 1644 M(NONE, 't','m', skip), /* print string on tty */ 1645 M(NONE, 'h','w', skip), /* exception hyphen words */ 1646 M(NONE, 0,0, 0) 1647 }; 1648 1649 /* 1650 * Preprocessor output 1651 */ 1652 static const struct mactab ppmactab[] = { 1653 M(FNEST, 'E','Q', EQ), /* equation starting */ 1654 M(FNEST, 'T','S', intbl), /* table starting */ 1655 M(FNEST, 'T','C', intbl), /* alternative table? */ 1656 M(FNEST, 'T','&', intbl), /* table reformatting */ 1657 M(NONE, 'T','E', outtbl),/* table ending */ 1658 M(NONE, 'P','S', PS), /* picture starting */ 1659 M(NONE, 0,0, 0) 1660 }; 1661 1662 /* 1663 * Particular to ms and mm 1664 */ 1665 static const struct mactab msmactab[] = { 1666 M(NONE, 'T','L', skiptocom), /* title follows */ 1667 M(NONE, 'F','S', skiptocom), /* start footnote */ 1668 M(NONE, 'O','K', skiptocom), /* Other kws */ 1669 1670 M(NONE, 'N','R', skip), /* undocumented */ 1671 M(NONE, 'N','D', skip), /* use supplied date */ 1672 1673 M(PARAG, 'P','P', PP), /* begin parag */ 1674 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */ 1675 M(PARAG, 'L','P', PP), /* left blocked parag */ 1676 1677 M(NONE, 'A','U', AU), /* author */ 1678 M(NONE, 'A','I', AU), /* authors institution */ 1679 1680 M(NONE, 'S','H', SH), /* section heading */ 1681 M(NONE, 'S','N', SH), /* undocumented */ 1682 M(NONE, 'U','X', UX), /* unix */ 1683 1684 M(NBLK, 'D','S', mssnblock), /* start display text */ 1685 M(NBLK, 'K','S', mssnblock), /* start keep */ 1686 M(NBLK, 'K','F', mssnblock), /* start float keep */ 1687 M(NONE, 0,0, 0) 1688 }; 1689 1690 static const struct mactab mmmactab[] = { 1691 M(NONE, 'H',' ', MMHU), /* -mm ? */ 1692 M(NONE, 'H','U', MMHU), /* -mm ? */ 1693 M(PARAG, 'P',' ', PP), /* paragraph for -mm */ 1694 M(NBLK, 'N','S', mssnblock), /* undocumented */ 1695 M(NONE, 0,0, 0) 1696 }; 1697 1698 static const struct mactab memactab[] = { 1699 M(PARAG, 'p','p', mepp), 1700 M(PARAG, 'l','p', mepp), 1701 M(PARAG, 'n','p', mepp), 1702 M(NONE, 'i','p', meip), 1703 1704 M(NONE, 's','h', mesh), 1705 M(NONE, 'u','h', mesh), 1706 1707 M(NBLK, '(','l', mesnblock), 1708 M(NBLK, '(','q', mesnblock), 1709 M(NBLK, '(','b', mesnblock), 1710 M(NBLK, '(','z', mesnblock), 1711 M(NBLK, '(','c', mesnblock), 1712 1713 M(NBLK, '(','d', mesnblock), 1714 M(NBLK, '(','f', mesnblock), 1715 M(NBLK, '(','x', mesnblock), 1716 1717 M(NONE, 'r',' ', mefont), 1718 M(NONE, 'i',' ', mefont), 1719 M(NONE, 'b',' ', mefont), 1720 M(NONE, 'u',' ', mefont), 1721 M(NONE, 'q',' ', mefont), 1722 M(NONE, 'r','b', mefont), 1723 M(NONE, 'b','i', mefont), 1724 M(NONE, 'b','x', mefont), 1725 M(NONE, 0,0, 0) 1726 }; 1727 1728 static const struct mactab manmactab[] = { 1729 M(PARAG, 'B','I', manfont), 1730 M(PARAG, 'B','R', manfont), 1731 M(PARAG, 'I','B', manfont), 1732 M(PARAG, 'I','R', manfont), 1733 M(PARAG, 'R','B', manfont), 1734 M(PARAG, 'R','I', manfont), 1735 1736 M(PARAG, 'P','P', manpp), 1737 M(PARAG, 'L','P', manpp), 1738 M(PARAG, 'H','P', manpp), 1739 M(NONE, 0,0, 0) 1740 }; 1741