1 /* $NetBSD: deroff.c,v 1.5 2007/12/15 19:44:50 perry Exp $ */ 2 3 /* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */ 4 5 /*- 6 * Copyright (c) 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 /* 34 * Copyright (C) Caldera International Inc. 2001-2002. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code and documentation must retain the above 41 * copyright notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed or owned by Caldera 48 * International, Inc. 49 * 4. Neither the name of Caldera International, Inc. nor the names of other 50 * contributors may be used to endorse or promote products derived from 51 * this software without specific prior written permission. 52 * 53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 64 * POSSIBILITY OF SUCH DAMAGE. 65 */ 66 67 #ifndef lint 68 static const char copyright[] = 69 "@(#) Copyright (c) 1988, 1993\n\ 70 The Regents of the University of California. All rights reserved.\n"; 71 #endif /* not lint */ 72 73 #ifndef lint 74 #if 0 75 static const char sccsid[] = "@(#)deroff.c 8.1 (Berkeley) 6/6/93"; 76 #else 77 static const char rcsid[] = "$NetBSD: deroff.c,v 1.5 2007/12/15 19:44:50 perry Exp $"; 78 #endif 79 #endif /* not lint */ 80 81 #include <sys/cdefs.h> 82 #include <err.h> 83 #include <limits.h> 84 #include <stdio.h> 85 #include <stdlib.h> 86 #include <string.h> 87 #include <unistd.h> 88 89 /* 90 * Deroff command -- strip troff, eqn, and Tbl sequences from 91 * a file. Has two flags argument, -w, to cause output one word per line 92 * rather than in the original format. 93 * -mm (or -ms) causes the corresponding macro's to be interpreted 94 * so that just sentences are output 95 * -ml also gets rid of lists. 96 * Deroff follows .so and .nx commands, removes contents of macro 97 * definitions, equations (both .EQ ... .EN and $...$), 98 * Tbl command sequences, and Troff backslash constructions. 99 * 100 * All input is through the Cget macro; 101 * the most recently read character is in c. 102 * 103 * Modified by Robert Henry to process -me and -man macros. 104 */ 105 106 #define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) ) 107 #define C1get ( (c=getc(infile)) == EOF ? eof() : c) 108 109 #ifdef DEBUG 110 # define C _C() 111 # define C1 _C1() 112 #else /* not DEBUG */ 113 # define C Cget 114 # define C1 C1get 115 #endif /* not DEBUG */ 116 117 #define SKIP while (C != '\n') 118 #define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c 119 120 #define YES 1 121 #define NO 0 122 #define MS 0 /* -ms */ 123 #define MM 1 /* -mm */ 124 #define ME 2 /* -me */ 125 #define MA 3 /* -man */ 126 127 #ifdef DEBUG 128 char *mactab[] = { "-ms", "-mm", "-me", "-ma" }; 129 #endif /* DEBUG */ 130 131 #define ONE 1 132 #define TWO 2 133 134 #define NOCHAR -2 135 #define SPECIAL 0 136 #define APOS 1 137 #define PUNCT 2 138 #define DIGIT 3 139 #define LETTER 4 140 141 #define MAXFILES 20 142 143 static int iflag; 144 static int wordflag; 145 static int msflag; /* processing a source written using a mac package */ 146 static int mac; /* which package */ 147 static int disp; 148 static int parag; 149 static int inmacro; 150 static int intable; 151 static int keepblock; /* keep blocks of text; normally false when msflag */ 152 153 static char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ 154 155 static char line[LINE_MAX]; 156 static char *lp; 157 158 static int c; 159 static int pc; 160 static int ldelim; 161 static int rdelim; 162 163 static char fname[PATH_MAX]; 164 static FILE *files[MAXFILES]; 165 static FILE **filesp; 166 static FILE *infile; 167 168 static int argc; 169 static char **argv; 170 171 /* 172 * Macro processing 173 * 174 * Macro table definitions 175 */ 176 typedef int pacmac; /* compressed macro name */ 177 static int argconcat = 0; /* concat arguments together (-me only) */ 178 179 #define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF)) 180 #define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF)) 181 182 struct mactab { 183 int condition; 184 pacmac macname; 185 int (*func)(pacmac); 186 }; 187 188 static const struct mactab troffmactab[]; 189 static const struct mactab ppmactab[]; 190 static const struct mactab msmactab[]; 191 static const struct mactab mmmactab[]; 192 static const struct mactab memactab[]; 193 static const struct mactab manmactab[]; 194 195 /* 196 * Macro table initialization 197 */ 198 #define M(cond, c1, c2, func) {cond, tomac(c1, c2), func} 199 200 /* 201 * Flags for matching conditions other than 202 * the macro name 203 */ 204 #define NONE 0 205 #define FNEST 1 /* no nested files */ 206 #define NOMAC 2 /* no macro */ 207 #define MAC 3 /* macro */ 208 #define PARAG 4 /* in a paragraph */ 209 #define MSF 5 /* msflag is on */ 210 #define NBLK 6 /* set if no blocks to be kept */ 211 212 /* 213 * Return codes from macro minions, determine where to jump, 214 * how to repeat/reprocess text 215 */ 216 #define COMX 1 /* goto comx */ 217 #define COM 2 /* goto com */ 218 219 static int skeqn(void); 220 static int eof(void); 221 #ifdef DEBUG 222 static int _C1(void); 223 static int _C(void); 224 #endif 225 static int EQ(pacmac); 226 static int domacro(pacmac); 227 static int PS(pacmac); 228 static int skip(pacmac); 229 static int intbl(pacmac); 230 static int outtbl(pacmac); 231 static int so(pacmac); 232 static int nx(pacmac); 233 static int skiptocom(pacmac); 234 static int PP(pacmac); 235 static int AU(pacmac); 236 static int SH(pacmac); 237 static int UX(pacmac); 238 static int MMHU(pacmac); 239 static int mesnblock(pacmac); 240 static int mssnblock(pacmac); 241 static int nf(pacmac); 242 static int ce(pacmac); 243 static int meip(pacmac); 244 static int mepp(pacmac); 245 static int mesh(pacmac); 246 static int mefont(pacmac); 247 static int manfont(pacmac); 248 static int manpp(pacmac); 249 static int macsort(const void *, const void *); 250 static int sizetab(const struct mactab *); 251 static void getfname(void); 252 static void textline(char *, int); 253 static void work(void); 254 static void regline(void (*)(char *, int), int); 255 static void macro(void); 256 static void tbl(void); 257 static void stbl(void); 258 static void eqn(void); 259 static void backsl(void); 260 static void sce(void); 261 static void refer(int); 262 static void inpic(void); 263 static void msputmac(char *, int); 264 static void msputwords(int); 265 static void meputmac(char *, int); 266 static void meputwords(int); 267 static void noblock(char, char); 268 static void defcomline(pacmac); 269 static void comline(void); 270 static void buildtab(const struct mactab **, int *); 271 static FILE *opn(char *); 272 static struct mactab *macfill(struct mactab *, const struct mactab *); 273 static void usage(void) __dead; 274 275 int 276 main(int ac, char **av) 277 { 278 int i, ch; 279 int errflg = 0; 280 int kflag = NO; 281 282 iflag = NO; 283 wordflag = NO; 284 msflag = NO; 285 mac = ME; 286 disp = NO; 287 parag = NO; 288 inmacro = NO; 289 intable = NO; 290 ldelim = NOCHAR; 291 rdelim = NOCHAR; 292 keepblock = YES; 293 294 while ((ch = getopt(ac, av, "ikpwm:")) != -1) { 295 switch (ch) { 296 case 'i': 297 iflag = YES; 298 break; 299 case 'k': 300 kflag = YES; 301 break; 302 case 'm': 303 msflag = YES; 304 keepblock = NO; 305 switch (optarg[0]) { 306 case 'm': 307 mac = MM; 308 break; 309 case 's': 310 mac = MS; 311 break; 312 case 'e': 313 mac = ME; 314 break; 315 case 'a': 316 mac = MA; 317 break; 318 case 'l': 319 disp = YES; 320 break; 321 default: 322 errflg++; 323 break; 324 } 325 if (errflg == 0 && optarg[1] != '\0') 326 errflg++; 327 break; 328 case 'p': 329 parag = YES; 330 break; 331 case 'w': 332 wordflag = YES; 333 kflag = YES; 334 break; 335 default: 336 errflg++; 337 } 338 } 339 argc = ac - optind; 340 argv = av + optind; 341 342 if (kflag) 343 keepblock = YES; 344 if (errflg) 345 usage(); 346 347 #ifdef DEBUG 348 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n", 349 msflag, mactab[mac], keepblock, disp); 350 #endif /* DEBUG */ 351 if (argc == 0) { 352 infile = stdin; 353 } else { 354 infile = opn(argv[0]); 355 --argc; 356 ++argv; 357 } 358 files[0] = infile; 359 filesp = &files[0]; 360 361 for (i = 'a'; i <= 'z' ; ++i) 362 chars[i] = LETTER; 363 for (i = 'A'; i <= 'Z'; ++i) 364 chars[i] = LETTER; 365 for (i = '0'; i <= '9'; ++i) 366 chars[i] = DIGIT; 367 chars['\''] = APOS; 368 chars['&'] = APOS; 369 chars['.'] = PUNCT; 370 chars[','] = PUNCT; 371 chars[';'] = PUNCT; 372 chars['?'] = PUNCT; 373 chars[':'] = PUNCT; 374 work(); 375 return 0; 376 } 377 378 static int 379 skeqn(void) 380 { 381 382 while ((c = getc(infile)) != rdelim) { 383 if (c == EOF) 384 c = eof(); 385 else if (c == '"') { 386 while ((c = getc(infile)) != '"') { 387 if (c == EOF || 388 (c == '\\' && (c = getc(infile)) == EOF)) 389 c = eof(); 390 } 391 } 392 } 393 if (msflag) 394 return c == 'x'; 395 return c == ' '; 396 } 397 398 static FILE * 399 opn(char *p) 400 { 401 FILE *fd; 402 403 if ((fd = fopen(p, "r")) == NULL) 404 err(1, "fopen %s", p); 405 406 return fd; 407 } 408 409 static int 410 eof(void) 411 { 412 413 if (infile != stdin) 414 fclose(infile); 415 if (filesp > files) 416 infile = *--filesp; 417 else if (argc > 0) { 418 infile = opn(argv[0]); 419 --argc; 420 ++argv; 421 } else 422 exit(0); 423 return C; 424 } 425 426 static void 427 getfname(void) 428 { 429 char *p; 430 struct chain { 431 struct chain *nextp; 432 char *datap; 433 } *q; 434 static struct chain *namechain= NULL; 435 436 while (C == ' ') 437 ; /* nothing */ 438 439 for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' && 440 c != ' ' && c != '\t' && c != '\\'; ++p) 441 C; 442 *p = '\0'; 443 while (c != '\n') 444 C; 445 446 /* see if this name has already been used */ 447 for (q = namechain ; q; q = q->nextp) 448 if (strcmp(fname, q->datap) == 0) { 449 fname[0] = '\0'; 450 return; 451 } 452 453 q = (struct chain *) malloc(sizeof(struct chain)); 454 if (q == NULL) 455 err(1, NULL); 456 q->nextp = namechain; 457 q->datap = strdup(fname); 458 if (q->datap == NULL) 459 err(1, NULL); 460 namechain = q; 461 } 462 463 /*ARGSUSED*/ 464 static void 465 textline(char *str, int constant) 466 { 467 468 if (wordflag) { 469 msputwords(0); 470 return; 471 } 472 puts(str); 473 } 474 475 void 476 work(void) 477 { 478 479 for (;;) { 480 C; 481 #ifdef FULLDEBUG 482 printf("Starting work with `%c'\n", c); 483 #endif /* FULLDEBUG */ 484 if (c == '.' || c == '\'') 485 comline(); 486 else 487 regline(textline, TWO); 488 } 489 } 490 491 static void 492 regline(void (*pfunc)(char *, int), int constant) 493 { 494 495 line[0] = c; 496 lp = line; 497 while (lp - line < sizeof(line)) { 498 if (c == '\\') { 499 *lp = ' '; 500 backsl(); 501 } 502 if (c == '\n') 503 break; 504 if (intable && c == 'T') { 505 *++lp = C; 506 if (c == '{' || c == '}') { 507 lp[-1] = ' '; 508 *lp = C; 509 } 510 } else { 511 *++lp = C; 512 } 513 } 514 *lp = '\0'; 515 516 if (line[0] != '\0') 517 (*pfunc)(line, constant); 518 } 519 520 static void 521 macro(void) 522 { 523 524 if (msflag) { 525 do { 526 SKIP; 527 } while (C!='.' || C!='.' || C=='.'); /* look for .. */ 528 if (c != '\n') 529 SKIP; 530 return; 531 } 532 SKIP; 533 inmacro = YES; 534 } 535 536 static void 537 tbl(void) 538 { 539 540 while (C != '.') 541 ; /* nothing */ 542 SKIP; 543 intable = YES; 544 } 545 546 static void 547 stbl(void) 548 { 549 550 while (C != '.') 551 ; /* nothing */ 552 SKIP_TO_COM; 553 if (c != 'T' || C != 'E') { 554 SKIP; 555 pc = c; 556 while (C != '.' || pc != '\n' || C != 'T' || C != 'E') 557 pc = c; 558 } 559 } 560 561 static void 562 eqn(void) 563 { 564 int c1, c2; 565 int dflg; 566 char last; 567 568 last=0; 569 dflg = 1; 570 SKIP; 571 572 for (;;) { 573 if (C1 == '.' || c == '\'') { 574 while (C1 == ' ' || c == '\t') 575 ; 576 if (c == 'E' && C1 == 'N') { 577 SKIP; 578 if (msflag && dflg) { 579 putchar('x'); 580 putchar(' '); 581 if (last) { 582 putchar(last); 583 putchar('\n'); 584 } 585 } 586 return; 587 } 588 } else if (c == 'd') { 589 /* look for delim */ 590 if (C1 == 'e' && C1 == 'l') 591 if (C1 == 'i' && C1 == 'm') { 592 while (C1 == ' ') 593 ; /* nothing */ 594 595 if ((c1 = c) == '\n' || 596 (c2 = C1) == '\n' || 597 (c1 == 'o' && c2 == 'f' && C1=='f')) { 598 ldelim = NOCHAR; 599 rdelim = NOCHAR; 600 } else { 601 ldelim = c1; 602 rdelim = c2; 603 } 604 } 605 dflg = 0; 606 } 607 608 if (c != '\n') 609 while (C1 != '\n') { 610 if (chars[c] == PUNCT) 611 last = c; 612 else if (c != ' ') 613 last = 0; 614 } 615 } 616 } 617 618 /* skip over a complete backslash construction */ 619 static void 620 backsl(void) 621 { 622 int bdelim; 623 624 sw: 625 switch (C) { 626 case '"': 627 SKIP; 628 return; 629 630 case 's': 631 if (C == '\\') 632 backsl(); 633 else { 634 while (C >= '0' && c <= '9') 635 ; /* nothing */ 636 ungetc(c, infile); 637 c = '0'; 638 } 639 --lp; 640 return; 641 642 case 'f': 643 case 'n': 644 case '*': 645 if (C != '(') 646 return; 647 648 case '(': 649 if (msflag) { 650 if (C == 'e') { 651 if (C == 'm') { 652 *lp = '-'; 653 return; 654 } 655 } 656 else if (c != '\n') 657 C; 658 return; 659 } 660 if (C != '\n') 661 C; 662 return; 663 664 case '$': 665 C; /* discard argument number */ 666 return; 667 668 case 'b': 669 case 'x': 670 case 'v': 671 case 'h': 672 case 'w': 673 case 'o': 674 case 'l': 675 case 'L': 676 if ((bdelim = C) == '\n') 677 return; 678 while (C != '\n' && c != bdelim) 679 if (c == '\\') 680 backsl(); 681 return; 682 683 case '\\': 684 if (inmacro) 685 goto sw; 686 687 default: 688 return; 689 } 690 } 691 692 static void 693 sce(void) 694 { 695 char *ap; 696 int n, i; 697 char a[10]; 698 699 for (ap = a; C != '\n'; ap++) { 700 *ap = c; 701 if (ap == &a[9]) { 702 SKIP; 703 ap = a; 704 break; 705 } 706 } 707 if (ap != a) 708 n = atoi(a); 709 else 710 n = 1; 711 for (i = 0; i < n;) { 712 if (C == '.') { 713 if (C == 'c') { 714 if (C == 'e') { 715 while (C == ' ') 716 ; /* nothing */ 717 if (c == '0') { 718 SKIP; 719 break; 720 } else 721 SKIP; 722 } 723 else 724 SKIP; 725 } else if (c == 'P' || C == 'P') { 726 if (c != '\n') 727 SKIP; 728 break; 729 } else if (c != '\n') 730 SKIP; 731 } else { 732 SKIP; 733 i++; 734 } 735 } 736 } 737 738 static void 739 refer(int c1) 740 { 741 int c2; 742 743 if (c1 != '\n') 744 SKIP; 745 746 for (c2 = -1;;) { 747 if (C != '.') 748 SKIP; 749 else { 750 if (C != ']') 751 SKIP; 752 else { 753 while (C != '\n') 754 c2 = c; 755 if (c2 != -1 && chars[c2] == PUNCT) 756 putchar(c2); 757 return; 758 } 759 } 760 } 761 } 762 763 static void 764 inpic(void) 765 { 766 int c1; 767 char *p1; 768 769 SKIP; 770 p1 = line; 771 c = '\n'; 772 for (;;) { 773 c1 = c; 774 if (C == '.' && c1 == '\n') { 775 if (C != 'P') { 776 if (c == '\n') 777 continue; 778 else { 779 SKIP; 780 c = '\n'; 781 continue; 782 } 783 } 784 if (C != 'E') { 785 if (c == '\n') 786 continue; 787 else { 788 SKIP; 789 c = '\n'; 790 continue; 791 } 792 } 793 SKIP; 794 return; 795 } 796 else if (c == '\"') { 797 while (C != '\"') { 798 if (c == '\\') { 799 if (C == '\"') 800 continue; 801 ungetc(c, infile); 802 backsl(); 803 } else 804 *p1++ = c; 805 } 806 *p1++ = ' '; 807 } 808 else if (c == '\n' && p1 != line) { 809 *p1 = '\0'; 810 if (wordflag) 811 msputwords(NO); 812 else { 813 puts(line); 814 putchar('\n'); 815 } 816 p1 = line; 817 } 818 } 819 } 820 821 #ifdef DEBUG 822 static int 823 _C1(void) 824 { 825 826 return C1get); 827 } 828 829 static int 830 _C(void) 831 { 832 833 return Cget); 834 } 835 #endif /* DEBUG */ 836 837 /* 838 * Put out a macro line, using ms and mm conventions. 839 */ 840 static void 841 msputmac(char *s, int constant) 842 { 843 char *t; 844 int found; 845 int last; 846 847 last = 0; 848 found = 0; 849 if (wordflag) { 850 msputwords(YES); 851 return; 852 } 853 while (*s) { 854 while (*s == ' ' || *s == '\t') 855 putchar(*s++); 856 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t) 857 ; /* nothing */ 858 if (*s == '\"') 859 s++; 860 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER && 861 chars[(unsigned char)s[1]] == LETTER) { 862 while (s < t) 863 if (*s == '\"') 864 s++; 865 else 866 putchar(*s++); 867 last = *(t-1); 868 found++; 869 } else if (found && chars[(unsigned char)s[0]] == PUNCT && 870 s[1] == '\0') { 871 putchar(*s++); 872 } else { 873 last = *(t - 1); 874 s = t; 875 } 876 } 877 putchar('\n'); 878 if (msflag && chars[last] == PUNCT) { 879 putchar(last); 880 putchar('\n'); 881 } 882 } 883 884 /* 885 * put out words (for the -w option) with ms and mm conventions 886 */ 887 static void 888 msputwords(int macline) 889 { 890 char *p, *p1; 891 int i, nlet; 892 893 for (p1 = line;;) { 894 /* 895 * skip initial specials ampersands and apostrophes 896 */ 897 while (chars[(unsigned char)*p1] < DIGIT) 898 if (*p1++ == '\0') 899 return; 900 nlet = 0; 901 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p) 902 if (i == LETTER) 903 ++nlet; 904 905 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) { 906 /* 907 * delete trailing ampersands and apostrophes 908 */ 909 while ((i = chars[(unsigned char)p[-1]]) == PUNCT || 910 i == APOS ) 911 --p; 912 while (p1 < p) 913 putchar(*p1++); 914 putchar('\n'); 915 } else { 916 p1 = p; 917 } 918 } 919 } 920 921 /* 922 * put out a macro using the me conventions 923 */ 924 #define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; } 925 #define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\cp' && *cp !='\0') { cp++; } 926 927 static void 928 meputmac(char *cp, int constant) 929 { 930 char *np; 931 int found; 932 int argno; 933 int last; 934 int inquote; 935 936 last = 0; 937 found = 0; 938 if (wordflag) { 939 meputwords(YES); 940 return; 941 } 942 for (argno = 0; *cp; argno++) { 943 SKIPBLANK(cp); 944 inquote = (*cp == '"'); 945 if (inquote) 946 cp++; 947 for (np = cp; *np; np++) { 948 switch (*np) { 949 case '\n': 950 case '\0': 951 break; 952 953 case '\t': 954 case ' ': 955 if (inquote) 956 continue; 957 else 958 goto endarg; 959 960 case '"': 961 if (inquote && np[1] == '"') { 962 memmove(np, np + 1, strlen(np)); 963 np++; 964 continue; 965 } else { 966 *np = ' '; /* bye bye " */ 967 goto endarg; 968 } 969 970 default: 971 continue; 972 } 973 } 974 endarg: ; 975 /* 976 * cp points at the first char in the arg 977 * np points one beyond the last char in the arg 978 */ 979 if ((argconcat == 0) || (argconcat != argno)) 980 putchar(' '); 981 #ifdef FULLDEBUG 982 { 983 char *p; 984 printf("[%d,%d: ", argno, np - cp); 985 for (p = cp; p < np; p++) { 986 putchar(*p); 987 } 988 printf("]"); 989 } 990 #endif /* FULLDEBUG */ 991 /* 992 * Determine if the argument merits being printed 993 * 994 * constant is the cut off point below which something 995 * is not a word. 996 */ 997 if (((np - cp) > constant) && 998 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) { 999 for (cp = cp; cp < np; cp++) 1000 putchar(*cp); 1001 last = np[-1]; 1002 found++; 1003 } else if (found && (np - cp == 1) && 1004 chars[(unsigned char)*cp] == PUNCT) { 1005 putchar(*cp); 1006 } else { 1007 last = np[-1]; 1008 } 1009 cp = np; 1010 } 1011 if (msflag && chars[last] == PUNCT) 1012 putchar(last); 1013 putchar('\n'); 1014 } 1015 1016 /* 1017 * put out words (for the -w option) with ms and mm conventions 1018 */ 1019 static void 1020 meputwords(int macline) 1021 { 1022 1023 msputwords(macline); 1024 } 1025 1026 /* 1027 * 1028 * Skip over a nested set of macros 1029 * 1030 * Possible arguments to noblock are: 1031 * 1032 * fi end of unfilled text 1033 * PE pic ending 1034 * DE display ending 1035 * 1036 * for ms and mm only: 1037 * KE keep ending 1038 * 1039 * NE undocumented match to NS (for mm?) 1040 * LE mm only: matches RL or *L (for lists) 1041 * 1042 * for me: 1043 * ([lqbzcdf] 1044 */ 1045 static void 1046 noblock(char a1, char a2) 1047 { 1048 int c1,c2; 1049 int eqnf; 1050 int lct; 1051 1052 lct = 0; 1053 eqnf = 1; 1054 SKIP; 1055 for (;;) { 1056 while (C != '.') 1057 if (c == '\n') 1058 continue; 1059 else 1060 SKIP; 1061 if ((c1 = C) == '\n') 1062 continue; 1063 if ((c2 = C) == '\n') 1064 continue; 1065 if (c1 == a1 && c2 == a2) { 1066 SKIP; 1067 if (lct != 0) { 1068 lct--; 1069 continue; 1070 } 1071 if (eqnf) 1072 putchar('.'); 1073 putchar('\n'); 1074 return; 1075 } else if (a1 == 'L' && c2 == 'L') { 1076 lct++; 1077 SKIP; 1078 } 1079 /* 1080 * equations (EQ) nested within a display 1081 */ 1082 else if (c1 == 'E' && c2 == 'Q') { 1083 if ((mac == ME && a1 == ')') 1084 || (mac != ME && a1 == 'D')) { 1085 eqn(); 1086 eqnf=0; 1087 } 1088 } 1089 /* 1090 * turning on filling is done by the paragraphing 1091 * macros 1092 */ 1093 else if (a1 == 'f') { /* .fi */ 1094 if ((mac == ME && (c2 == 'h' || c2 == 'p')) 1095 || (mac != ME && (c1 == 'P' || c2 == 'P'))) { 1096 SKIP; 1097 return; 1098 } 1099 } else { 1100 SKIP; 1101 } 1102 } 1103 } 1104 1105 static int 1106 /*ARGSUSED*/ 1107 EQ(pacmac unused) 1108 { 1109 1110 eqn(); 1111 return 0; 1112 } 1113 1114 static int 1115 /*ARGSUSED*/ 1116 domacro(pacmac unused) 1117 { 1118 1119 macro(); 1120 return 0; 1121 } 1122 1123 static int 1124 /*ARGSUSED*/ 1125 PS(pacmac unused) 1126 { 1127 1128 for (C; c == ' ' || c == '\t'; C) 1129 ; /* nothing */ 1130 1131 if (c == '<') { /* ".PS < file" -- don't expect a .PE */ 1132 SKIP; 1133 return 0; 1134 } 1135 if (!msflag) 1136 inpic(); 1137 else 1138 noblock('P', 'E'); 1139 return 0; 1140 } 1141 1142 static int 1143 /*ARGSUSED*/ 1144 skip(pacmac unused) 1145 { 1146 1147 SKIP; 1148 return 0; 1149 } 1150 1151 static int 1152 /*ARGSUSED*/ 1153 intbl(pacmac unused) 1154 { 1155 1156 if (msflag) 1157 stbl(); 1158 else 1159 tbl(); 1160 return 0; 1161 } 1162 1163 static int 1164 /*ARGSUSED*/ 1165 outtbl(pacmac unused) 1166 { 1167 1168 intable = NO; 1169 return 0; 1170 } 1171 1172 int 1173 /*ARGSUSED*/ 1174 so(pacmac unused) 1175 { 1176 1177 if (!iflag) { 1178 getfname(); 1179 if (fname[0]) { 1180 if (++filesp - &files[0] > MAXFILES) 1181 err(1, "too many nested files (max %d)", 1182 MAXFILES); 1183 infile = *filesp = opn(fname); 1184 } 1185 } 1186 return 0; 1187 } 1188 1189 static int 1190 /*ARGSUSED*/ 1191 nx(pacmac unused) 1192 { 1193 1194 if (!iflag) { 1195 getfname(); 1196 if (fname[0] == '\0') 1197 exit(0); 1198 if (infile != stdin) 1199 fclose(infile); 1200 infile = *filesp = opn(fname); 1201 } 1202 return 0; 1203 } 1204 1205 static int 1206 /*ARGSUSED*/ 1207 skiptocom(pacmac unused) 1208 { 1209 1210 SKIP_TO_COM; 1211 return COMX; 1212 } 1213 1214 static int 1215 PP(pacmac c12) 1216 { 1217 int c1, c2; 1218 1219 frommac(c12, c1, c2); 1220 printf(".%c%c", c1, c2); 1221 while (C != '\n') 1222 putchar(c); 1223 putchar('\n'); 1224 return 0; 1225 } 1226 1227 static int 1228 /*ARGSUSED*/ 1229 AU(pacmac unused) 1230 { 1231 1232 if (mac == MM) 1233 return 0; 1234 SKIP_TO_COM; 1235 return COMX; 1236 } 1237 1238 static int 1239 SH(pacmac c12) 1240 { 1241 int c1, c2; 1242 1243 frommac(c12, c1, c2); 1244 1245 if (parag) { 1246 printf(".%c%c", c1, c2); 1247 while (C != '\n') 1248 putchar(c); 1249 putchar(c); 1250 putchar('!'); 1251 for (;;) { 1252 while (C != '\n') 1253 putchar(c); 1254 putchar('\n'); 1255 if (C == '.') 1256 return COM; 1257 putchar('!'); 1258 putchar(c); 1259 } 1260 /*NOTREACHED*/ 1261 } else { 1262 SKIP_TO_COM; 1263 return COMX; 1264 } 1265 } 1266 1267 static int 1268 /*ARGSUSED*/ 1269 UX(pacmac unused) 1270 { 1271 1272 if (wordflag) 1273 printf("UNIX\n"); 1274 else 1275 printf("UNIX "); 1276 return 0; 1277 } 1278 1279 static int 1280 MMHU(pacmac c12) 1281 { 1282 int c1, c2; 1283 1284 frommac(c12, c1, c2); 1285 if (parag) { 1286 printf(".%c%c", c1, c2); 1287 while (C != '\n') 1288 putchar(c); 1289 putchar('\n'); 1290 } else { 1291 SKIP; 1292 } 1293 return 0; 1294 } 1295 1296 static int 1297 mesnblock(pacmac c12) 1298 { 1299 int c1, c2; 1300 1301 frommac(c12, c1, c2); 1302 noblock(')', c2); 1303 return 0; 1304 } 1305 1306 static int 1307 mssnblock(pacmac c12) 1308 { 1309 int c1, c2; 1310 1311 frommac(c12, c1, c2); 1312 noblock(c1, 'E'); 1313 return 0; 1314 } 1315 1316 static int 1317 /*ARGUSED*/ 1318 nf(pacmac unused) 1319 { 1320 1321 noblock('f', 'i'); 1322 return 0; 1323 } 1324 1325 static int 1326 /*ARGUSED*/ 1327 ce(pacmac unused) 1328 { 1329 1330 sce(); 1331 return 0; 1332 } 1333 1334 static int 1335 meip(pacmac c12) 1336 { 1337 1338 if (parag) 1339 mepp(c12); 1340 else if (wordflag) /* save the tag */ 1341 regline(meputmac, ONE); 1342 else 1343 SKIP; 1344 return 0; 1345 } 1346 1347 /* 1348 * only called for -me .pp or .sh, when parag is on 1349 */ 1350 static int 1351 mepp(pacmac c12) 1352 { 1353 1354 PP(c12); /* eats the line */ 1355 return 0; 1356 } 1357 1358 /* 1359 * Start of a section heading; output the section name if doing words 1360 */ 1361 static int 1362 mesh(pacmac c12) 1363 { 1364 1365 if (parag) 1366 mepp(c12); 1367 else if (wordflag) 1368 defcomline(c12); 1369 else 1370 SKIP; 1371 return 0; 1372 } 1373 1374 /* 1375 * process a font setting 1376 */ 1377 static int 1378 mefont(pacmac c12) 1379 { 1380 1381 argconcat = 1; 1382 defcomline(c12); 1383 argconcat = 0; 1384 return 0; 1385 } 1386 1387 static int 1388 manfont(pacmac c12) 1389 { 1390 1391 return mefont(c12); 1392 } 1393 1394 static int 1395 manpp(pacmac c12) 1396 { 1397 1398 return mepp(c12); 1399 } 1400 1401 static void 1402 defcomline(pacmac c12) 1403 { 1404 int c1, c2; 1405 1406 frommac(c12, c1, c2); 1407 if (msflag && mac == MM && c2 == 'L') { 1408 if (disp || c1 == 'R') { 1409 noblock('L', 'E'); 1410 } else { 1411 SKIP; 1412 putchar('.'); 1413 } 1414 } 1415 else if (c1 == '.' && c2 == '.') { 1416 if (msflag) { 1417 SKIP; 1418 return; 1419 } 1420 while (C == '.') 1421 /*VOID*/; 1422 } 1423 ++inmacro; 1424 /* 1425 * Process the arguments to the macro 1426 */ 1427 switch (mac) { 1428 default: 1429 case MM: 1430 case MS: 1431 if (c1 <= 'Z' && msflag) 1432 regline(msputmac, ONE); 1433 else 1434 regline(msputmac, TWO); 1435 break; 1436 case ME: 1437 regline(meputmac, ONE); 1438 break; 1439 } 1440 --inmacro; 1441 } 1442 1443 static void 1444 comline(void) 1445 { 1446 int c1; 1447 int c2; 1448 pacmac c12; 1449 int mid; 1450 int lb, ub; 1451 int hit; 1452 static int tabsize = 0; 1453 static const struct mactab *mactab = NULL; 1454 const struct mactab *mp; 1455 1456 if (mactab == 0) 1457 buildtab(&mactab, &tabsize); 1458 com: 1459 while (C == ' ' || c == '\t') 1460 ; 1461 comx: 1462 if ((c1 = c) == '\n') 1463 return; 1464 c2 = C; 1465 if (c1 == '.' && c2 != '.') 1466 inmacro = NO; 1467 if (msflag && c1 == '[') { 1468 refer(c2); 1469 return; 1470 } 1471 if (parag && mac==MM && c1 == 'P' && c2 == '\n') { 1472 printf(".P\n"); 1473 return; 1474 } 1475 if (c2 == '\n') 1476 return; 1477 /* 1478 * Single letter macro 1479 */ 1480 if (mac == ME && (c2 == ' ' || c2 == '\t') ) 1481 c2 = ' '; 1482 c12 = tomac(c1, c2); 1483 /* 1484 * binary search through the table of macros 1485 */ 1486 lb = 0; 1487 ub = tabsize - 1; 1488 while (lb <= ub) { 1489 mid = (ub + lb) / 2; 1490 mp = &mactab[mid]; 1491 if (mp->macname < c12) 1492 lb = mid + 1; 1493 else if (mp->macname > c12) 1494 ub = mid - 1; 1495 else { 1496 hit = 1; 1497 #ifdef FULLDEBUG 1498 printf("preliminary hit macro %c%c ", c1, c2); 1499 #endif /* FULLDEBUG */ 1500 switch (mp->condition) { 1501 case NONE: 1502 hit = YES; 1503 break; 1504 case FNEST: 1505 hit = (filesp == files); 1506 break; 1507 case NOMAC: 1508 hit = !inmacro; 1509 break; 1510 case MAC: 1511 hit = inmacro; 1512 break; 1513 case PARAG: 1514 hit = parag; 1515 break; 1516 case NBLK: 1517 hit = !keepblock; 1518 break; 1519 default: 1520 hit = 0; 1521 } 1522 1523 if (hit) { 1524 #ifdef FULLDEBUG 1525 printf("MATCH\n"); 1526 #endif /* FULLDEBUG */ 1527 switch ((*(mp->func))(c12)) { 1528 default: 1529 return; 1530 case COMX: 1531 goto comx; 1532 case COM: 1533 goto com; 1534 } 1535 } 1536 #ifdef FULLDEBUG 1537 printf("FAIL\n"); 1538 #endif /* FULLDEBUG */ 1539 break; 1540 } 1541 } 1542 defcomline(c12); 1543 } 1544 1545 static int 1546 macsort(const void *p1, const void *p2) 1547 { 1548 const struct mactab *t1 = p1; 1549 const struct mactab *t2 = p2; 1550 1551 return t1->macname - t2->macname; 1552 } 1553 1554 static int 1555 sizetab(const struct mactab *mp) 1556 { 1557 int i; 1558 1559 i = 0; 1560 if (mp) { 1561 for (; mp->macname; mp++, i++) 1562 /*VOID*/ ; 1563 } 1564 return i; 1565 } 1566 1567 static struct mactab * 1568 macfill(struct mactab *dst, const struct mactab *src) 1569 { 1570 1571 if (src) { 1572 while (src->macname) 1573 *dst++ = *src++; 1574 } 1575 return dst; 1576 } 1577 1578 static void 1579 usage(void) 1580 { 1581 extern char *__progname; 1582 1583 fprintf(stderr, "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n", __progname); 1584 exit(1); 1585 } 1586 1587 static void 1588 buildtab(const struct mactab **r_back, int *r_size) 1589 { 1590 size_t size; 1591 const struct mactab *p1, *p2; 1592 struct mactab *back, *p; 1593 1594 size = sizetab(troffmactab) + sizetab(ppmactab); 1595 p1 = p2 = NULL; 1596 if (msflag) { 1597 switch (mac) { 1598 case ME: 1599 p1 = memactab; 1600 break; 1601 case MM: 1602 p1 = msmactab; 1603 p2 = mmmactab; 1604 break; 1605 case MS: 1606 p1 = msmactab; 1607 break; 1608 case MA: 1609 p1 = manmactab; 1610 break; 1611 default: 1612 break; 1613 } 1614 } 1615 size += sizetab(p1); 1616 size += sizetab(p2); 1617 back = calloc(size + 2, sizeof(struct mactab)); 1618 if (back == NULL) 1619 err(1, NULL); 1620 1621 p = macfill(back, troffmactab); 1622 p = macfill(p, ppmactab); 1623 p = macfill(p, p1); 1624 p = macfill(p, p2); 1625 1626 qsort(back, size, sizeof(struct mactab), macsort); 1627 *r_size = size; 1628 *r_back = back; 1629 } 1630 1631 /* 1632 * troff commands 1633 */ 1634 static const struct mactab troffmactab[] = { 1635 M(NONE, '\\','"', skip), /* comment */ 1636 M(NOMAC, 'd','e', domacro), /* define */ 1637 M(NOMAC, 'i','g', domacro), /* ignore till .. */ 1638 M(NOMAC, 'a','m', domacro), /* append macro */ 1639 M(NBLK, 'n','f', nf), /* filled */ 1640 M(NBLK, 'c','e', ce), /* centered */ 1641 1642 M(NONE, 's','o', so), /* source a file */ 1643 M(NONE, 'n','x', nx), /* go to next file */ 1644 1645 M(NONE, 't','m', skip), /* print string on tty */ 1646 M(NONE, 'h','w', skip), /* exception hyphen words */ 1647 M(NONE, 0,0, 0) 1648 }; 1649 1650 /* 1651 * Preprocessor output 1652 */ 1653 static const struct mactab ppmactab[] = { 1654 M(FNEST, 'E','Q', EQ), /* equation starting */ 1655 M(FNEST, 'T','S', intbl), /* table starting */ 1656 M(FNEST, 'T','C', intbl), /* alternative table? */ 1657 M(FNEST, 'T','&', intbl), /* table reformatting */ 1658 M(NONE, 'T','E', outtbl),/* table ending */ 1659 M(NONE, 'P','S', PS), /* picture starting */ 1660 M(NONE, 0,0, 0) 1661 }; 1662 1663 /* 1664 * Particular to ms and mm 1665 */ 1666 static const struct mactab msmactab[] = { 1667 M(NONE, 'T','L', skiptocom), /* title follows */ 1668 M(NONE, 'F','S', skiptocom), /* start footnote */ 1669 M(NONE, 'O','K', skiptocom), /* Other kws */ 1670 1671 M(NONE, 'N','R', skip), /* undocumented */ 1672 M(NONE, 'N','D', skip), /* use supplied date */ 1673 1674 M(PARAG, 'P','P', PP), /* begin parag */ 1675 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */ 1676 M(PARAG, 'L','P', PP), /* left blocked parag */ 1677 1678 M(NONE, 'A','U', AU), /* author */ 1679 M(NONE, 'A','I', AU), /* authors institution */ 1680 1681 M(NONE, 'S','H', SH), /* section heading */ 1682 M(NONE, 'S','N', SH), /* undocumented */ 1683 M(NONE, 'U','X', UX), /* unix */ 1684 1685 M(NBLK, 'D','S', mssnblock), /* start display text */ 1686 M(NBLK, 'K','S', mssnblock), /* start keep */ 1687 M(NBLK, 'K','F', mssnblock), /* start float keep */ 1688 M(NONE, 0,0, 0) 1689 }; 1690 1691 static const struct mactab mmmactab[] = { 1692 M(NONE, 'H',' ', MMHU), /* -mm ? */ 1693 M(NONE, 'H','U', MMHU), /* -mm ? */ 1694 M(PARAG, 'P',' ', PP), /* paragraph for -mm */ 1695 M(NBLK, 'N','S', mssnblock), /* undocumented */ 1696 M(NONE, 0,0, 0) 1697 }; 1698 1699 static const struct mactab memactab[] = { 1700 M(PARAG, 'p','p', mepp), 1701 M(PARAG, 'l','p', mepp), 1702 M(PARAG, 'n','p', mepp), 1703 M(NONE, 'i','p', meip), 1704 1705 M(NONE, 's','h', mesh), 1706 M(NONE, 'u','h', mesh), 1707 1708 M(NBLK, '(','l', mesnblock), 1709 M(NBLK, '(','q', mesnblock), 1710 M(NBLK, '(','b', mesnblock), 1711 M(NBLK, '(','z', mesnblock), 1712 M(NBLK, '(','c', mesnblock), 1713 1714 M(NBLK, '(','d', mesnblock), 1715 M(NBLK, '(','f', mesnblock), 1716 M(NBLK, '(','x', mesnblock), 1717 1718 M(NONE, 'r',' ', mefont), 1719 M(NONE, 'i',' ', mefont), 1720 M(NONE, 'b',' ', mefont), 1721 M(NONE, 'u',' ', mefont), 1722 M(NONE, 'q',' ', mefont), 1723 M(NONE, 'r','b', mefont), 1724 M(NONE, 'b','i', mefont), 1725 M(NONE, 'b','x', mefont), 1726 M(NONE, 0,0, 0) 1727 }; 1728 1729 static const struct mactab manmactab[] = { 1730 M(PARAG, 'B','I', manfont), 1731 M(PARAG, 'B','R', manfont), 1732 M(PARAG, 'I','B', manfont), 1733 M(PARAG, 'I','R', manfont), 1734 M(PARAG, 'R','B', manfont), 1735 M(PARAG, 'R','I', manfont), 1736 1737 M(PARAG, 'P','P', manpp), 1738 M(PARAG, 'L','P', manpp), 1739 M(PARAG, 'H','P', manpp), 1740 M(NONE, 0,0, 0) 1741 }; 1742