1 /* $NetBSD: deroff.c,v 1.11 2013/10/18 20:47:06 christos Exp $ */ 2 3 /* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */ 4 5 /*- 6 * Copyright (c) 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 /* 34 * Copyright (C) Caldera International Inc. 2001-2002. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code and documentation must retain the above 41 * copyright notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed or owned by Caldera 48 * International, Inc. 49 * 4. Neither the name of Caldera International, Inc. nor the names of other 50 * contributors may be used to endorse or promote products derived from 51 * this software without specific prior written permission. 52 * 53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 64 * POSSIBILITY OF SUCH DAMAGE. 65 */ 66 67 #include <sys/cdefs.h> 68 __RCSID("$NetBSD: deroff.c,v 1.11 2013/10/18 20:47:06 christos Exp $"); 69 70 #include <err.h> 71 #include <limits.h> 72 #include <stddef.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <string.h> 76 #include <unistd.h> 77 78 /* 79 * Deroff command -- strip troff, eqn, and Tbl sequences from 80 * a file. Has two flags argument, -w, to cause output one word per line 81 * rather than in the original format. 82 * -mm (or -ms) causes the corresponding macro's to be interpreted 83 * so that just sentences are output 84 * -ml also gets rid of lists. 85 * Deroff follows .so and .nx commands, removes contents of macro 86 * definitions, equations (both .EQ ... .EN and $...$), 87 * Tbl command sequences, and Troff backslash constructions. 88 * 89 * All input is through the Cget macro; 90 * the most recently read character is in c. 91 * 92 * Modified by Robert Henry to process -me and -man macros. 93 */ 94 95 #define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) ) 96 #define C1get ( (c=getc(infile)) == EOF ? eof() : c) 97 98 #ifdef DEBUG 99 # define C _C() 100 # define C1 _C1() 101 #else /* not DEBUG */ 102 # define C Cget 103 # define C1 C1get 104 #endif /* not DEBUG */ 105 106 #define SKIP while (C != '\n') 107 #define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c 108 109 #define YES 1 110 #define NO 0 111 #define MS 0 /* -ms */ 112 #define MM 1 /* -mm */ 113 #define ME 2 /* -me */ 114 #define MA 3 /* -man */ 115 116 #ifdef DEBUG 117 static char *mactab[] = { "-ms", "-mm", "-me", "-ma" }; 118 #endif /* DEBUG */ 119 120 #define ONE 1 121 #define TWO 2 122 123 #define NOCHAR -2 124 #define SPECIAL 0 125 #define APOS 1 126 #define PUNCT 2 127 #define DIGIT 3 128 #define LETTER 4 129 130 #define MAXFILES 20 131 132 static int iflag; 133 static int wordflag; 134 static int msflag; /* processing a source written using a mac package */ 135 static int mac; /* which package */ 136 static int disp; 137 static int parag; 138 static int inmacro; 139 static int intable; 140 static int keepblock; /* keep blocks of text; normally false when msflag */ 141 142 static char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ 143 144 static char line[LINE_MAX]; 145 static char *lp; 146 147 static int c; 148 static int pc; 149 static int ldelim; 150 static int rdelim; 151 152 static char fname[PATH_MAX]; 153 static FILE *files[MAXFILES]; 154 static FILE **filesp; 155 static FILE *infile; 156 157 static int argc; 158 static char **argv; 159 160 /* 161 * Macro processing 162 * 163 * Macro table definitions 164 */ 165 typedef int pacmac; /* compressed macro name */ 166 static int argconcat = 0; /* concat arguments together (-me only) */ 167 168 #define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF)) 169 #define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF), __USE(c1), __USE(c2)) 170 171 struct mactab { 172 int condition; 173 pacmac macname; 174 int (*func)(pacmac); 175 }; 176 177 static const struct mactab troffmactab[]; 178 static const struct mactab ppmactab[]; 179 static const struct mactab msmactab[]; 180 static const struct mactab mmmactab[]; 181 static const struct mactab memactab[]; 182 static const struct mactab manmactab[]; 183 184 /* 185 * Macro table initialization 186 */ 187 #define M(cond, c1, c2, func) {cond, tomac(c1, c2), func} 188 189 /* 190 * Flags for matching conditions other than 191 * the macro name 192 */ 193 #define NONE 0 194 #define FNEST 1 /* no nested files */ 195 #define NOMAC 2 /* no macro */ 196 #define MAC 3 /* macro */ 197 #define PARAG 4 /* in a paragraph */ 198 #define MSF 5 /* msflag is on */ 199 #define NBLK 6 /* set if no blocks to be kept */ 200 201 /* 202 * Return codes from macro minions, determine where to jump, 203 * how to repeat/reprocess text 204 */ 205 #define COMX 1 /* goto comx */ 206 #define COM 2 /* goto com */ 207 208 static int skeqn(void); 209 static int eof(void); 210 #ifdef DEBUG 211 static int _C1(void); 212 static int _C(void); 213 #endif 214 static int EQ(pacmac); 215 static int domacro(pacmac); 216 static int PS(pacmac); 217 static int skip(pacmac); 218 static int intbl(pacmac); 219 static int outtbl(pacmac); 220 static int so(pacmac); 221 static int nx(pacmac); 222 static int skiptocom(pacmac); 223 static int PP(pacmac); 224 static int AU(pacmac); 225 static int SH(pacmac); 226 static int UX(pacmac); 227 static int MMHU(pacmac); 228 static int mesnblock(pacmac); 229 static int mssnblock(pacmac); 230 static int nf(pacmac); 231 static int ce(pacmac); 232 static int meip(pacmac); 233 static int mepp(pacmac); 234 static int mesh(pacmac); 235 static int mefont(pacmac); 236 static int manfont(pacmac); 237 static int manpp(pacmac); 238 static int macsort(const void *, const void *); 239 static int sizetab(const struct mactab *); 240 static void getfname(void); 241 static void textline(char *, int); 242 static void work(void) __dead; 243 static void regline(void (*)(char *, int), int); 244 static void macro(void); 245 static void tbl(void); 246 static void stbl(void); 247 static void eqn(void); 248 static void backsl(void); 249 static void sce(void); 250 static void refer(int); 251 static void inpic(void); 252 static void msputmac(char *, int); 253 static void msputwords(int); 254 static void meputmac(char *, int); 255 static void meputwords(int); 256 static void noblock(char, char); 257 static void defcomline(pacmac); 258 static void comline(void); 259 static void buildtab(const struct mactab **, int *); 260 static FILE *opn(char *); 261 static struct mactab *macfill(struct mactab *, const struct mactab *); 262 static void usage(void) __dead; 263 264 int 265 main(int ac, char **av) 266 { 267 int i, ch; 268 int errflg = 0; 269 int kflag = NO; 270 271 iflag = NO; 272 wordflag = NO; 273 msflag = NO; 274 mac = ME; 275 disp = NO; 276 parag = NO; 277 inmacro = NO; 278 intable = NO; 279 ldelim = NOCHAR; 280 rdelim = NOCHAR; 281 keepblock = YES; 282 283 while ((ch = getopt(ac, av, "ikpwm:")) != -1) { 284 switch (ch) { 285 case 'i': 286 iflag = YES; 287 break; 288 case 'k': 289 kflag = YES; 290 break; 291 case 'm': 292 msflag = YES; 293 keepblock = NO; 294 switch (optarg[0]) { 295 case 'm': 296 mac = MM; 297 break; 298 case 's': 299 mac = MS; 300 break; 301 case 'e': 302 mac = ME; 303 break; 304 case 'a': 305 mac = MA; 306 break; 307 case 'l': 308 disp = YES; 309 break; 310 default: 311 errflg++; 312 break; 313 } 314 if (errflg == 0 && optarg[1] != '\0') 315 errflg++; 316 break; 317 case 'p': 318 parag = YES; 319 break; 320 case 'w': 321 wordflag = YES; 322 kflag = YES; 323 break; 324 default: 325 errflg++; 326 } 327 } 328 argc = ac - optind; 329 argv = av + optind; 330 331 if (kflag) 332 keepblock = YES; 333 if (errflg) 334 usage(); 335 336 #ifdef DEBUG 337 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n", 338 msflag, mactab[mac], keepblock, disp); 339 #endif /* DEBUG */ 340 if (argc == 0) { 341 infile = stdin; 342 } else { 343 infile = opn(argv[0]); 344 --argc; 345 ++argv; 346 } 347 files[0] = infile; 348 filesp = &files[0]; 349 350 for (i = 'a'; i <= 'z' ; ++i) 351 chars[i] = LETTER; 352 for (i = 'A'; i <= 'Z'; ++i) 353 chars[i] = LETTER; 354 for (i = '0'; i <= '9'; ++i) 355 chars[i] = DIGIT; 356 chars['\''] = APOS; 357 chars['&'] = APOS; 358 chars['.'] = PUNCT; 359 chars[','] = PUNCT; 360 chars[';'] = PUNCT; 361 chars['?'] = PUNCT; 362 chars[':'] = PUNCT; 363 work(); 364 return 0; 365 } 366 367 static int 368 skeqn(void) 369 { 370 371 while ((c = getc(infile)) != rdelim) { 372 if (c == EOF) 373 c = eof(); 374 else if (c == '"') { 375 while ((c = getc(infile)) != '"') { 376 if (c == EOF || 377 (c == '\\' && (c = getc(infile)) == EOF)) 378 c = eof(); 379 } 380 } 381 } 382 if (msflag) 383 return c == 'x'; 384 return c == ' '; 385 } 386 387 static FILE * 388 opn(char *p) 389 { 390 FILE *fd; 391 392 if ((fd = fopen(p, "r")) == NULL) 393 err(1, "fopen %s", p); 394 395 return fd; 396 } 397 398 static int 399 eof(void) 400 { 401 402 if (infile != stdin) 403 fclose(infile); 404 if (filesp > files) 405 infile = *--filesp; 406 else if (argc > 0) { 407 infile = opn(argv[0]); 408 --argc; 409 ++argv; 410 } else 411 exit(0); 412 return C; 413 } 414 415 static void 416 getfname(void) 417 { 418 char *p; 419 struct chain { 420 struct chain *nextp; 421 char *datap; 422 } *q; 423 static struct chain *namechain= NULL; 424 425 while (C == ' ') 426 ; /* nothing */ 427 428 for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) && 429 (*p = c) != '\n' && 430 c != ' ' && c != '\t' && c != '\\'; ++p) 431 C; 432 *p = '\0'; 433 while (c != '\n') 434 C; 435 436 /* see if this name has already been used */ 437 for (q = namechain ; q; q = q->nextp) 438 if (strcmp(fname, q->datap) == 0) { 439 fname[0] = '\0'; 440 return; 441 } 442 443 q = (struct chain *) malloc(sizeof(struct chain)); 444 if (q == NULL) 445 err(1, NULL); 446 q->nextp = namechain; 447 q->datap = strdup(fname); 448 if (q->datap == NULL) 449 err(1, NULL); 450 namechain = q; 451 } 452 453 /*ARGSUSED*/ 454 static void 455 textline(char *str, int constant) 456 { 457 458 if (wordflag) { 459 msputwords(0); 460 return; 461 } 462 puts(str); 463 } 464 465 static void 466 work(void) 467 { 468 469 for (;;) { 470 C; 471 #ifdef FULLDEBUG 472 printf("Starting work with `%c'\n", c); 473 #endif /* FULLDEBUG */ 474 if (c == '.' || c == '\'') 475 comline(); 476 else 477 regline(textline, TWO); 478 } 479 } 480 481 static void 482 regline(void (*pfunc)(char *, int), int constant) 483 { 484 485 line[0] = c; 486 lp = line; 487 while (lp - line < (ptrdiff_t)sizeof(line)) { 488 if (c == '\\') { 489 *lp = ' '; 490 backsl(); 491 } 492 if (c == '\n') 493 break; 494 if (intable && c == 'T') { 495 *++lp = C; 496 if (c == '{' || c == '}') { 497 lp[-1] = ' '; 498 *lp = C; 499 } 500 } else { 501 *++lp = C; 502 } 503 } 504 *lp = '\0'; 505 506 if (line[0] != '\0') 507 (*pfunc)(line, constant); 508 } 509 510 static void 511 macro(void) 512 { 513 514 if (msflag) { 515 do { 516 SKIP; 517 } while (C!='.' || C!='.' || C=='.'); /* look for .. */ 518 if (c != '\n') 519 SKIP; 520 return; 521 } 522 SKIP; 523 inmacro = YES; 524 } 525 526 static void 527 tbl(void) 528 { 529 530 while (C != '.') 531 ; /* nothing */ 532 SKIP; 533 intable = YES; 534 } 535 536 static void 537 stbl(void) 538 { 539 540 while (C != '.') 541 ; /* nothing */ 542 SKIP_TO_COM; 543 if (c != 'T' || C != 'E') { 544 SKIP; 545 pc = c; 546 while (C != '.' || pc != '\n' || C != 'T' || C != 'E') 547 pc = c; 548 } 549 } 550 551 static void 552 eqn(void) 553 { 554 int c1, c2; 555 int dflg; 556 char last; 557 558 last=0; 559 dflg = 1; 560 SKIP; 561 562 for (;;) { 563 if (C1 == '.' || c == '\'') { 564 while (C1 == ' ' || c == '\t') 565 ; 566 if (c == 'E' && C1 == 'N') { 567 SKIP; 568 if (msflag && dflg) { 569 putchar('x'); 570 putchar(' '); 571 if (last) { 572 putchar(last); 573 putchar('\n'); 574 } 575 } 576 return; 577 } 578 } else if (c == 'd') { 579 /* look for delim */ 580 if (C1 == 'e' && C1 == 'l') 581 if (C1 == 'i' && C1 == 'm') { 582 while (C1 == ' ') 583 ; /* nothing */ 584 585 if ((c1 = c) == '\n' || 586 (c2 = C1) == '\n' || 587 (c1 == 'o' && c2 == 'f' && C1=='f')) { 588 ldelim = NOCHAR; 589 rdelim = NOCHAR; 590 } else { 591 ldelim = c1; 592 rdelim = c2; 593 } 594 } 595 dflg = 0; 596 } 597 598 if (c != '\n') 599 while (C1 != '\n') { 600 if (chars[c] == PUNCT) 601 last = c; 602 else if (c != ' ') 603 last = 0; 604 } 605 } 606 } 607 608 /* skip over a complete backslash construction */ 609 static void 610 backsl(void) 611 { 612 int bdelim; 613 614 sw: 615 switch (C) { 616 case '"': 617 SKIP; 618 return; 619 620 case 's': 621 if (C == '\\') 622 backsl(); 623 else { 624 while (C >= '0' && c <= '9') 625 ; /* nothing */ 626 ungetc(c, infile); 627 c = '0'; 628 } 629 --lp; 630 return; 631 632 case 'f': 633 case 'n': 634 case '*': 635 if (C != '(') 636 return; 637 638 case '(': 639 if (msflag) { 640 if (C == 'e') { 641 if (C == 'm') { 642 *lp = '-'; 643 return; 644 } 645 } 646 else if (c != '\n') 647 C; 648 return; 649 } 650 if (C != '\n') 651 C; 652 return; 653 654 case '$': 655 C; /* discard argument number */ 656 return; 657 658 case 'b': 659 case 'x': 660 case 'v': 661 case 'h': 662 case 'w': 663 case 'o': 664 case 'l': 665 case 'L': 666 if ((bdelim = C) == '\n') 667 return; 668 while (C != '\n' && c != bdelim) 669 if (c == '\\') 670 backsl(); 671 return; 672 673 case '\\': 674 if (inmacro) 675 goto sw; 676 677 default: 678 return; 679 } 680 } 681 682 static void 683 sce(void) 684 { 685 char *ap; 686 int n, i; 687 char a[10]; 688 689 for (ap = a; C != '\n'; ap++) { 690 *ap = c; 691 if (ap == &a[9]) { 692 SKIP; 693 ap = a; 694 break; 695 } 696 } 697 if (ap != a) 698 n = atoi(a); 699 else 700 n = 1; 701 for (i = 0; i < n;) { 702 if (C == '.') { 703 if (C == 'c') { 704 if (C == 'e') { 705 while (C == ' ') 706 ; /* nothing */ 707 if (c == '0') { 708 SKIP; 709 break; 710 } else 711 SKIP; 712 } 713 else 714 SKIP; 715 } else if (c == 'P' || C == 'P') { 716 if (c != '\n') 717 SKIP; 718 break; 719 } else if (c != '\n') 720 SKIP; 721 } else { 722 SKIP; 723 i++; 724 } 725 } 726 } 727 728 static void 729 refer(int c1) 730 { 731 int c2; 732 733 if (c1 != '\n') 734 SKIP; 735 736 for (c2 = -1;;) { 737 if (C != '.') 738 SKIP; 739 else { 740 if (C != ']') 741 SKIP; 742 else { 743 while (C != '\n') 744 c2 = c; 745 if (c2 != -1 && chars[c2] == PUNCT) 746 putchar(c2); 747 return; 748 } 749 } 750 } 751 } 752 753 static void 754 inpic(void) 755 { 756 int c1; 757 char *p1; 758 759 SKIP; 760 p1 = line; 761 c = '\n'; 762 for (;;) { 763 c1 = c; 764 if (C == '.' && c1 == '\n') { 765 if (C != 'P') { 766 if (c == '\n') 767 continue; 768 else { 769 SKIP; 770 c = '\n'; 771 continue; 772 } 773 } 774 if (C != 'E') { 775 if (c == '\n') 776 continue; 777 else { 778 SKIP; 779 c = '\n'; 780 continue; 781 } 782 } 783 SKIP; 784 return; 785 } 786 else if (c == '\"') { 787 while (C != '\"') { 788 if (c == '\\') { 789 if (C == '\"') 790 continue; 791 ungetc(c, infile); 792 backsl(); 793 } else 794 *p1++ = c; 795 } 796 *p1++ = ' '; 797 } 798 else if (c == '\n' && p1 != line) { 799 *p1 = '\0'; 800 if (wordflag) 801 msputwords(NO); 802 else { 803 puts(line); 804 putchar('\n'); 805 } 806 p1 = line; 807 } 808 } 809 } 810 811 #ifdef DEBUG 812 static int 813 _C1(void) 814 { 815 816 return C1get; 817 } 818 819 static int 820 _C(void) 821 { 822 823 return Cget; 824 } 825 #endif /* DEBUG */ 826 827 /* 828 * Put out a macro line, using ms and mm conventions. 829 */ 830 static void 831 msputmac(char *s, int constant) 832 { 833 char *t; 834 int found; 835 int last; 836 837 last = 0; 838 found = 0; 839 if (wordflag) { 840 msputwords(YES); 841 return; 842 } 843 while (*s) { 844 while (*s == ' ' || *s == '\t') 845 putchar(*s++); 846 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t) 847 ; /* nothing */ 848 if (*s == '\"') 849 s++; 850 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER && 851 chars[(unsigned char)s[1]] == LETTER) { 852 while (s < t) 853 if (*s == '\"') 854 s++; 855 else 856 putchar(*s++); 857 last = *(t-1); 858 found++; 859 } else if (found && chars[(unsigned char)s[0]] == PUNCT && 860 s[1] == '\0') { 861 putchar(*s++); 862 } else { 863 last = *(t - 1); 864 s = t; 865 } 866 } 867 putchar('\n'); 868 if (msflag && chars[last] == PUNCT) { 869 putchar(last); 870 putchar('\n'); 871 } 872 } 873 874 /* 875 * put out words (for the -w option) with ms and mm conventions 876 */ 877 static void 878 msputwords(int macline) 879 { 880 char *p, *p1; 881 int i, nlet; 882 883 for (p1 = line;;) { 884 /* 885 * skip initial specials ampersands and apostrophes 886 */ 887 while (chars[(unsigned char)*p1] < DIGIT) 888 if (*p1++ == '\0') 889 return; 890 nlet = 0; 891 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p) 892 if (i == LETTER) 893 ++nlet; 894 895 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) { 896 /* 897 * delete trailing ampersands and apostrophes 898 */ 899 while ((i = chars[(unsigned char)p[-1]]) == PUNCT || 900 i == APOS ) 901 --p; 902 while (p1 < p) 903 putchar(*p1++); 904 putchar('\n'); 905 } else { 906 p1 = p; 907 } 908 } 909 } 910 911 /* 912 * put out a macro using the me conventions 913 */ 914 #define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; } 915 #define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\cp' && *cp !='\0') { cp++; } 916 917 static void 918 meputmac(char *cp, int constant) 919 { 920 char *np; 921 int found; 922 int argno; 923 int last; 924 int inquote; 925 926 last = 0; 927 found = 0; 928 if (wordflag) { 929 meputwords(YES); 930 return; 931 } 932 for (argno = 0; *cp; argno++) { 933 SKIPBLANK(cp); 934 inquote = (*cp == '"'); 935 if (inquote) 936 cp++; 937 for (np = cp; *np; np++) { 938 switch (*np) { 939 case '\n': 940 case '\0': 941 break; 942 943 case '\t': 944 case ' ': 945 if (inquote) 946 continue; 947 else 948 goto endarg; 949 950 case '"': 951 if (inquote && np[1] == '"') { 952 memmove(np, np + 1, strlen(np)); 953 np++; 954 continue; 955 } else { 956 *np = ' '; /* bye bye " */ 957 goto endarg; 958 } 959 960 default: 961 continue; 962 } 963 } 964 endarg: ; 965 /* 966 * cp points at the first char in the arg 967 * np points one beyond the last char in the arg 968 */ 969 if ((argconcat == 0) || (argconcat != argno)) 970 putchar(' '); 971 #ifdef FULLDEBUG 972 { 973 char *p; 974 printf("[%d,%d: ", argno, np - cp); 975 for (p = cp; p < np; p++) { 976 putchar(*p); 977 } 978 printf("]"); 979 } 980 #endif /* FULLDEBUG */ 981 /* 982 * Determine if the argument merits being printed 983 * 984 * constant is the cut off point below which something 985 * is not a word. 986 */ 987 if (((np - cp) > constant) && 988 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) { 989 for (; cp < np; cp++) 990 putchar(*cp); 991 last = np[-1]; 992 found++; 993 } else if (found && (np - cp == 1) && 994 chars[(unsigned char)*cp] == PUNCT) { 995 putchar(*cp); 996 } else { 997 last = np[-1]; 998 } 999 cp = np; 1000 } 1001 if (msflag && chars[last] == PUNCT) 1002 putchar(last); 1003 putchar('\n'); 1004 } 1005 1006 /* 1007 * put out words (for the -w option) with ms and mm conventions 1008 */ 1009 static void 1010 meputwords(int macline) 1011 { 1012 1013 msputwords(macline); 1014 } 1015 1016 /* 1017 * 1018 * Skip over a nested set of macros 1019 * 1020 * Possible arguments to noblock are: 1021 * 1022 * fi end of unfilled text 1023 * PE pic ending 1024 * DE display ending 1025 * 1026 * for ms and mm only: 1027 * KE keep ending 1028 * 1029 * NE undocumented match to NS (for mm?) 1030 * LE mm only: matches RL or *L (for lists) 1031 * 1032 * for me: 1033 * ([lqbzcdf] 1034 */ 1035 static void 1036 noblock(char a1, char a2) 1037 { 1038 int c1,c2; 1039 int eqnf; 1040 int lct; 1041 1042 lct = 0; 1043 eqnf = 1; 1044 SKIP; 1045 for (;;) { 1046 while (C != '.') 1047 if (c == '\n') 1048 continue; 1049 else 1050 SKIP; 1051 if ((c1 = C) == '\n') 1052 continue; 1053 if ((c2 = C) == '\n') 1054 continue; 1055 if (c1 == a1 && c2 == a2) { 1056 SKIP; 1057 if (lct != 0) { 1058 lct--; 1059 continue; 1060 } 1061 if (eqnf) 1062 putchar('.'); 1063 putchar('\n'); 1064 return; 1065 } else if (a1 == 'L' && c2 == 'L') { 1066 lct++; 1067 SKIP; 1068 } 1069 /* 1070 * equations (EQ) nested within a display 1071 */ 1072 else if (c1 == 'E' && c2 == 'Q') { 1073 if ((mac == ME && a1 == ')') 1074 || (mac != ME && a1 == 'D')) { 1075 eqn(); 1076 eqnf=0; 1077 } 1078 } 1079 /* 1080 * turning on filling is done by the paragraphing 1081 * macros 1082 */ 1083 else if (a1 == 'f') { /* .fi */ 1084 if ((mac == ME && (c2 == 'h' || c2 == 'p')) 1085 || (mac != ME && (c1 == 'P' || c2 == 'P'))) { 1086 SKIP; 1087 return; 1088 } 1089 } else { 1090 SKIP; 1091 } 1092 } 1093 } 1094 1095 static int 1096 /*ARGSUSED*/ 1097 EQ(pacmac unused) 1098 { 1099 1100 eqn(); 1101 return 0; 1102 } 1103 1104 static int 1105 /*ARGSUSED*/ 1106 domacro(pacmac unused) 1107 { 1108 1109 macro(); 1110 return 0; 1111 } 1112 1113 static int 1114 /*ARGSUSED*/ 1115 PS(pacmac unused) 1116 { 1117 1118 for (C; c == ' ' || c == '\t'; C) 1119 ; /* nothing */ 1120 1121 if (c == '<') { /* ".PS < file" -- don't expect a .PE */ 1122 SKIP; 1123 return 0; 1124 } 1125 if (!msflag) 1126 inpic(); 1127 else 1128 noblock('P', 'E'); 1129 return 0; 1130 } 1131 1132 static int 1133 /*ARGSUSED*/ 1134 skip(pacmac unused) 1135 { 1136 1137 SKIP; 1138 return 0; 1139 } 1140 1141 static int 1142 /*ARGSUSED*/ 1143 intbl(pacmac unused) 1144 { 1145 1146 if (msflag) 1147 stbl(); 1148 else 1149 tbl(); 1150 return 0; 1151 } 1152 1153 static int 1154 /*ARGSUSED*/ 1155 outtbl(pacmac unused) 1156 { 1157 1158 intable = NO; 1159 return 0; 1160 } 1161 1162 static int 1163 /*ARGSUSED*/ 1164 so(pacmac unused) 1165 { 1166 1167 if (!iflag) { 1168 getfname(); 1169 if (fname[0]) { 1170 if (++filesp - &files[0] > MAXFILES) 1171 err(1, "too many nested files (max %d)", 1172 MAXFILES); 1173 infile = *filesp = opn(fname); 1174 } 1175 } 1176 return 0; 1177 } 1178 1179 static int 1180 /*ARGSUSED*/ 1181 nx(pacmac unused) 1182 { 1183 1184 if (!iflag) { 1185 getfname(); 1186 if (fname[0] == '\0') 1187 exit(0); 1188 if (infile != stdin) 1189 fclose(infile); 1190 infile = *filesp = opn(fname); 1191 } 1192 return 0; 1193 } 1194 1195 static int 1196 /*ARGSUSED*/ 1197 skiptocom(pacmac unused) 1198 { 1199 1200 SKIP_TO_COM; 1201 return COMX; 1202 } 1203 1204 static int 1205 PP(pacmac c12) 1206 { 1207 int c1, c2; 1208 1209 frommac(c12, c1, c2); 1210 printf(".%c%c", c1, c2); 1211 while (C != '\n') 1212 putchar(c); 1213 putchar('\n'); 1214 return 0; 1215 } 1216 1217 static int 1218 /*ARGSUSED*/ 1219 AU(pacmac unused) 1220 { 1221 1222 if (mac == MM) 1223 return 0; 1224 SKIP_TO_COM; 1225 return COMX; 1226 } 1227 1228 static int 1229 SH(pacmac c12) 1230 { 1231 int c1, c2; 1232 1233 frommac(c12, c1, c2); 1234 1235 if (parag) { 1236 printf(".%c%c", c1, c2); 1237 while (C != '\n') 1238 putchar(c); 1239 putchar(c); 1240 putchar('!'); 1241 for (;;) { 1242 while (C != '\n') 1243 putchar(c); 1244 putchar('\n'); 1245 if (C == '.') 1246 return COM; 1247 putchar('!'); 1248 putchar(c); 1249 } 1250 /*NOTREACHED*/ 1251 } else { 1252 SKIP_TO_COM; 1253 return COMX; 1254 } 1255 } 1256 1257 static int 1258 /*ARGSUSED*/ 1259 UX(pacmac unused) 1260 { 1261 1262 if (wordflag) 1263 printf("UNIX\n"); 1264 else 1265 printf("UNIX "); 1266 return 0; 1267 } 1268 1269 static int 1270 MMHU(pacmac c12) 1271 { 1272 int c1, c2; 1273 1274 frommac(c12, c1, c2); 1275 if (parag) { 1276 printf(".%c%c", c1, c2); 1277 while (C != '\n') 1278 putchar(c); 1279 putchar('\n'); 1280 } else { 1281 SKIP; 1282 } 1283 return 0; 1284 } 1285 1286 static int 1287 mesnblock(pacmac c12) 1288 { 1289 int c1, c2; 1290 1291 frommac(c12, c1, c2); 1292 noblock(')', c2); 1293 return 0; 1294 } 1295 1296 static int 1297 mssnblock(pacmac c12) 1298 { 1299 int c1, c2; 1300 1301 frommac(c12, c1, c2); 1302 noblock(c1, 'E'); 1303 return 0; 1304 } 1305 1306 static int 1307 /*ARGUSED*/ 1308 nf(pacmac unused) 1309 { 1310 1311 noblock('f', 'i'); 1312 return 0; 1313 } 1314 1315 static int 1316 /*ARGUSED*/ 1317 ce(pacmac unused) 1318 { 1319 1320 sce(); 1321 return 0; 1322 } 1323 1324 static int 1325 meip(pacmac c12) 1326 { 1327 1328 if (parag) 1329 mepp(c12); 1330 else if (wordflag) /* save the tag */ 1331 regline(meputmac, ONE); 1332 else 1333 SKIP; 1334 return 0; 1335 } 1336 1337 /* 1338 * only called for -me .pp or .sh, when parag is on 1339 */ 1340 static int 1341 mepp(pacmac c12) 1342 { 1343 1344 PP(c12); /* eats the line */ 1345 return 0; 1346 } 1347 1348 /* 1349 * Start of a section heading; output the section name if doing words 1350 */ 1351 static int 1352 mesh(pacmac c12) 1353 { 1354 1355 if (parag) 1356 mepp(c12); 1357 else if (wordflag) 1358 defcomline(c12); 1359 else 1360 SKIP; 1361 return 0; 1362 } 1363 1364 /* 1365 * process a font setting 1366 */ 1367 static int 1368 mefont(pacmac c12) 1369 { 1370 1371 argconcat = 1; 1372 defcomline(c12); 1373 argconcat = 0; 1374 return 0; 1375 } 1376 1377 static int 1378 manfont(pacmac c12) 1379 { 1380 1381 return mefont(c12); 1382 } 1383 1384 static int 1385 manpp(pacmac c12) 1386 { 1387 1388 return mepp(c12); 1389 } 1390 1391 static void 1392 defcomline(pacmac c12) 1393 { 1394 int c1, c2; 1395 1396 frommac(c12, c1, c2); 1397 if (msflag && mac == MM && c2 == 'L') { 1398 if (disp || c1 == 'R') { 1399 noblock('L', 'E'); 1400 } else { 1401 SKIP; 1402 putchar('.'); 1403 } 1404 } 1405 else if (c1 == '.' && c2 == '.') { 1406 if (msflag) { 1407 SKIP; 1408 return; 1409 } 1410 while (C == '.') 1411 /*VOID*/; 1412 } 1413 ++inmacro; 1414 /* 1415 * Process the arguments to the macro 1416 */ 1417 switch (mac) { 1418 default: 1419 case MM: 1420 case MS: 1421 if (c1 <= 'Z' && msflag) 1422 regline(msputmac, ONE); 1423 else 1424 regline(msputmac, TWO); 1425 break; 1426 case ME: 1427 regline(meputmac, ONE); 1428 break; 1429 } 1430 --inmacro; 1431 } 1432 1433 static void 1434 comline(void) 1435 { 1436 int c1; 1437 int c2; 1438 pacmac c12; 1439 int mid; 1440 int lb, ub; 1441 int hit; 1442 static int tabsize = 0; 1443 static const struct mactab *mactab = NULL; 1444 const struct mactab *mp; 1445 1446 if (mactab == 0) 1447 buildtab(&mactab, &tabsize); 1448 com: 1449 while (C == ' ' || c == '\t') 1450 ; 1451 comx: 1452 if ((c1 = c) == '\n') 1453 return; 1454 c2 = C; 1455 if (c1 == '.' && c2 != '.') 1456 inmacro = NO; 1457 if (msflag && c1 == '[') { 1458 refer(c2); 1459 return; 1460 } 1461 if (parag && mac==MM && c1 == 'P' && c2 == '\n') { 1462 printf(".P\n"); 1463 return; 1464 } 1465 if (c2 == '\n') 1466 return; 1467 /* 1468 * Single letter macro 1469 */ 1470 if (mac == ME && (c2 == ' ' || c2 == '\t') ) 1471 c2 = ' '; 1472 c12 = tomac(c1, c2); 1473 /* 1474 * binary search through the table of macros 1475 */ 1476 lb = 0; 1477 ub = tabsize - 1; 1478 while (lb <= ub) { 1479 mid = (ub + lb) / 2; 1480 mp = &mactab[mid]; 1481 if (mp->macname < c12) 1482 lb = mid + 1; 1483 else if (mp->macname > c12) 1484 ub = mid - 1; 1485 else { 1486 hit = 1; 1487 #ifdef FULLDEBUG 1488 printf("preliminary hit macro %c%c ", c1, c2); 1489 #endif /* FULLDEBUG */ 1490 switch (mp->condition) { 1491 case NONE: 1492 hit = YES; 1493 break; 1494 case FNEST: 1495 hit = (filesp == files); 1496 break; 1497 case NOMAC: 1498 hit = !inmacro; 1499 break; 1500 case MAC: 1501 hit = inmacro; 1502 break; 1503 case PARAG: 1504 hit = parag; 1505 break; 1506 case NBLK: 1507 hit = !keepblock; 1508 break; 1509 default: 1510 hit = 0; 1511 } 1512 1513 if (hit) { 1514 #ifdef FULLDEBUG 1515 printf("MATCH\n"); 1516 #endif /* FULLDEBUG */ 1517 switch ((*(mp->func))(c12)) { 1518 default: 1519 return; 1520 case COMX: 1521 goto comx; 1522 case COM: 1523 goto com; 1524 } 1525 } 1526 #ifdef FULLDEBUG 1527 printf("FAIL\n"); 1528 #endif /* FULLDEBUG */ 1529 break; 1530 } 1531 } 1532 defcomline(c12); 1533 } 1534 1535 static int 1536 macsort(const void *p1, const void *p2) 1537 { 1538 const struct mactab *t1 = p1; 1539 const struct mactab *t2 = p2; 1540 1541 return t1->macname - t2->macname; 1542 } 1543 1544 static int 1545 sizetab(const struct mactab *mp) 1546 { 1547 int i; 1548 1549 i = 0; 1550 if (mp) { 1551 for (; mp->macname; mp++, i++) 1552 /*VOID*/ ; 1553 } 1554 return i; 1555 } 1556 1557 static struct mactab * 1558 macfill(struct mactab *dst, const struct mactab *src) 1559 { 1560 1561 if (src) { 1562 while (src->macname) 1563 *dst++ = *src++; 1564 } 1565 return dst; 1566 } 1567 1568 static void 1569 usage(void) 1570 { 1571 extern char *__progname; 1572 1573 fprintf(stderr, "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n", __progname); 1574 exit(1); 1575 } 1576 1577 static void 1578 buildtab(const struct mactab **r_back, int *r_size) 1579 { 1580 size_t size; 1581 const struct mactab *p1, *p2; 1582 struct mactab *back, *p; 1583 1584 size = sizetab(troffmactab) + sizetab(ppmactab); 1585 p1 = p2 = NULL; 1586 if (msflag) { 1587 switch (mac) { 1588 case ME: 1589 p1 = memactab; 1590 break; 1591 case MM: 1592 p1 = msmactab; 1593 p2 = mmmactab; 1594 break; 1595 case MS: 1596 p1 = msmactab; 1597 break; 1598 case MA: 1599 p1 = manmactab; 1600 break; 1601 default: 1602 break; 1603 } 1604 } 1605 size += sizetab(p1); 1606 size += sizetab(p2); 1607 back = calloc(size + 2, sizeof(struct mactab)); 1608 if (back == NULL) 1609 err(1, NULL); 1610 1611 p = macfill(back, troffmactab); 1612 p = macfill(p, ppmactab); 1613 p = macfill(p, p1); 1614 p = macfill(p, p2); 1615 1616 qsort(back, size, sizeof(struct mactab), macsort); 1617 *r_size = size; 1618 *r_back = back; 1619 } 1620 1621 /* 1622 * troff commands 1623 */ 1624 static const struct mactab troffmactab[] = { 1625 M(NONE, '\\','"', skip), /* comment */ 1626 M(NOMAC, 'd','e', domacro), /* define */ 1627 M(NOMAC, 'i','g', domacro), /* ignore till .. */ 1628 M(NOMAC, 'a','m', domacro), /* append macro */ 1629 M(NBLK, 'n','f', nf), /* filled */ 1630 M(NBLK, 'c','e', ce), /* centered */ 1631 1632 M(NONE, 's','o', so), /* source a file */ 1633 M(NONE, 'n','x', nx), /* go to next file */ 1634 1635 M(NONE, 't','m', skip), /* print string on tty */ 1636 M(NONE, 'h','w', skip), /* exception hyphen words */ 1637 M(NONE, 0,0, 0) 1638 }; 1639 1640 /* 1641 * Preprocessor output 1642 */ 1643 static const struct mactab ppmactab[] = { 1644 M(FNEST, 'E','Q', EQ), /* equation starting */ 1645 M(FNEST, 'T','S', intbl), /* table starting */ 1646 M(FNEST, 'T','C', intbl), /* alternative table? */ 1647 M(FNEST, 'T','&', intbl), /* table reformatting */ 1648 M(NONE, 'T','E', outtbl),/* table ending */ 1649 M(NONE, 'P','S', PS), /* picture starting */ 1650 M(NONE, 0,0, 0) 1651 }; 1652 1653 /* 1654 * Particular to ms and mm 1655 */ 1656 static const struct mactab msmactab[] = { 1657 M(NONE, 'T','L', skiptocom), /* title follows */ 1658 M(NONE, 'F','S', skiptocom), /* start footnote */ 1659 M(NONE, 'O','K', skiptocom), /* Other kws */ 1660 1661 M(NONE, 'N','R', skip), /* undocumented */ 1662 M(NONE, 'N','D', skip), /* use supplied date */ 1663 1664 M(PARAG, 'P','P', PP), /* begin parag */ 1665 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */ 1666 M(PARAG, 'L','P', PP), /* left blocked parag */ 1667 1668 M(NONE, 'A','U', AU), /* author */ 1669 M(NONE, 'A','I', AU), /* authors institution */ 1670 1671 M(NONE, 'S','H', SH), /* section heading */ 1672 M(NONE, 'S','N', SH), /* undocumented */ 1673 M(NONE, 'U','X', UX), /* unix */ 1674 1675 M(NBLK, 'D','S', mssnblock), /* start display text */ 1676 M(NBLK, 'K','S', mssnblock), /* start keep */ 1677 M(NBLK, 'K','F', mssnblock), /* start float keep */ 1678 M(NONE, 0,0, 0) 1679 }; 1680 1681 static const struct mactab mmmactab[] = { 1682 M(NONE, 'H',' ', MMHU), /* -mm ? */ 1683 M(NONE, 'H','U', MMHU), /* -mm ? */ 1684 M(PARAG, 'P',' ', PP), /* paragraph for -mm */ 1685 M(NBLK, 'N','S', mssnblock), /* undocumented */ 1686 M(NONE, 0,0, 0) 1687 }; 1688 1689 static const struct mactab memactab[] = { 1690 M(PARAG, 'p','p', mepp), 1691 M(PARAG, 'l','p', mepp), 1692 M(PARAG, 'n','p', mepp), 1693 M(NONE, 'i','p', meip), 1694 1695 M(NONE, 's','h', mesh), 1696 M(NONE, 'u','h', mesh), 1697 1698 M(NBLK, '(','l', mesnblock), 1699 M(NBLK, '(','q', mesnblock), 1700 M(NBLK, '(','b', mesnblock), 1701 M(NBLK, '(','z', mesnblock), 1702 M(NBLK, '(','c', mesnblock), 1703 1704 M(NBLK, '(','d', mesnblock), 1705 M(NBLK, '(','f', mesnblock), 1706 M(NBLK, '(','x', mesnblock), 1707 1708 M(NONE, 'r',' ', mefont), 1709 M(NONE, 'i',' ', mefont), 1710 M(NONE, 'b',' ', mefont), 1711 M(NONE, 'u',' ', mefont), 1712 M(NONE, 'q',' ', mefont), 1713 M(NONE, 'r','b', mefont), 1714 M(NONE, 'b','i', mefont), 1715 M(NONE, 'b','x', mefont), 1716 M(NONE, 0,0, 0) 1717 }; 1718 1719 static const struct mactab manmactab[] = { 1720 M(PARAG, 'B','I', manfont), 1721 M(PARAG, 'B','R', manfont), 1722 M(PARAG, 'I','B', manfont), 1723 M(PARAG, 'I','R', manfont), 1724 M(PARAG, 'R','B', manfont), 1725 M(PARAG, 'R','I', manfont), 1726 1727 M(PARAG, 'P','P', manpp), 1728 M(PARAG, 'L','P', manpp), 1729 M(PARAG, 'H','P', manpp), 1730 M(NONE, 0,0, 0) 1731 }; 1732