xref: /openbsd-src/usr.bin/deroff/deroff.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: deroff.c,v 1.8 2009/10/27 23:59:37 deraadt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1988, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Copyright (C) Caldera International Inc.  2001-2002.
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code and documentation must retain the above
39  *    copyright notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed or owned by Caldera
46  *	International, Inc.
47  * 4. Neither the name of Caldera International, Inc. nor the names of other
48  *    contributors may be used to endorse or promote products derived from
49  *    this software without specific prior written permission.
50  *
51  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
52  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
53  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
54  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
55  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
56  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
57  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
58  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
60  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
61  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 #include <err.h>
66 #include <limits.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <unistd.h>
71 
72 /*
73  *	Deroff command -- strip troff, eqn, and Tbl sequences from
 *	a file.  The -w flag causes output of one word per line
 *	rather than in the original format.
 *	-mm (or -ms) causes the corresponding macros to be interpreted
 *	so that just sentences are output.
78  *	-ml  also gets rid of lists.
79  *	Deroff follows .so and .nx commands, removes contents of macro
80  *	definitions, equations (both .EQ ... .EN and $...$),
81  *	Tbl command sequences, and Troff backslash constructions.
82  *
83  *	All input is through the Cget macro;
84  *	the most recently read character is in c.
85  *
86  *	Modified by Robert Henry to process -me and -man macros.
87  */
88 
/*
 * Cget: read the next character from infile into c.  On EOF the result
 * is whatever eof() returns; if the character equals ldelim while
 * filesp == files, the result is whatever skeqn() returns; otherwise
 * the result is c itself.
 * C1get: same as Cget but without the ldelim/skeqn() check.
 */
#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get ( (c=getc(infile)) == EOF ? eof() :  c)

/* With DEBUG, C and C1 go through real functions instead of the macros. */
#ifdef DEBUG
#  define C	_C()
#  define C1	_C1()
#else /* not DEBUG */
#  define C	Cget
#  define C1	C1get
#endif /* not DEBUG */

/*
 * SKIP: loop header that reads characters until a newline is read;
 * used as "SKIP;" to discard the rest of the current line.
 * SKIP_TO_COM: expands to several statements (two SKIPs, an assignment
 * and a while loop), so it must not be used as the unbraced body of an
 * if/else.  The loop keeps reading until it sees a '.' whose preceding
 * character (pc) was a newline and whose following character is not
 * greater than 'Z'.
 */
#define SKIP while (C != '\n')
#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c
102 
#define	YES 1
#define	NO 0
/* values stored in the global "mac" */
#define	MS 0	/* -ms */
#define	MM 1	/* -mm */
#define	ME 2	/* -me */
#define	MA 3	/* -man */

#ifdef DEBUG
/* printable names for the values of "mac", indexed by MS..MA */
char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* values passed as the "constant" argument of regline() */
#define	ONE 1
#define	TWO 2

/* value of ldelim/rdelim when no eqn delimiter is in effect */
#define NOCHAR -2
/* character classes stored in chars[] */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4

/* number of slots in files[] */
#define MAXFILES 20
125 
int	iflag;		/* -i: so() and nx() do nothing when set */
int	wordflag;	/* -w: output goes through msputwords()/meputwords() */
int	msflag;		/* processing a source written using a mac package */
int	mac;		/* which package */
int	disp;		/* set by "-m l" */
int	parag;		/* -p */
int	inmacro;	/* set by macro(), adjusted by defcomline()/comline() */
int	intable;	/* set by tbl(), cleared by outtbl() */
int	keepblock;	/* keep blocks of text; normally false when msflag */
135 
/*
 * Character class table: SPECIAL, PUNCT, APOS, DIGIT, or LETTER.
 * Sized to cover every unsigned char value because it is indexed with
 * (unsigned char) casts and with raw getc() results, which range over
 * 0..UCHAR_MAX; a 128-entry table was read out of bounds for bytes
 * >= 0x80.  Entries not set explicitly stay 0 (SPECIAL).
 */
char chars[UCHAR_MAX + 1];
137 
char line[LINE_MAX];	/* text line being assembled (see regline(), inpic()) */
char *lp;		/* current position in line[] */

int c;			/* most recently read character */
int pc;			/* previous character, maintained by SKIP_TO_COM and stbl() */
int ldelim;		/* eqn in-line delimiters, or NOCHAR */
int rdelim;

char fname[PATH_MAX];	/* file name read by getfname() */
FILE *files[MAXFILES];	/* stack of open input files */
FILE **filesp;		/* current top of files[] */
FILE *infile;		/* stream currently being read */

/* remaining file operands, consumed by main() and eof() */
int argc;
char **argv;
153 
154 /*
155  *	Macro processing
156  *
157  *	Macro table definitions
158  */
typedef	int pacmac;		/* compressed macro name */
int	argconcat = 0;		/* concat arguments together (-me only) */

/*
 * tomac packs two characters into one pacmac (c1 in bits 8-15, c2 in
 * bits 0-7); frommac unpacks a pacmac back into c1 and c2.
 */
#define	tomac(c1, c2)		((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
#define	frommac(src, c1, c2)	(((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF))

/* One macro table entry. */
struct mactab{
	int	condition;	/* one of NONE, FNEST, NOMAC, ... (see comline()) */
	pacmac	macname;	/* packed two-character name; 0 ends a table */
	int	(*func)();	/* XXX - args */
};

/* tentative definitions; the tables themselves appear later in the file */
struct	mactab	troffmactab[];
struct	mactab	ppmactab[];
struct	mactab	msmactab[];
struct	mactab	mmmactab[];
struct	mactab	memactab[];
struct	mactab	manmactab[];

/*
 *	Macro table initialization
 */
#define	M(cond, c1, c2, func) {cond, tomac(c1, c2), func}
182 
183 /*
184  *	Flags for matching conditions other than
185  *	the macro name
186  */
#define	NONE		0
#define	FNEST		1		/* no nested files */
#define	NOMAC		2		/* no macro */
#define	MAC		3		/* macro */
#define	PARAG		4		/* in a paragraph */
/* NOTE(review): comline() has no case for MSF, so an MSF entry never matches */
#define	MSF		5		/* msflag is on */
#define	NBLK		6		/* set if no blocks to be kept */

/*
 *	Return codes from macro minions, determine where to jump,
 *	how to repeat/reprocess text
 */
#define	COMX		1		/* goto comx */
#define	COM		2		/* goto com */
201 
/*
 * Forward declarations.  The int-returning functions taking void or a
 * pacmac are the ones stored in struct mactab entries or called through
 * the C/C1 macros; the void functions are the internal helpers.
 */
int	 skeqn(void);
int	 eof(void);
int	 _C1(void);
int	 _C(void);
int	 EQ(void);
int	 domacro(void);
int	 PS(void);
int	 skip(void);
int	 intbl(void);
int	 outtbl(void);
int	 so(void);
int	 nx(void);
int	 skiptocom(void);
int	 PP(pacmac);
int	 AU(void);
int	 SH(pacmac);
int	 UX(void);
int	 MMHU(pacmac);
int	 mesnblock(pacmac);
int	 mssnblock(pacmac);
int	 nf(void);
int	 ce(void);
int	 meip(pacmac);
int	 mepp(pacmac);
int	 mesh(pacmac);
int	 mefont(pacmac);
int	 manfont(pacmac);
int	 manpp(pacmac);
int	 macsort(const void *, const void *);
int	 sizetab(struct mactab *);
void	 getfname(void);
void	 textline(char *, int);
void	 work(void);
void	 regline(void (*)(char *, int), int);
void	 macro(void);
void	 tbl(void);
void	 stbl(void);
void	 eqn(void);
void	 backsl(void);
void	 sce(void);
void	 refer(int);
void	 inpic(void);
void	 msputmac(char *, int);
void	 msputwords(int);
void	 meputmac(char *, int);
void	 meputwords(int);
void	 noblock(char, char);
void	 defcomline(pacmac);
void	 comline(void);
void	 buildtab(struct mactab **, int *);
FILE	*opn(char *);
struct mactab *macfill(struct mactab *, struct mactab *);
__dead void usage(void);
255 
256 int
257 main(int ac, char **av)
258 {
259 	int	i, ch;
260 	int	errflg = 0;
261 	int	kflag = NO;
262 
263 	iflag = NO;
264 	wordflag = NO;
265 	msflag = NO;
266 	mac = ME;
267 	disp = NO;
268 	parag = NO;
269 	inmacro = NO;
270 	intable = NO;
271 	ldelim	= NOCHAR;
272 	rdelim	= NOCHAR;
273 	keepblock = YES;
274 
275 	while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
276 		switch (ch) {
277 		case 'i':
278 			iflag = YES;
279 			break;
280 		case 'k':
281 			kflag = YES;
282 			break;
283 		case 'm':
284 			msflag = YES;
285 			keepblock = NO;
286 			switch (optarg[0]) {
287 			case 'm':
288 				mac = MM;
289 				break;
290 			case 's':
291 				mac = MS;
292 				break;
293 			case 'e':
294 				mac = ME;
295 				break;
296 			case 'a':
297 				mac = MA;
298 				break;
299 			case 'l':
300 				disp = YES;
301 				break;
302 			default:
303 				errflg++;
304 				break;
305 			}
306 			if (errflg == 0 && optarg[1] != '\0')
307 				errflg++;
308 			break;
309 		case 'p':
310 			parag = YES;
311 			break;
312 		case 'w':
313 			wordflag = YES;
314 			kflag = YES;
315 			break;
316 		default:
317 			errflg++;
318 		}
319 	}
320 	argc = ac - optind;
321 	argv = av + optind;
322 
323 	if (kflag)
324 		keepblock = YES;
325 	if (errflg)
326 		usage();
327 
328 #ifdef DEBUG
329 	printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
330 		msflag, mactab[mac], keepblock, disp);
331 #endif /* DEBUG */
332 	if (argc == 0) {
333 		infile = stdin;
334 	} else {
335 		infile = opn(argv[0]);
336 		--argc;
337 		++argv;
338 	}
339 	files[0] = infile;
340 	filesp = &files[0];
341 
342 	for (i = 'a'; i <= 'z' ; ++i)
343 		chars[i] = LETTER;
344 	for (i = 'A'; i <= 'Z'; ++i)
345 		chars[i] = LETTER;
346 	for (i = '0'; i <= '9'; ++i)
347 		chars[i] = DIGIT;
348 	chars['\''] = APOS;
349 	chars['&'] = APOS;
350 	chars['.'] = PUNCT;
351 	chars[','] = PUNCT;
352 	chars[';'] = PUNCT;
353 	chars['?'] = PUNCT;
354 	chars[':'] = PUNCT;
355 	work();
356 	exit(0);
357 }
358 
/*
 * Skip an in-line equation.  Invoked from the Cget macro when the
 * character just read equals ldelim.  Reads from infile until rdelim
 * is read; inside a double-quoted string the closing quote ends the
 * string, and a backslash consumes the following character.  EOF is
 * handed to eof(), whose result replaces c.
 *
 * Sets c to, and returns, 'x' when msflag is set and ' ' otherwise.
 */
int
skeqn(void)
{

	while ((c = getc(infile)) != rdelim) {
		if (c == EOF)
			c = eof();
		else if (c == '"') {
			while ((c = getc(infile)) != '"') {
				if (c == EOF ||
				    (c == '\\' && (c = getc(infile)) == EOF))
					c = eof();
			}
		}
	}
	if (msflag)
		return((c = 'x'));
	return((c = ' '));
}
378 
/*
 * Open the file named p for reading and return the stream.
 * On failure the program exits with status 1 through err().
 */
FILE *
opn(char *p)
{
	FILE *stream = fopen(p, "r");

	if (stream == NULL)
		err(1, "fopen %s", p);
	return stream;
}
389 
390 int
391 eof(void)
392 {
393 
394 	if (infile != stdin)
395 		fclose(infile);
396 	if (filesp > files)
397 		infile = *--filesp;
398 	else if (argc > 0) {
399 		infile = opn(argv[0]);
400 		--argc;
401 		++argv;
402 	} else
403 		exit(0);
404 	return(C);
405 }
406 
407 void
408 getfname(void)
409 {
410 	char *p;
411 	struct chain {
412 		struct chain *nextp;
413 		char *datap;
414 	} *q;
415 	static struct chain *namechain= NULL;
416 
417 	while (C == ' ')
418 		;	/* nothing */
419 
420 	for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' &&
421 	    c != ' ' && c != '\t' && c != '\\'; ++p)
422 		C;
423 	*p = '\0';
424 	while (c != '\n')
425 		C;
426 
427 	/* see if this name has already been used */
428 	for (q = namechain ; q; q = q->nextp)
429 		if (strcmp(fname, q->datap) == 0) {
430 			fname[0] = '\0';
431 			return;
432 		}
433 
434 	q = (struct chain *) malloc(sizeof(struct chain));
435 	if (q == NULL)
436 		err(1, NULL);
437 	q->nextp = namechain;
438 	q->datap = strdup(fname);
439 	if (q->datap == NULL)
440 		err(1, NULL);
441 	namechain = q;
442 }
443 
444 /*ARGSUSED*/
445 void
446 textline(char *str, int constant)
447 {
448 
449 	if (wordflag) {
450 		msputwords(0);
451 		return;
452 	}
453 	puts(str);
454 }
455 
456 void
457 work(void)
458 {
459 
460 	for (;;) {
461 		C;
462 #ifdef FULLDEBUG
463 		printf("Starting work with `%c'\n", c);
464 #endif /* FULLDEBUG */
465 		if (c == '.' || c == '\'')
466 			comline();
467 		else
468 			regline(textline, TWO);
469 	}
470 }
471 
472 void
473 regline(void (*pfunc)(char *, int), int constant)
474 {
475 
476 	line[0] = c;
477 	lp = line;
478 	while (lp - line < sizeof(line)) {
479 		if (c == '\\') {
480 			*lp = ' ';
481 			backsl();
482 		}
483 		if (c == '\n')
484 			break;
485 		if (intable && c == 'T') {
486 			*++lp = C;
487 			if (c == '{' || c == '}') {
488 				lp[-1] = ' ';
489 				*lp = C;
490 			}
491 		} else {
492 			*++lp = C;
493 		}
494 	}
495 	*lp = '\0';
496 
497 	if (line[0] != '\0')
498 		(*pfunc)(line, constant);
499 }
500 
/*
 * Handle the start of a macro definition.
 *
 * With msflag set, the definition is discarded here: lines are skipped
 * until the three characters read after a line are '.', '.' and then
 * something other than '.'; the rest of that line is skipped as well.
 * Without msflag, only the current line is skipped and inmacro is set.
 */
void
macro(void)
{

	if (msflag) {
		do {
			SKIP;
		} while (C!='.' || C!='.' || C=='.');	/* look for  .. */
		if (c != '\n')
			SKIP;
		return;
	}
	SKIP;
	inmacro = YES;
}
516 
517 void
518 tbl(void)
519 {
520 
521 	while (C != '.')
522 		;	/* nothing */
523 	SKIP;
524 	intable = YES;
525 }
526 
/*
 * Skip a table (used instead of tbl() when msflag is set).
 * Discards input up to the first '.', moves on to the next request
 * found by SKIP_TO_COM, and if that request is not "TE" keeps reading
 * until a '.' that follows a newline is followed by 'T' and 'E'.
 */
void
stbl(void)
{

	while (C != '.')
		;	/* nothing */
	SKIP_TO_COM;
	if (c != 'T' || C != 'E') {
		SKIP;
		pc = c;
		while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
			pc = c;
	}
}
541 
/*
 * Skip an equation block up to its closing ".EN" line (the request
 * character may also be an apostrophe).  Input is read with C1, so
 * in-line equation delimiters are not interpreted here.
 *
 * A line beginning with "delim" updates ldelim/rdelim: they are set to
 * the two characters that follow, or reset to NOCHAR when the line
 * ends early or the argument is "off".  Any line starting with 'd'
 * clears dflg.
 *
 * At ".EN", when msflag is set and dflg is still set, "x " is written,
 * followed by the last punctuation character seen at the end of an
 * equation line (if any) and a newline.
 */
void
eqn(void)
{
	int c1, c2;
	int dflg;
	char last;

	last=0;
	dflg = 1;
	SKIP;

	for (;;) {
		if (C1 == '.'  || c == '\'') {
			while (C1 == ' ' || c == '\t')
				;
			if (c == 'E' && C1 == 'N') {
				SKIP;
				if (msflag && dflg) {
					putchar('x');
					putchar(' ');
					if (last) {
						putchar(last);
						putchar('\n');
					}
				}
				return;
			}
		} else if (c == 'd') {
			/* look for delim */
			if (C1 == 'e' && C1 == 'l')
				if (C1 == 'i' && C1 == 'm') {
					while (C1 == ' ')
						;	/* nothing */

					/* c2 is only read when both characters were present */
					if ((c1 = c) == '\n' ||
					    (c2 = C1) == '\n' ||
					    (c1 == 'o' && c2 == 'f' && C1=='f')) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
			dflg = 0;
		}

		/* consume the rest of the line, tracking trailing punctuation */
		if (c != '\n')
			while (C1 != '\n') {
				if (chars[c] == PUNCT)
					last = c;
				else if (c != ' ')
					last = 0;
			}
	}
}
598 
/*
 * skip over a complete backslash construction
 *
 * Called with the backslash already read; reads the character that
 * follows it and consumes whatever belongs to that escape.  Several
 * cases fall through to the next one on purpose.
 */
void
backsl(void)
{
	int bdelim;

sw:
	switch (C) {
	case '"':
		/* comment: discard the rest of the line */
		SKIP;
		return;

	case 's':
		/* size change: skip the following characters up to a non-digit */
		if (C == '\\')
			backsl();
		else {
			while (C >= '0' && c <= '9')
				;	/* nothing */
			ungetc(c, infile);
			c = '0';
		}
		--lp;
		return;

	case 'f':
	case 'n':
	case '*':
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two-character name; with msflag "em" becomes a '-' in the line */
		if (msflag) {
			if (C == 'e') {
				if (C == 'm') {
					*lp = '-';
					return;
				}
			}
			else if (c != '\n')
				C;
			return;
		}
		if (C != '\n')
			C;
		return;

	case '$':
		C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/* delimited argument: skip to the matching delimiter or newline */
		if ((bdelim = C) == '\n')
			return;
		while (C != '\n' && c != bdelim)
			if (c == '\\')
				backsl();
		return;

	case '\\':
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */

	default:
		return;
	}
}
672 
673 void
674 sce(void)
675 {
676 	char *ap;
677 	int n, i;
678 	char a[10];
679 
680 	for (ap = a; C != '\n'; ap++) {
681 		*ap = c;
682 		if (ap == &a[9]) {
683 			SKIP;
684 			ap = a;
685 			break;
686 		}
687 	}
688 	if (ap != a)
689 		n = atoi(a);
690 	else
691 		n = 1;
692 	for (i = 0; i < n;) {
693 		if (C == '.') {
694 			if (C == 'c') {
695 				if (C == 'e') {
696 					while (C == ' ')
697 						;	/* nothing */
698 					if (c == '0') {
699 						SKIP;
700 						break;
701 					} else
702 						SKIP;
703 				}
704 				else
705 					SKIP;
706 			} else if (c == 'P' || C == 'P') {
707 				if (c != '\n')
708 					SKIP;
709 				break;
710 			} else if (c != '\n')
711 				SKIP;
712 		} else {
713 			SKIP;
714 			i++;
715 		}
716 	}
717 }
718 
/*
 * Skip a block that ends with a line starting ".]".
 * c1 is the character the caller read last; unless it is a newline
 * the rest of the current line is skipped first.  Lines are then
 * discarded until one whose first two characters are '.' and ']'.
 * The last character before the newline on that closing line is
 * written out if it is punctuation.
 */
void
refer(int c1)
{
	int c2;

	if (c1 != '\n')
		SKIP;

	for (c2 = -1;;) {
		if (C != '.')
			SKIP;
		else {
			if (C != ']')
				SKIP;
			else {
				/* remember the last character on the closing line */
				while (C != '\n')
					c2 = c;
				if (c2 != -1 && chars[c2] == PUNCT)
					putchar(c2);
				return;
			}
		}
	}
}
743 
744 void
745 inpic(void)
746 {
747 	int c1;
748 	char *p1;
749 
750 	SKIP;
751 	p1 = line;
752 	c = '\n';
753 	for (;;) {
754 		c1 = c;
755 		if (C == '.' && c1 == '\n') {
756 			if (C != 'P') {
757 				if (c == '\n')
758 					continue;
759 				else {
760 					SKIP;
761 					c = '\n';
762 					continue;
763 				}
764 			}
765 			if (C != 'E') {
766 				if (c == '\n')
767 					continue;
768 				else {
769 					SKIP;
770 					c = '\n';
771 					continue;
772 				}
773 			}
774 			SKIP;
775 			return;
776 		}
777 		else if (c == '\"') {
778 			while (C != '\"') {
779 				if (c == '\\') {
780 					if (C == '\"')
781 						continue;
782 					ungetc(c, infile);
783 					backsl();
784 				} else
785 					*p1++ = c;
786 			}
787 			*p1++ = ' ';
788 		}
789 		else if (c == '\n' && p1 != line) {
790 			*p1 = '\0';
791 			if (wordflag)
792 				msputwords(NO);
793 			else {
794 				puts(line);
795 				putchar('\n');
796 			}
797 			p1 = line;
798 		}
799 	}
800 }
801 
#ifdef DEBUG
/* Function form of the C1get macro, used for C1 in DEBUG builds. */
int
_C1(void)
{
	return C1get;
}

/* Function form of the Cget macro, used for C in DEBUG builds. */
int
_C(void)
{
	return Cget;
}
#endif /* DEBUG */
817 
818 /*
819  *	Put out a macro line, using ms and mm conventions.
820  */
821 void
822 msputmac(char *s, int constant)
823 {
824 	char *t;
825 	int found;
826 	int last;
827 
828 	last = 0;
829 	found = 0;
830 	if (wordflag) {
831 		msputwords(YES);
832 		return;
833 	}
834 	while (*s) {
835 		while (*s == ' ' || *s == '\t')
836 			putchar(*s++);
837 		for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
838 			;	/* nothing */
839 		if (*s == '\"')
840 			s++;
841 		if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
842 		    chars[(unsigned char)s[1]] == LETTER) {
843 			while (s < t)
844 				if (*s == '\"')
845 					s++;
846 				else
847 					putchar(*s++);
848 			last = *(t-1);
849 			found++;
850 		} else if (found && chars[(unsigned char)s[0]] == PUNCT &&
851 		    s[1] == '\0') {
852 			putchar(*s++);
853 		} else {
854 			last = *(t - 1);
855 			s = t;
856 		}
857 	}
858 	putchar('\n');
859 	if (msflag && chars[last] == PUNCT) {
860 		putchar(last);
861 		putchar('\n');
862 	}
863 }
864 
/*
 *	put out words (for the -w option) with ms and mm conventions
 *
 *	Scans the global line buffer and prints, one per line, each run of
 *	non-SPECIAL characters that starts with a letter and contains more
 *	than one letter.  The macline argument is not used.
 */
void
msputwords(int macline)
{
	char *p, *p1;
	int i, nlet;

	for (p1 = line;;) {
		/*
		 *	skip initial specials ampersands and apostrophes
		 */
		while (chars[(unsigned char)*p1] < DIGIT)
			if (*p1++ == '\0')
				return;
		/* find the end of the candidate word and count its letters */
		nlet = 0;
		for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
			if (i == LETTER)
				++nlet;

		if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
			/*
			 *	delete trailing ampersands and apostrophes
			 */
			while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
			    i == APOS )
				--p;
			while (p1 < p)
				putchar(*p1++);
			putchar('\n');
		} else {
			p1 = p;
		}
	}
}
901 
902 /*
903  *	put out a macro using the me conventions
904  */
/*
 * SKIPBLANK advances cp past blanks and tabs; SKIPNONBLANK advances cp
 * to the next blank, tab or terminating NUL.  cp must be a modifiable
 * pointer expression; it is evaluated more than once.
 */
#define SKIPBLANK(cp)	while (*(cp) == ' ' || *(cp) == '\t') { (cp)++; }
#define SKIPNONBLANK(cp) while (*(cp) != ' ' && *(cp) != '\t' && *(cp) != '\0') { (cp)++; }
907 
908 void
909 meputmac(char *cp, int constant)
910 {
911 	char	*np;
912 	int	found;
913 	int	argno;
914 	int	last;
915 	int	inquote;
916 
917 	last = 0;
918 	found = 0;
919 	if (wordflag) {
920 		meputwords(YES);
921 		return;
922 	}
923 	for (argno = 0; *cp; argno++) {
924 		SKIPBLANK(cp);
925 		inquote = (*cp == '"');
926 		if (inquote)
927 			cp++;
928 		for (np = cp; *np; np++) {
929 			switch (*np) {
930 			case '\n':
931 			case '\0':
932 				break;
933 
934 			case '\t':
935 			case ' ':
936 				if (inquote)
937 					continue;
938 				else
939 					goto endarg;
940 
941 			case '"':
942 				if (inquote && np[1] == '"') {
943 					memmove(np, np + 1, strlen(np));
944 					np++;
945 					continue;
946 				} else {
947 					*np = ' '; 	/* bye bye " */
948 					goto endarg;
949 				}
950 
951 			default:
952 				continue;
953 			}
954 		}
955 		endarg: ;
956 		/*
957 		 *	cp points at the first char in the arg
958 		 *	np points one beyond the last char in the arg
959 		 */
960 		if ((argconcat == 0) || (argconcat != argno))
961 			putchar(' ');
962 #ifdef FULLDEBUG
963 		{
964 			char	*p;
965 			printf("[%d,%d: ", argno, np - cp);
966 			for (p = cp; p < np; p++) {
967 				putchar(*p);
968 			}
969 			printf("]");
970 		}
971 #endif /* FULLDEBUG */
972 		/*
973 		 *	Determine if the argument merits being printed
974 		 *
975 		 *	constant is the cut off point below which something
976 		 *	is not a word.
977 		 */
978 		if (((np - cp) > constant) &&
979 		    (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
980 			for (cp = cp; cp < np; cp++)
981 				putchar(*cp);
982 			last = np[-1];
983 			found++;
984 		} else if (found && (np - cp == 1) &&
985 		    chars[(unsigned char)*cp] == PUNCT) {
986 			putchar(*cp);
987 		} else {
988 			last = np[-1];
989 		}
990 		cp = np;
991 	}
992 	if (msflag && chars[last] == PUNCT)
993 		putchar(last);
994 	putchar('\n');
995 }
996 
/*
 *	put out words (for the -w option) with me conventions;
 *	this simply defers to the ms/mm routine
 */
void
meputwords(int macline)
{
	msputwords(macline);
}
1006 
/*
 *
 *	Skip over a nested set of macros
 *
 *	Possible arguments to noblock are:
 *
 *	fi	end of unfilled text
 *	PE	pic ending
 *	DE	display ending
 *
 *	for ms and mm only:
 *		KE	keep ending
 *
 *		NE	undocumented match to NS (for mm?)
 *		LE	mm only: matches RL or *L (for lists)
 *
 *	for me:
 *		([lqbzcdf]
 *
 *	a1 and a2 are the two characters of the request that ends the
 *	block.  When the end is found a newline is written, preceded by
 *	a '.' unless an equation was processed inside the block.
 */
void
noblock(char a1, char a2)
{
	int c1,c2;
	int eqnf;
	int lct;	/* nesting depth of lists still open (a1 == 'L' only) */

	lct = 0;
	eqnf = 1;
	SKIP;
	for (;;) {
		/* discard text lines until a line starting with '.' */
		while (C != '.')
			if (c == '\n')
				continue;
			else
				SKIP;
		if ((c1 = C) == '\n')
			continue;
		if ((c2 = C) == '\n')
			continue;
		if (c1 == a1 && c2 == a2) {
			SKIP;
			if (lct != 0) {
				lct--;
				continue;
			}
			if (eqnf)
				putchar('.');
			putchar('\n');
			return;
		} else if (a1 == 'L' && c2 == 'L') {
			/* a nested list start needs its own ending request */
			lct++;
			SKIP;
		}
		/*
		 *	equations (EQ) nested within a display
		 */
		else if (c1 == 'E' && c2 == 'Q') {
			if ((mac == ME && a1 == ')')
			    || (mac != ME && a1 == 'D')) {
				eqn();
				eqnf=0;
			}
		}
		/*
		 *	turning on filling is done by the paragraphing
		 *	macros
		 */
		else if (a1 == 'f') {	/* .fi */
			if  ((mac == ME && (c2 == 'h' || c2 == 'p'))
			    || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
				SKIP;
				return;
			}
		} else {
			SKIP;
		}
	}
}
1085 
/* Macro-table handler for an equation start: skip it with eqn(). */
int
EQ(void)
{
	eqn();
	return 0;
}
1093 
/* Macro-table handler for macro definitions: defer to macro(). */
int
domacro(void)
{
	macro();
	return 0;
}
1101 
1102 int
1103 PS(void)
1104 {
1105 
1106 	for (C; c == ' ' || c == '\t'; C)
1107 		;	/* nothing */
1108 
1109 	if (c == '<') {		/* ".PS < file" -- don't expect a .PE */
1110 		SKIP;
1111 		return(0);
1112 	}
1113 	if (!msflag)
1114 		inpic();
1115 	else
1116 		noblock('P', 'E');
1117 	return(0);
1118 }
1119 
1120 int
1121 skip(void)
1122 {
1123 
1124 	SKIP;
1125 	return(0);
1126 }
1127 
1128 int
1129 intbl(void)
1130 {
1131 
1132 	if (msflag)
1133 		stbl();
1134 	else
1135 		tbl();
1136 	return(0);
1137 }
1138 
1139 int
1140 outtbl(void)
1141 {
1142 
1143 	intable = NO;
1144 	return(0);
1145 }
1146 
1147 int
1148 so(void)
1149 {
1150 
1151 	if (!iflag) {
1152 		getfname();
1153 		if (fname[0]) {
1154 			if (++filesp - &files[0] > MAXFILES)
1155 				err(1, "too many nested files (max %d)",
1156 				    MAXFILES);
1157 			infile = *filesp = opn(fname);
1158 		}
1159 	}
1160 	return(0);
1161 }
1162 
1163 int
1164 nx(void)
1165 {
1166 
1167 	if (!iflag) {
1168 		getfname();
1169 		if (fname[0] == '\0')
1170 			exit(0);
1171 		if (infile != stdin)
1172 			fclose(infile);
1173 		infile = *filesp = opn(fname);
1174 	}
1175 	return(0);
1176 }
1177 
1178 int
1179 skiptocom(void)
1180 {
1181 
1182 	SKIP_TO_COM;
1183 	return(COMX);
1184 }
1185 
1186 int
1187 PP(pacmac c12)
1188 {
1189 	int c1, c2;
1190 
1191 	frommac(c12, c1, c2);
1192 	printf(".%c%c", c1, c2);
1193 	while (C != '\n')
1194 		putchar(c);
1195 	putchar('\n');
1196 	return(0);
1197 }
1198 
1199 int
1200 AU(void)
1201 {
1202 
1203 	if (mac == MM)
1204 		return(0);
1205 	SKIP_TO_COM;
1206 	return(COMX);
1207 }
1208 
1209 int
1210 SH(pacmac c12)
1211 {
1212 	int c1, c2;
1213 
1214 	frommac(c12, c1, c2);
1215 
1216 	if (parag) {
1217 		printf(".%c%c", c1, c2);
1218 		while (C != '\n')
1219 			putchar(c);
1220 		putchar(c);
1221 		putchar('!');
1222 		for (;;) {
1223 			while (C != '\n')
1224 				putchar(c);
1225 			putchar('\n');
1226 			if (C == '.')
1227 				return(COM);
1228 			putchar('!');
1229 			putchar(c);
1230 		}
1231 		/*NOTREACHED*/
1232 	} else {
1233 		SKIP_TO_COM;
1234 		return(COMX);
1235 	}
1236 }
1237 
1238 int
1239 UX(void)
1240 {
1241 
1242 	if (wordflag)
1243 		printf("UNIX\n");
1244 	else
1245 		printf("UNIX ");
1246 	return(0);
1247 }
1248 
1249 int
1250 MMHU(pacmac c12)
1251 {
1252 	int c1, c2;
1253 
1254 	frommac(c12, c1, c2);
1255 	if (parag) {
1256 		printf(".%c%c", c1, c2);
1257 		while (C != '\n')
1258 			putchar(c);
1259 		putchar('\n');
1260 	} else {
1261 		SKIP;
1262 	}
1263 	return(0);
1264 }
1265 
1266 int
1267 mesnblock(pacmac c12)
1268 {
1269 	int c1, c2;
1270 
1271 	frommac(c12, c1, c2);
1272 	noblock(')', c2);
1273 	return(0);
1274 }
1275 
1276 int
1277 mssnblock(pacmac c12)
1278 {
1279 	int c1, c2;
1280 
1281 	frommac(c12, c1, c2);
1282 	noblock(c1, 'E');
1283 	return(0);
1284 }
1285 
/* Macro-table handler for ".nf": skip up to the matching ".fi". */
int
nf(void)
{
	noblock('f', 'i');
	return 0;
}
1293 
/* Macro-table handler for ".ce": skip the centered lines with sce(). */
int
ce(void)
{
	sce();
	return 0;
}
1301 
1302 int
1303 meip(pacmac c12)
1304 {
1305 
1306 	if (parag)
1307 		mepp(c12);
1308 	else if (wordflag)	/* save the tag */
1309 		regline(meputmac, ONE);
1310 	else
1311 		SKIP;
1312 	return(0);
1313 }
1314 
1315 /*
1316  *	only called for -me .pp or .sh, when parag is on
1317  */
1318 int
1319 mepp(pacmac c12)
1320 {
1321 
1322 	PP(c12);		/* eats the line */
1323 	return(0);
1324 }
1325 
1326 /*
1327  *	Start of a section heading; output the section name if doing words
1328  */
1329 int
1330 mesh(pacmac c12)
1331 {
1332 
1333 	if (parag)
1334 		mepp(c12);
1335 	else if (wordflag)
1336 		defcomline(c12);
1337 	else
1338 		SKIP;
1339 	return(0);
1340 }
1341 
1342 /*
1343  *	process a font setting
1344  */
1345 int
1346 mefont(pacmac c12)
1347 {
1348 
1349 	argconcat = 1;
1350 	defcomline(c12);
1351 	argconcat = 0;
1352 	return(0);
1353 }
1354 
1355 int
1356 manfont(pacmac c12)
1357 {
1358 
1359 	return(mefont(c12));
1360 }
1361 
1362 int
1363 manpp(pacmac c12)
1364 {
1365 
1366 	return(mepp(c12));
1367 }
1368 
1369 void
1370 defcomline(pacmac c12)
1371 {
1372 	int c1, c2;
1373 
1374 	frommac(c12, c1, c2);
1375 	if (msflag && mac == MM && c2 == 'L') {
1376 		if (disp || c1 == 'R') {
1377 			noblock('L', 'E');
1378 		} else {
1379 			SKIP;
1380 			putchar('.');
1381 		}
1382 	}
1383 	else if (c1 == '.' && c2 == '.') {
1384 		if (msflag) {
1385 			SKIP;
1386 			return;
1387 		}
1388 		while (C == '.')
1389 			/*VOID*/;
1390 	}
1391 	++inmacro;
1392 	/*
1393 	 *	Process the arguments to the macro
1394 	 */
1395 	switch (mac) {
1396 	default:
1397 	case MM:
1398 	case MS:
1399 		if (c1 <= 'Z' && msflag)
1400 			regline(msputmac, ONE);
1401 		else
1402 			regline(msputmac, TWO);
1403 		break;
1404 	case ME:
1405 		regline(meputmac, ONE);
1406 		break;
1407 	}
1408 	--inmacro;
1409 }
1410 
/*
 * Process a request line; the leading '.' or apostrophe has already
 * been read.
 *
 * The first two characters after optional blanks form the macro name.
 * A few cases are handled directly (empty request, "[" with msflag,
 * -mm ".P" with parag); otherwise the name is looked up by binary
 * search in the table built once by buildtab().  If the entry's
 * condition holds, its handler runs and its return value selects what
 * happens next: COMX re-enters at comx, COM at com, anything else
 * returns.  Requests that are not found, or whose condition fails, go
 * to defcomline().
 */
void
comline(void)
{
	int	c1;
	int	c2;
	pacmac	c12;
	int	mid;
	int	lb, ub;
	int	hit;
	static	int	tabsize = 0;
	static	struct	mactab	*mactab = (struct mactab *)0;
	struct	mactab	*mp;

	/* build the sorted macro table on first use */
	if (mactab == 0)
		 buildtab(&mactab, &tabsize);
com:
	while (C == ' ' || c == '\t')
		;
comx:
	if ((c1 = c) == '\n')
		return;
	c2 = C;
	if (c1 == '.' && c2 != '.')
		inmacro = NO;
	if (msflag && c1 == '[') {
		refer(c2);
		return;
	}
	if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
		printf(".P\n");
		return;
	}
	if (c2 == '\n')
		return;
	/*
	 *	Single letter macro
	 */
	if (mac == ME && (c2 == ' ' || c2 == '\t') )
		c2 = ' ';
	c12 = tomac(c1, c2);
	/*
	 *	binary search through the table of macros
	 */
	lb = 0;
	ub = tabsize - 1;
	while (lb <= ub) {
		mid = (ub + lb) / 2;
		mp = &mactab[mid];
		if (mp->macname < c12)
			lb = mid + 1;
		else if (mp->macname > c12)
			ub = mid - 1;
		else {
			hit = 1;
#ifdef FULLDEBUG
			printf("preliminary hit macro %c%c ", c1, c2);
#endif /* FULLDEBUG */
			/* the name matched; now check the entry's condition */
			switch (mp->condition) {
			case NONE:
				hit = YES;
				break;
			case FNEST:
				hit = (filesp == files);
				break;
			case NOMAC:
				hit = !inmacro;
				break;
			case MAC:
				hit = inmacro;
				break;
			case PARAG:
				hit = parag;
				break;
			case NBLK:
				hit = !keepblock;
				break;
			default:
				hit = 0;
			}

			if (hit) {
#ifdef FULLDEBUG
				printf("MATCH\n");
#endif /* FULLDEBUG */
				switch ((*(mp->func))(c12)) {
				default:
					return;
				case COMX:
					goto comx;
				case COM:
					goto com;
				}
			}
#ifdef FULLDEBUG
			printf("FAIL\n");
#endif /* FULLDEBUG */
			/* condition failed: leave the search, use the default */
			break;
		}
	}
	defcomline(c12);
}
1512 
1513 int
1514 macsort(const void *p1, const void *p2)
1515 {
1516 	struct mactab *t1 = (struct mactab *)p1;
1517 	struct mactab *t2 = (struct mactab *)p2;
1518 
1519 	return(t1->macname - t2->macname);
1520 }
1521 
1522 int
1523 sizetab(struct mactab *mp)
1524 {
1525 	int i;
1526 
1527 	i = 0;
1528 	if (mp) {
1529 		for (; mp->macname; mp++, i++)
1530 			/*VOID*/ ;
1531 	}
1532 	return(i);
1533 }
1534 
1535 struct mactab *
1536 macfill(struct mactab *dst, struct mactab *src)
1537 {
1538 
1539 	if (src) {
1540 		while (src->macname)
1541 			*dst++ = *src++;
1542 	}
1543 	return(dst);
1544 }
1545 
1546 __dead void
1547 usage(void)
1548 {
1549 	extern char *__progname;
1550 
1551 	fprintf(stderr, "usage: %s [-ikpw] [-m a | e | l | m | s] [file ...]\n", __progname);
1552 	exit(1);
1553 }
1554 
1555 void
1556 buildtab(struct mactab **r_back, int *r_size)
1557 {
1558 	int	size;
1559 	struct	mactab	*p, *p1, *p2;
1560 	struct	mactab	*back;
1561 
1562 	size = sizetab(troffmactab) + sizetab(ppmactab);
1563 	p1 = p2 = NULL;
1564 	if (msflag) {
1565 		switch (mac) {
1566 		case ME:
1567 			p1 = memactab;
1568 			break;
1569 		case MM:
1570 			p1 = msmactab;
1571 			p2 = mmmactab;
1572 			break;
1573 		case MS:
1574 			p1 = msmactab;
1575 			break;
1576 		case MA:
1577 			p1 = manmactab;
1578 			break;
1579 		default:
1580 			break;
1581 		}
1582 	}
1583 	size += sizetab(p1);
1584 	size += sizetab(p2);
1585 	back = (struct mactab *)calloc(size+2, sizeof(struct mactab));
1586 	if (back == NULL)
1587 		err(1, NULL);
1588 
1589 	p = macfill(back, troffmactab);
1590 	p = macfill(p, ppmactab);
1591 	p = macfill(p, p1);
1592 	p = macfill(p, p2);
1593 
1594 	qsort(back, size, sizeof(struct mactab), macsort);
1595 	*r_size = size;
1596 	*r_back = back;
1597 }
1598 
1599 /*
1600  *	troff commands; buildtab() always copies these into the lookup table
1601  */
1602 struct	mactab	troffmactab[] = {
1603 	M(NONE,		'\\','"',	skip),	/* comment */
1604 	M(NOMAC,	'd','e',	domacro),	/* define (NOMAC: handler runs only while !inmacro) */
1605 	M(NOMAC,	'i','g',	domacro),	/* ignore till .. */
1606 	M(NOMAC,	'a','m',	domacro),	/* append macro */
1607 	M(NBLK,		'n','f',	nf),	/* filled (NBLK: handler runs only while !keepblock) */
1608 	M(NBLK,		'c','e',	ce),	/* centered */
1609 
1610 	M(NONE,		's','o',	so),	/* source a file */
1611 	M(NONE,		'n','x',	nx),	/* go to next file */
1612 
1613 	M(NONE,		't','m',	skip),	/* print string on tty */
1614 	M(NONE,		'h','w',	skip),	/* exception hyphen words */
1615 	M(NONE,		0,0,		0)	/* end marker: sizetab()/macfill() stop at a zero macname */
1616 };
1617 
1618 /*
1619  *	Preprocessor output; buildtab() always copies these into the lookup table
1620  */
1621 struct	mactab	ppmactab[] = {
1622 	M(FNEST,	'E','Q',	EQ),	/* equation starting (FNEST: handler runs only when filesp == files) */
1623 	M(FNEST,	'T','S',	intbl),	/* table starting */
1624 	M(FNEST,	'T','C',	intbl),	/* alternative table? */
1625 	M(FNEST,	'T','&',	intbl),	/* table reformatting */
1626 	M(NONE,		'T','E',	outtbl),/* table ending */
1627 	M(NONE,		'P','S',	PS),	/* picture starting */
1628 	M(NONE,		0,0,		0)	/* end marker: sizetab()/macfill() stop at a zero macname */
1629 };
1630 
1631 /*
1632  *	Particular to ms and mm; buildtab() adds these when msflag is set and mac is MS or MM
1633  */
1634 struct	mactab	msmactab[] = {
1635 	M(NONE,		'T','L',	skiptocom),	/* title follows */
1636 	M(NONE,		'F','S',	skiptocom),	/* start footnote */
1637 	M(NONE,		'O','K',	skiptocom),	/* Other kws */
1638 
1639 	M(NONE,		'N','R',	skip),	/* undocumented */
1640 	M(NONE,		'N','D',	skip),	/* use supplied date */
1641 
1642 	M(PARAG,	'P','P',	PP),	/* begin parag (PARAG: handler runs only when parag is set) */
1643 	M(PARAG,	'I','P',	PP),	/* begin indent parag, tag x */
1644 	M(PARAG,	'L','P',	PP),	/* left blocked parag */
1645 
1646 	M(NONE,		'A','U',	AU),	/* author */
1647 	M(NONE,		'A','I',	AU),	/* authors institution */
1648 
1649 	M(NONE,		'S','H',	SH),	/* section heading */
1650 	M(NONE,		'S','N',	SH),	/* undocumented */
1651 	M(NONE,		'U','X',	UX),	/* unix */
1652 
1653 	M(NBLK,		'D','S',	mssnblock),	/* start display text (NBLK: handler runs only while !keepblock) */
1654 	M(NBLK,		'K','S',	mssnblock),	/* start keep */
1655 	M(NBLK,		'K','F',	mssnblock),	/* start float keep */
1656 	M(NONE,		0,0,		0)	/* end marker: sizetab()/macfill() stop at a zero macname */
1657 };
1658 
1659 struct	mactab	mmmactab[] = {	/* buildtab() adds these, after msmactab, only when msflag is set and mac is MM */
1660 	M(NONE,		'H',' ',	MMHU),	/* -mm ? */
1661 	M(NONE,		'H','U',	MMHU),	/* -mm ? */
1662 	M(PARAG,	'P',' ',	PP),	/* paragraph for -mm */
1663 	M(NBLK,		'N','S',	mssnblock),	/* undocumented */
1664 	M(NONE,		0,0,		0)	/* end marker: sizetab()/macfill() stop at a zero macname */
1665 };
1666 
1667 struct	mactab	memactab[] = {	/* buildtab() adds these only when msflag is set and mac is ME */
1668 	M(PARAG,	'p','p',	mepp),	/* PARAG: handler runs only when parag is set */
1669 	M(PARAG,	'l','p',	mepp),
1670 	M(PARAG,	'n','p',	mepp),
1671 	M(NONE,		'i','p',	meip),
1672 
1673 	M(NONE,		's','h',	mesh),
1674 	M(NONE,		'u','h',	mesh),
1675 
1676 	M(NBLK,		'(','l',	mesnblock),	/* NBLK: handler runs only while !keepblock */
1677 	M(NBLK,		'(','q',	mesnblock),
1678 	M(NBLK,		'(','b',	mesnblock),
1679 	M(NBLK,		'(','z',	mesnblock),
1680 	M(NBLK,		'(','c',	mesnblock),
1681 
1682 	M(NBLK,		'(','d',	mesnblock),
1683 	M(NBLK,		'(','f',	mesnblock),
1684 	M(NBLK,		'(','x',	mesnblock),
1685 
1686 	M(NONE,		'r',' ',	mefont),	/* single letter macro: when mac is ME a blank or tab second char is looked up as ' ' */
1687 	M(NONE,		'i',' ',	mefont),
1688 	M(NONE,		'b',' ',	mefont),
1689 	M(NONE,		'u',' ',	mefont),
1690 	M(NONE,		'q',' ',	mefont),
1691 	M(NONE,		'r','b',	mefont),
1692 	M(NONE,		'b','i',	mefont),
1693 	M(NONE,		'b','x',	mefont),
1694 	M(NONE,		0,0,		0)	/* end marker: sizetab()/macfill() stop at a zero macname */
1695 };
1696 
1697 struct	mactab	manmactab[] = {	/* buildtab() adds these only when msflag is set and mac is MA */
1698 	M(PARAG,	'B','I',	manfont),	/* PARAG: handler runs only when parag is set */
1699 	M(PARAG,	'B','R',	manfont),
1700 	M(PARAG,	'I','B',	manfont),
1701 	M(PARAG,	'I','R',	manfont),
1702 	M(PARAG,	'R','B',	manfont),
1703 	M(PARAG,	'R','I',	manfont),
1704 
1705 	M(PARAG,	'P','P',	manpp),
1706 	M(PARAG,	'L','P',	manpp),
1707 	M(PARAG,	'H','P',	manpp),
1708 	M(NONE,		0,0,		0)	/* end marker: sizetab()/macfill() stop at a zero macname */
1709 };
1710