xref: /openbsd-src/usr.bin/deroff/deroff.c (revision fc405d53b73a2d73393cb97f684863d17b583e38)
1 /*	$OpenBSD: deroff.c,v 1.17 2023/03/08 04:43:10 guenther Exp $	*/
2 
3 /*-
4  * Copyright (c) 1988, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Copyright (C) Caldera International Inc.  2001-2002.
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code and documentation must retain the above
39  *    copyright notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed or owned by Caldera
46  *	International, Inc.
47  * 4. Neither the name of Caldera International, Inc. nor the names of other
48  *    contributors may be used to endorse or promote products derived from
49  *    this software without specific prior written permission.
50  *
51  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
52  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
53  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
54  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
55  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
56  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
57  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
58  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
60  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
61  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 #include <err.h>
66 #include <limits.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <unistd.h>
71 
72 /*
73  *	Deroff command -- strip troff, eqn, and Tbl sequences from
 *	a file.  The -w flag causes output of one word per line
 *	rather than in the original format.
 *	-mm (or -ms) causes the corresponding macros to be interpreted
 *	so that just sentences are output.
 *	-ml  also gets rid of lists.
79  *	Deroff follows .so and .nx commands, removes contents of macro
80  *	definitions, equations (both .EQ ... .EN and $...$),
81  *	Tbl command sequences, and Troff backslash constructions.
82  *
83  *	All input is through the Cget macro;
84  *	the most recently read character is in c.
85  *
86  *	Modified by Robert Henry to process -me and -man macros.
87  */
88 
/*
 * Character readers.  Cget stores the next character of infile in the
 * global c.  On EOF it evaluates to eof(); when the character equals
 * ldelim while the top-level file is being read (filesp == files) it
 * evaluates to skeqn(); otherwise it evaluates to c.
 * C1get is the same reader without the ldelim check.
 */
#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get ( (c=getc(infile)) == EOF ? eof() :  c)

/* With DEBUG, C and C1 are function calls (_C/_C1) wrapping the macros. */
#ifdef DEBUG
#  define C	_C()
#  define C1	_C1()
#else /* not DEBUG */
#  define C	Cget
#  define C1	C1get
#endif /* not DEBUG */

/*
 * SKIP reads up to and including the next newline; it is a bare
 * `while (...)' and relies on the `;' the caller writes after it.
 */
#define SKIP while (C != '\n')
/*
 * SKIP_TO_COM skips two lines, then reads until a '.' that directly
 * follows a newline and is itself followed by a character <= 'Z'.
 * NOTE: this expands to several statements, so it must not be used as
 * the unbraced body of an if/else/loop.
 */
#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c

#define	YES 1
#define	NO 0
/* Values of the global mac, selected by the -m option in main(). */
#define	MS 0	/* -ms */
#define	MM 1	/* -mm */
#define	ME 2	/* -me */
#define	MA 3	/* -man */

#ifdef DEBUG
/* Printable names for the mac values above, indexed by mac. */
char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* Values passed as the `constant' argument of regline(). */
#define	ONE 1
#define	TWO 2

/* Initial value of ldelim/rdelim: no eqn delimiter is set. */
#define NOCHAR -2
/* Character classes stored in chars[]. */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4

/* Number of entries in files[]. */
#define MAXFILES 20
125 
126 int	iflag;
127 int	wordflag;
128 int	msflag;		/* processing a source written using a mac package */
129 int	mac;		/* which package */
130 int	disp;
131 int	parag;
132 int	inmacro;
133 int	intable;
134 int	keepblock;	/* keep blocks of text; normally false when msflag */
135 
136 char chars[128];  /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
137 
138 char line[LINE_MAX];
139 char *lp;
140 
141 int c;
142 int pc;
143 int ldelim;
144 int rdelim;
145 
146 char fname[PATH_MAX];
147 FILE *files[MAXFILES];
148 FILE **filesp;
149 FILE *infile;
150 
151 int argc;
152 char **argv;
153 
154 /*
155  *	Macro processing
156  *
157  *	Macro table definitions
158  */
typedef	int pacmac;		/* compressed macro name */
int	argconcat = 0;		/* concat arguments together (-me only) */

/* Pack two macro-name characters into a pacmac, and unpack them again. */
#define	tomac(c1, c2)		((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
#define	frommac(src, c1, c2)	(((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF))

/*
 * One macro table entry: a matching condition (one of the flags
 * below), the packed macro name and the handler to call on a hit.
 */
struct mactab{
	int	condition;
	pacmac	macname;
	int	(*func)();	/* XXX - args */
};

/* Tentative declarations of the per-package tables. */
struct	mactab	troffmactab[];
struct	mactab	ppmactab[];
struct	mactab	msmactab[];
struct	mactab	mmmactab[];
struct	mactab	memactab[];
struct	mactab	manmactab[];

/*
 *	Macro table initialization
 */
#define	M(cond, c1, c2, func) {cond, tomac(c1, c2), func}

/*
 *	Flags for matching conditions other than
 *	the macro name
 */
#define	NONE		0
#define	FNEST		1		/* no nested files */
#define	NOMAC		2		/* no macro */
#define	MAC		3		/* macro */
#define	PARAG		4		/* in a paragraph */
#define	MSF		5		/* msflag is on */
#define	NBLK		6		/* set if no blocks to be kept */

/*
 *	Return codes from macro minions, determine where to jump,
 *	how to repeat/reprocess text
 */
#define	COMX		1		/* goto comx */
#define	COM		2		/* goto com */
201 
/*
 * Prototypes.  The int-returning functions from EQ() to manpp() are
 * the macro handlers; comline() acts on their COM/COMX return codes.
 */
int	 skeqn(void);
int	 eof(void);
int	 _C1(void);
int	 _C(void);
int	 EQ(void);
int	 domacro(void);
int	 PS(void);
int	 skip(void);
int	 intbl(void);
int	 outtbl(void);
int	 so(void);
int	 nx(void);
int	 skiptocom(void);
int	 PP(pacmac);
int	 AU(void);
int	 SH(pacmac);
int	 UX(void);
int	 MMHU(pacmac);
int	 mesnblock(pacmac);
int	 mssnblock(pacmac);
int	 nf(void);
int	 ce(void);
int	 meip(pacmac);
int	 mepp(pacmac);
int	 mesh(pacmac);
int	 mefont(pacmac);
int	 manfont(pacmac);
int	 manpp(pacmac);
int	 macsort(const void *, const void *);
int	 sizetab(struct mactab *);
void	 getfname(void);
void	 textline(char *, int);
void	 work(void);
void	 regline(void (*)(char *, int), int);
void	 macro(void);
void	 tbl(void);
void	 stbl(void);
void	 eqn(void);
void	 backsl(void);
void	 sce(void);
void	 refer(int);
void	 inpic(void);
void	 msputmac(char *, int);
void	 msputwords(int);
void	 meputmac(char *, int);
void	 meputwords(int);
void	 noblock(char, char);
void	 defcomline(pacmac);
void	 comline(void);
void	 buildtab(struct mactab **, int *);
FILE	*opn(char *);
struct mactab *macfill(struct mactab *, struct mactab *);
__dead void usage(void);
255 
256 int
257 main(int ac, char **av)
258 {
259 	int	i, ch;
260 	int	errflg = 0;
261 	int	kflag = NO;
262 
263 	if (pledge("stdio rpath", NULL) == -1)
264 		err(1, "pledge");
265 
266 	iflag = NO;
267 	wordflag = NO;
268 	msflag = NO;
269 	mac = ME;
270 	disp = NO;
271 	parag = NO;
272 	inmacro = NO;
273 	intable = NO;
274 	ldelim	= NOCHAR;
275 	rdelim	= NOCHAR;
276 	keepblock = YES;
277 
278 	while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
279 		switch (ch) {
280 		case 'i':
281 			iflag = YES;
282 			break;
283 		case 'k':
284 			kflag = YES;
285 			break;
286 		case 'm':
287 			msflag = YES;
288 			keepblock = NO;
289 			switch (optarg[0]) {
290 			case 'm':
291 				mac = MM;
292 				break;
293 			case 's':
294 				mac = MS;
295 				break;
296 			case 'e':
297 				mac = ME;
298 				break;
299 			case 'a':
300 				mac = MA;
301 				break;
302 			case 'l':
303 				disp = YES;
304 				break;
305 			default:
306 				errflg = 1;
307 				break;
308 			}
309 			if (optarg[1] != '\0')
310 				errflg = 1;
311 			break;
312 		case 'p':
313 			parag = YES;
314 			break;
315 		case 'w':
316 			wordflag = YES;
317 			kflag = YES;
318 			break;
319 		default:
320 			errflg = 1;
321 		}
322 	}
323 	argc = ac - optind;
324 	argv = av + optind;
325 
326 	if (kflag)
327 		keepblock = YES;
328 	if (errflg)
329 		usage();
330 
331 #ifdef DEBUG
332 	printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
333 		msflag, mactab[mac], keepblock, disp);
334 #endif /* DEBUG */
335 	if (argc == 0) {
336 		infile = stdin;
337 	} else {
338 		infile = opn(argv[0]);
339 		--argc;
340 		++argv;
341 	}
342 	files[0] = infile;
343 	filesp = &files[0];
344 
345 	for (i = 'a'; i <= 'z'; ++i)
346 		chars[i] = LETTER;
347 	for (i = 'A'; i <= 'Z'; ++i)
348 		chars[i] = LETTER;
349 	for (i = '0'; i <= '9'; ++i)
350 		chars[i] = DIGIT;
351 	chars['\''] = APOS;
352 	chars['&'] = APOS;
353 	chars['.'] = PUNCT;
354 	chars[','] = PUNCT;
355 	chars[';'] = PUNCT;
356 	chars['?'] = PUNCT;
357 	chars[':'] = PUNCT;
358 	work();
359 	exit(0);
360 }
361 
362 int
363 skeqn(void)
364 {
365 
366 	while ((c = getc(infile)) != rdelim) {
367 		if (c == EOF)
368 			c = eof();
369 		else if (c == '"') {
370 			while ((c = getc(infile)) != '"') {
371 				if (c == EOF ||
372 				    (c == '\\' && (c = getc(infile)) == EOF))
373 					c = eof();
374 			}
375 		}
376 	}
377 	if (msflag)
378 		return((c = 'x'));
379 	return((c = ' '));
380 }
381 
/*
 * Open the file named p for reading and return the stream.
 * Exits through err() when the file cannot be opened.
 */
FILE *
opn(char *p)
{
	FILE *fp = fopen(p, "r");

	if (fp == NULL)
		err(1, "fopen %s", p);
	return (fp);
}
392 
393 int
394 eof(void)
395 {
396 
397 	if (infile != stdin)
398 		fclose(infile);
399 	if (filesp > files)
400 		infile = *--filesp;
401 	else if (argc > 0) {
402 		infile = opn(argv[0]);
403 		--argc;
404 		++argv;
405 	} else
406 		exit(0);
407 	return(C);
408 }
409 
410 void
411 getfname(void)
412 {
413 	char *p;
414 	struct chain {
415 		struct chain *nextp;
416 		char *datap;
417 	} *q;
418 	static struct chain *namechain= NULL;
419 
420 	while (C == ' ')
421 		;	/* nothing */
422 
423 	for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' &&
424 	    c != ' ' && c != '\t' && c != '\\'; ++p)
425 		C;
426 	*p = '\0';
427 	while (c != '\n')
428 		C;
429 
430 	/* see if this name has already been used */
431 	for (q = namechain ; q; q = q->nextp)
432 		if (strcmp(fname, q->datap) == 0) {
433 			fname[0] = '\0';
434 			return;
435 		}
436 
437 	q = malloc(sizeof(struct chain));
438 	if (q == NULL)
439 		err(1, NULL);
440 	q->nextp = namechain;
441 	q->datap = strdup(fname);
442 	if (q->datap == NULL)
443 		err(1, NULL);
444 	namechain = q;
445 }
446 
447 void
448 textline(char *str, int constant)
449 {
450 
451 	if (wordflag) {
452 		msputwords(0);
453 		return;
454 	}
455 	puts(str);
456 }
457 
458 void
459 work(void)
460 {
461 
462 	for (;;) {
463 		C;
464 #ifdef FULLDEBUG
465 		printf("Starting work with `%c'\n", c);
466 #endif /* FULLDEBUG */
467 		if (c == '.' || c == '\'')
468 			comline();
469 		else
470 			regline(textline, TWO);
471 	}
472 }
473 
474 void
475 regline(void (*pfunc)(char *, int), int constant)
476 {
477 
478 	line[0] = c;
479 	lp = line;
480 	while (lp - line < sizeof(line)) {
481 		if (c == '\\') {
482 			*lp = ' ';
483 			backsl();
484 		}
485 		if (c == '\n')
486 			break;
487 		if (intable && c == 'T') {
488 			*++lp = C;
489 			if (c == '{' || c == '}') {
490 				lp[-1] = ' ';
491 				*lp = C;
492 			}
493 		} else {
494 			*++lp = C;
495 		}
496 	}
497 	*lp = '\0';
498 
499 	if (line[0] != '\0')
500 		(*pfunc)(line, constant);
501 }
502 
503 void
504 macro(void)
505 {
506 
507 	if (msflag) {
508 		do {
509 			SKIP;
510 		} while (C!='.' || C!='.' || C=='.');	/* look for  .. */
511 		if (c != '\n')
512 			SKIP;
513 		return;
514 	}
515 	SKIP;
516 	inmacro = YES;
517 }
518 
519 void
520 tbl(void)
521 {
522 
523 	while (C != '.')
524 		;	/* nothing */
525 	SKIP;
526 	intable = YES;
527 }
528 
529 void
530 stbl(void)
531 {
532 
533 	while (C != '.')
534 		;	/* nothing */
535 	SKIP_TO_COM;
536 	if (c != 'T' || C != 'E') {
537 		SKIP;
538 		pc = c;
539 		while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
540 			pc = c;
541 	}
542 }
543 
544 void
545 eqn(void)
546 {
547 	int c1, c2;
548 	int dflg;
549 	char last;
550 
551 	last=0;
552 	dflg = 1;
553 	SKIP;
554 
555 	for (;;) {
556 		if (C1 == '.'  || c == '\'') {
557 			while (C1 == ' ' || c == '\t')
558 				;
559 			if (c == 'E' && C1 == 'N') {
560 				SKIP;
561 				if (msflag && dflg) {
562 					putchar('x');
563 					putchar(' ');
564 					if (last) {
565 						putchar(last);
566 						putchar('\n');
567 					}
568 				}
569 				return;
570 			}
571 		} else if (c == 'd') {
572 			/* look for delim */
573 			if (C1 == 'e' && C1 == 'l')
574 				if (C1 == 'i' && C1 == 'm') {
575 					while (C1 == ' ')
576 						;	/* nothing */
577 
578 					if ((c1 = c) == '\n' ||
579 					    (c2 = C1) == '\n' ||
580 					    (c1 == 'o' && c2 == 'f' && C1=='f')) {
581 						ldelim = NOCHAR;
582 						rdelim = NOCHAR;
583 					} else {
584 						ldelim = c1;
585 						rdelim = c2;
586 					}
587 				}
588 			dflg = 0;
589 		}
590 
591 		if (c != '\n')
592 			while (C1 != '\n') {
593 				if (chars[c] == PUNCT)
594 					last = c;
595 				else if (c != ' ')
596 					last = 0;
597 			}
598 	}
599 }
600 
/*
 * skip over a complete backslash construction
 *
 * Called with the backslash already read.  Reads the character(s)
 * that follow through C and leaves the last character read in c.
 */
void
backsl(void)
{
	int bdelim;

sw:
	switch (C) {
	case '"':
		/* comment: skip the rest of the line */
		SKIP;
		return;

	case 's':
		/* size change: skip a nested construction or the digits */
		if (C == '\\')
			backsl();
		else {
			while (C >= '0' && c <= '9')
				;	/* nothing */
			/* push back the first non-digit and fake a digit */
			ungetc(c, infile);
			c = '0';
		}
		/* back up the caller's line pointer by one */
		--lp;
		return;

	case 'f':
	case 'n':
	case '*':
		/* one-character name unless '(' follows */
		if (C != '(')
			return;
		/* FALLTHROUGH */

	case '(':
		/* two-character name */
		if (msflag) {
			if (C == 'e') {
				if (C == 'm') {
					/* \(em is replaced by '-' */
					*lp = '-';
					return;
				}
			}
			else if (c != '\n')
				C;
			return;
		}
		if (C != '\n')
			C;
		return;

	case '$':
		C;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		/*
		 * delimited argument: skip to the matching delimiter or
		 * the end of the line, handling nested constructions
		 */
		if ((bdelim = C) == '\n')
			return;
		while (C != '\n' && c != bdelim)
			if (c == '\\')
				backsl();
		return;

	case '\\':
		/* doubled backslash: inside a macro, look at what follows */
		if (inmacro)
			goto sw;
		/* FALLTHROUGH */

	default:
		return;
	}
}
674 
675 void
676 sce(void)
677 {
678 	char *ap;
679 	int n, i;
680 	char a[10];
681 
682 	for (ap = a; C != '\n'; ap++) {
683 		*ap = c;
684 		if (ap == &a[9]) {
685 			SKIP;
686 			ap = a;
687 			break;
688 		}
689 	}
690 	if (ap != a)
691 		n = atoi(a);
692 	else
693 		n = 1;
694 	for (i = 0; i < n;) {
695 		if (C == '.') {
696 			if (C == 'c') {
697 				if (C == 'e') {
698 					while (C == ' ')
699 						;	/* nothing */
700 					if (c == '0') {
701 						SKIP;
702 						break;
703 					} else
704 						SKIP;
705 				}
706 				else
707 					SKIP;
708 			} else if (c == 'P' || C == 'P') {
709 				if (c != '\n')
710 					SKIP;
711 				break;
712 			} else if (c != '\n')
713 				SKIP;
714 		} else {
715 			SKIP;
716 			i++;
717 		}
718 	}
719 }
720 
721 void
722 refer(int c1)
723 {
724 	int c2;
725 
726 	if (c1 != '\n')
727 		SKIP;
728 
729 	for (c2 = -1;;) {
730 		if (C != '.')
731 			SKIP;
732 		else {
733 			if (C != ']')
734 				SKIP;
735 			else {
736 				while (C != '\n')
737 					c2 = c;
738 				if (c2 != -1 && chars[c2] == PUNCT)
739 					putchar(c2);
740 				return;
741 			}
742 		}
743 	}
744 }
745 
746 void
747 inpic(void)
748 {
749 	int c1;
750 	char *p1, *ep;
751 
752 	SKIP;
753 	p1 = line;
754 	ep = line + sizeof(line) - 1;
755 	c = '\n';
756 	for (;;) {
757 		c1 = c;
758 		if (C == '.' && c1 == '\n') {
759 			if (C != 'P') {
760 				if (c == '\n')
761 					continue;
762 				else {
763 					SKIP;
764 					c = '\n';
765 					continue;
766 				}
767 			}
768 			if (C != 'E') {
769 				if (c == '\n')
770 					continue;
771 				else {
772 					SKIP;
773 					c = '\n';
774 					continue;
775 				}
776 			}
777 			SKIP;
778 			return;
779 		}
780 		else if (c == '\"') {
781 			while (C != '\"') {
782 				if (c == '\\') {
783 					if (C == '\"')
784 						continue;
785 					ungetc(c, infile);
786 					backsl();
787 				} else if (p1 + 1 >= ep) {
788 					errx(1, ".PS length exceeds limit");
789 				} else {
790 					*p1++ = c;
791 				}
792 			}
793 			*p1++ = ' ';
794 		}
795 		else if (c == '\n' && p1 != line) {
796 			*p1 = '\0';
797 			if (wordflag)
798 				msputwords(NO);
799 			else {
800 				puts(line);
801 				putchar('\n');
802 			}
803 			p1 = line;
804 		}
805 	}
806 }
807 
#ifdef DEBUG
/*
 * Function form of the C1get reader, used for C1 in DEBUG builds.
 */
int
_C1(void)
{
	int ch;

	ch = C1get;
	return (ch);
}

/*
 * Function form of the Cget reader, used for C in DEBUG builds.
 */
int
_C(void)
{
	int ch;

	ch = Cget;
	return (ch);
}
#endif /* DEBUG */
823 
824 /*
825  *	Put out a macro line, using ms and mm conventions.
826  */
827 void
828 msputmac(char *s, int constant)
829 {
830 	char *t;
831 	int found;
832 	int last;
833 
834 	last = 0;
835 	found = 0;
836 	if (wordflag) {
837 		msputwords(YES);
838 		return;
839 	}
840 	while (*s) {
841 		while (*s == ' ' || *s == '\t')
842 			putchar(*s++);
843 		for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
844 			;	/* nothing */
845 		if (*s == '\"')
846 			s++;
847 		if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
848 		    chars[(unsigned char)s[1]] == LETTER) {
849 			while (s < t)
850 				if (*s == '\"')
851 					s++;
852 				else
853 					putchar(*s++);
854 			last = *(t-1);
855 			found++;
856 		} else if (found && chars[(unsigned char)s[0]] == PUNCT &&
857 		    s[1] == '\0') {
858 			putchar(*s++);
859 		} else {
860 			last = *(t - 1);
861 			s = t;
862 		}
863 	}
864 	putchar('\n');
865 	if (msflag && chars[last] == PUNCT) {
866 		putchar(last);
867 		putchar('\n');
868 	}
869 }
870 
871 /*
872  *	put out words (for the -w option) with ms and mm conventions
873  */
874 void
875 msputwords(int macline)
876 {
877 	char *p, *p1;
878 	int i, nlet;
879 
880 	for (p1 = line;;) {
881 		/*
882 		 *	skip initial specials ampersands and apostrophes
883 		 */
884 		while (chars[(unsigned char)*p1] < DIGIT)
885 			if (*p1++ == '\0')
886 				return;
887 		nlet = 0;
888 		for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
889 			if (i == LETTER)
890 				++nlet;
891 
892 		if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
893 			/*
894 			 *	delete trailing ampersands and apostrophes
895 			 */
896 			while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
897 			    i == APOS )
898 				--p;
899 			while (p1 < p)
900 				putchar(*p1++);
901 			putchar('\n');
902 		} else {
903 			p1 = p;
904 		}
905 	}
906 }
907 
908 /*
909  *	put out a macro using the me conventions
910  */
911 #define SKIPBLANK(cp)	while (*cp == ' ' || *cp == '\t') { cp++; }
912 #define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\cp' && *cp !='\0') { cp++; }
913 
914 void
915 meputmac(char *cp, int constant)
916 {
917 	char	*np;
918 	int	found;
919 	int	argno;
920 	int	last;
921 	int	inquote;
922 
923 	last = 0;
924 	found = 0;
925 	if (wordflag) {
926 		meputwords(YES);
927 		return;
928 	}
929 	for (argno = 0; *cp; argno++) {
930 		SKIPBLANK(cp);
931 		inquote = (*cp == '"');
932 		if (inquote)
933 			cp++;
934 		for (np = cp; *np; np++) {
935 			switch (*np) {
936 			case '\n':
937 			case '\0':
938 				break;
939 
940 			case '\t':
941 			case ' ':
942 				if (inquote)
943 					continue;
944 				else
945 					goto endarg;
946 
947 			case '"':
948 				if (inquote && np[1] == '"') {
949 					memmove(np, np + 1, strlen(np));
950 					np++;
951 					continue;
952 				} else {
953 					*np = ' ';	/* bye bye " */
954 					goto endarg;
955 				}
956 
957 			default:
958 				continue;
959 			}
960 		}
961 		endarg: ;
962 		/*
963 		 *	cp points at the first char in the arg
964 		 *	np points one beyond the last char in the arg
965 		 */
966 		if ((argconcat == 0) || (argconcat != argno))
967 			putchar(' ');
968 #ifdef FULLDEBUG
969 		{
970 			char	*p;
971 			printf("[%d,%d: ", argno, np - cp);
972 			for (p = cp; p < np; p++) {
973 				putchar(*p);
974 			}
975 			printf("]");
976 		}
977 #endif /* FULLDEBUG */
978 		/*
979 		 *	Determine if the argument merits being printed
980 		 *
981 		 *	constant is the cut off point below which something
982 		 *	is not a word.
983 		 */
984 		if (((np - cp) > constant) &&
985 		    (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
986 			for (; cp < np; cp++)
987 				putchar(*cp);
988 			last = np[-1];
989 			found++;
990 		} else if (found && (np - cp == 1) &&
991 		    chars[(unsigned char)*cp] == PUNCT) {
992 			putchar(*cp);
993 		} else {
994 			last = np[-1];
995 		}
996 		cp = np;
997 	}
998 	if (msflag && chars[last] == PUNCT)
999 		putchar(last);
1000 	putchar('\n');
1001 }
1002 
/*
 *	put out words (for the -w option) with me conventions;
 *	this simply defers to msputwords()
 */
void
meputwords(int macline)
{
	int flag = macline;

	msputwords(flag);
}
1012 
1013 /*
1014  *
1015  *	Skip over a nested set of macros
1016  *
1017  *	Possible arguments to noblock are:
1018  *
1019  *	fi	end of unfilled text
1020  *	PE	pic ending
1021  *	DE	display ending
1022  *
1023  *	for ms and mm only:
1024  *		KE	keep ending
1025  *
1026  *		NE	undocumented match to NS (for mm?)
1027  *		LE	mm only: matches RL or *L (for lists)
1028  *
1029  *	for me:
1030  *		([lqbzcdf]
1031  */
void
noblock(char a1, char a2)
{
	int c1,c2;
	int eqnf;	/* cleared once a nested equation was skipped */
	int lct;	/* nesting depth of list macros (a1 == 'L') */

	lct = 0;
	eqnf = 1;
	/* skip the rest of the opening request line */
	SKIP;
	for (;;) {
		/* skip every line that does not start with '.' */
		while (C != '.')
			if (c == '\n')
				continue;
			else
				SKIP;
		/* read the two characters of the request name */
		if ((c1 = C) == '\n')
			continue;
		if ((c2 = C) == '\n')
			continue;
		if (c1 == a1 && c2 == a2) {
			/* the ending macro; it closes a nested list first */
			SKIP;
			if (lct != 0) {
				lct--;
				continue;
			}
			/* the block is replaced by "." (or an empty line) */
			if (eqnf)
				putchar('.');
			putchar('\n');
			return;
		} else if (a1 == 'L' && c2 == 'L') {
			/* another list macro starts inside the list */
			lct++;
			SKIP;
		}
		/*
		 *	equations (EQ) nested within a display
		 */
		else if (c1 == 'E' && c2 == 'Q') {
			if ((mac == ME && a1 == ')')
			    || (mac != ME && a1 == 'D')) {
				eqn();
				eqnf=0;
			}
		}
		/*
		 *	turning on filling is done by the paragraphing
		 *	macros
		 */
		else if (a1 == 'f') {	/* .fi */
			if  ((mac == ME && (c2 == 'h' || c2 == 'p'))
			    || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
				SKIP;
				return;
			}
		} else {
			SKIP;
		}
	}
}
1091 
/*
 * Macro handler: skip a displayed equation with eqn().
 * Always returns 0.
 */
int
EQ(void)
{
	eqn();

	return (0);
}
1099 
/*
 * Macro handler: process the start of a macro definition with
 * macro().  Always returns 0.
 */
int
domacro(void)
{
	macro();

	return (0);
}
1107 
1108 int
1109 PS(void)
1110 {
1111 
1112 	for (C; c == ' ' || c == '\t'; C)
1113 		;	/* nothing */
1114 
1115 	if (c == '<') {		/* ".PS < file" -- don't expect a .PE */
1116 		SKIP;
1117 		return(0);
1118 	}
1119 	if (!msflag)
1120 		inpic();
1121 	else
1122 		noblock('P', 'E');
1123 	return(0);
1124 }
1125 
1126 int
1127 skip(void)
1128 {
1129 
1130 	SKIP;
1131 	return(0);
1132 }
1133 
1134 int
1135 intbl(void)
1136 {
1137 
1138 	if (msflag)
1139 		stbl();
1140 	else
1141 		tbl();
1142 	return(0);
1143 }
1144 
1145 int
1146 outtbl(void)
1147 {
1148 
1149 	intable = NO;
1150 	return(0);
1151 }
1152 
1153 int
1154 so(void)
1155 {
1156 
1157 	if (!iflag) {
1158 		getfname();
1159 		if (fname[0]) {
1160 			if (++filesp - &files[0] > MAXFILES)
1161 				err(1, "too many nested files (max %d)",
1162 				    MAXFILES);
1163 			infile = *filesp = opn(fname);
1164 		}
1165 	}
1166 	return(0);
1167 }
1168 
1169 int
1170 nx(void)
1171 {
1172 
1173 	if (!iflag) {
1174 		getfname();
1175 		if (fname[0] == '\0')
1176 			exit(0);
1177 		if (infile != stdin)
1178 			fclose(infile);
1179 		infile = *filesp = opn(fname);
1180 	}
1181 	return(0);
1182 }
1183 
1184 int
1185 skiptocom(void)
1186 {
1187 
1188 	SKIP_TO_COM;
1189 	return(COMX);
1190 }
1191 
1192 int
1193 PP(pacmac c12)
1194 {
1195 	int c1, c2;
1196 
1197 	frommac(c12, c1, c2);
1198 	printf(".%c%c", c1, c2);
1199 	while (C != '\n')
1200 		putchar(c);
1201 	putchar('\n');
1202 	return(0);
1203 }
1204 
1205 int
1206 AU(void)
1207 {
1208 
1209 	if (mac == MM)
1210 		return(0);
1211 	SKIP_TO_COM;
1212 	return(COMX);
1213 }
1214 
1215 int
1216 SH(pacmac c12)
1217 {
1218 	int c1, c2;
1219 
1220 	frommac(c12, c1, c2);
1221 
1222 	if (parag) {
1223 		printf(".%c%c", c1, c2);
1224 		while (C != '\n')
1225 			putchar(c);
1226 		putchar(c);
1227 		putchar('!');
1228 		for (;;) {
1229 			while (C != '\n')
1230 				putchar(c);
1231 			putchar('\n');
1232 			if (C == '.')
1233 				return(COM);
1234 			putchar('!');
1235 			putchar(c);
1236 		}
1237 		/*NOTREACHED*/
1238 	} else {
1239 		SKIP_TO_COM;
1240 		return(COMX);
1241 	}
1242 }
1243 
1244 int
1245 UX(void)
1246 {
1247 
1248 	if (wordflag)
1249 		printf("UNIX\n");
1250 	else
1251 		printf("UNIX ");
1252 	return(0);
1253 }
1254 
1255 int
1256 MMHU(pacmac c12)
1257 {
1258 	int c1, c2;
1259 
1260 	frommac(c12, c1, c2);
1261 	if (parag) {
1262 		printf(".%c%c", c1, c2);
1263 		while (C != '\n')
1264 			putchar(c);
1265 		putchar('\n');
1266 	} else {
1267 		SKIP;
1268 	}
1269 	return(0);
1270 }
1271 
1272 int
1273 mesnblock(pacmac c12)
1274 {
1275 	int c1, c2;
1276 
1277 	frommac(c12, c1, c2);
1278 	noblock(')', c2);
1279 	return(0);
1280 }
1281 
1282 int
1283 mssnblock(pacmac c12)
1284 {
1285 	int c1, c2;
1286 
1287 	frommac(c12, c1, c2);
1288 	noblock(c1, 'E');
1289 	return(0);
1290 }
1291 
/*
 * Macro handler for .nf: skip the text with noblock('f', 'i').
 * Always returns 0.
 */
int
nf(void)
{
	noblock('f', 'i');

	return (0);
}
1299 
/*
 * Macro handler for .ce: skip the centered lines with sce().
 * Always returns 0.
 */
int
ce(void)
{
	sce();

	return (0);
}
1307 
1308 int
1309 meip(pacmac c12)
1310 {
1311 
1312 	if (parag)
1313 		mepp(c12);
1314 	else if (wordflag)	/* save the tag */
1315 		regline(meputmac, ONE);
1316 	else
1317 		SKIP;
1318 	return(0);
1319 }
1320 
1321 /*
1322  *	only called for -me .pp or .sh, when parag is on
1323  */
1324 int
1325 mepp(pacmac c12)
1326 {
1327 
1328 	PP(c12);		/* eats the line */
1329 	return(0);
1330 }
1331 
1332 /*
1333  *	Start of a section heading; output the section name if doing words
1334  */
1335 int
1336 mesh(pacmac c12)
1337 {
1338 
1339 	if (parag)
1340 		mepp(c12);
1341 	else if (wordflag)
1342 		defcomline(c12);
1343 	else
1344 		SKIP;
1345 	return(0);
1346 }
1347 
1348 /*
1349  *	process a font setting
1350  */
1351 int
1352 mefont(pacmac c12)
1353 {
1354 
1355 	argconcat = 1;
1356 	defcomline(c12);
1357 	argconcat = 0;
1358 	return(0);
1359 }
1360 
1361 int
1362 manfont(pacmac c12)
1363 {
1364 
1365 	return(mefont(c12));
1366 }
1367 
1368 int
1369 manpp(pacmac c12)
1370 {
1371 
1372 	return(mepp(c12));
1373 }
1374 
1375 void
1376 defcomline(pacmac c12)
1377 {
1378 	int c1, c2;
1379 
1380 	frommac(c12, c1, c2);
1381 	if (msflag && mac == MM && c2 == 'L') {
1382 		if (disp || c1 == 'R') {
1383 			noblock('L', 'E');
1384 		} else {
1385 			SKIP;
1386 			putchar('.');
1387 		}
1388 	}
1389 	else if (c1 == '.' && c2 == '.') {
1390 		if (msflag) {
1391 			SKIP;
1392 			return;
1393 		}
1394 		while (C == '.')
1395 			/*VOID*/;
1396 	}
1397 	++inmacro;
1398 	/*
1399 	 *	Process the arguments to the macro
1400 	 */
1401 	switch (mac) {
1402 	default:
1403 	case MM:
1404 	case MS:
1405 		if (c1 <= 'Z' && msflag)
1406 			regline(msputmac, ONE);
1407 		else
1408 			regline(msputmac, TWO);
1409 		break;
1410 	case ME:
1411 		regline(meputmac, ONE);
1412 		break;
1413 	}
1414 	--inmacro;
1415 }
1416 
/*
 * Process a request line; the leading '.' or '\'' has been read.
 *
 * The two-character macro name is looked up in the table built once
 * by buildtab().  When an entry is found and its condition holds, its
 * handler is called; the handler's return value decides whether to
 * return, or to continue at `com' or `comx' with a request the
 * handler has already started reading.  In all other cases the line
 * is handed to defcomline().
 */
void
comline(void)
{
	int	c1;
	int	c2;
	pacmac	c12;
	int	mid;
	int	lb, ub;
	int	hit;
	static	int	tabsize = 0;
	static	struct	mactab	*mactab = NULL;
	struct	mactab	*mp;

	/* build the sorted macro table on the first call */
	if (mactab == 0)
		 buildtab(&mactab, &tabsize);
com:
	/* skip blanks and tabs between the control character and the name */
	while (C == ' ' || c == '\t')
		;
comx:
	if ((c1 = c) == '\n')
		return;
	c2 = C;
	/* a request other than ".." resets inmacro */
	if (c1 == '.' && c2 != '.')
		inmacro = NO;
	if (msflag && c1 == '[') {
		refer(c2);
		return;
	}
	if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
		printf(".P\n");
		return;
	}
	if (c2 == '\n')
		return;
	/*
	 *	Single letter macro
	 */
	if (mac == ME && (c2 == ' ' || c2 == '\t') )
		c2 = ' ';
	c12 = tomac(c1, c2);
	/*
	 *	binary search through the table of macros
	 */
	lb = 0;
	ub = tabsize - 1;
	while (lb <= ub) {
		mid = (ub + lb) / 2;
		mp = &mactab[mid];
		if (mp->macname < c12)
			lb = mid + 1;
		else if (mp->macname > c12)
			ub = mid - 1;
		else {
			hit = 1;
#ifdef FULLDEBUG
			printf("preliminary hit macro %c%c ", c1, c2);
#endif /* FULLDEBUG */
			/*
			 * Check the entry's extra condition.  There is no
			 * case for MSF, so such an entry takes the default
			 * branch and never matches.
			 */
			switch (mp->condition) {
			case NONE:
				hit = YES;
				break;
			case FNEST:
				hit = (filesp == files);
				break;
			case NOMAC:
				hit = !inmacro;
				break;
			case MAC:
				hit = inmacro;
				break;
			case PARAG:
				hit = parag;
				break;
			case NBLK:
				hit = !keepblock;
				break;
			default:
				hit = 0;
			}

			if (hit) {
#ifdef FULLDEBUG
				printf("MATCH\n");
#endif /* FULLDEBUG */
				switch ((*(mp->func))(c12)) {
				default:
					return;
				case COMX:
					goto comx;
				case COM:
					goto com;
				}
			}
#ifdef FULLDEBUG
			printf("FAIL\n");
#endif /* FULLDEBUG */
			/* name found but condition failed: stop searching */
			break;
		}
	}
	defcomline(c12);
}
1518 
1519 int
1520 macsort(const void *p1, const void *p2)
1521 {
1522 	struct mactab *t1 = (struct mactab *)p1;
1523 	struct mactab *t2 = (struct mactab *)p2;
1524 
1525 	return(t1->macname - t2->macname);
1526 }
1527 
1528 int
1529 sizetab(struct mactab *mp)
1530 {
1531 	int i;
1532 
1533 	i = 0;
1534 	if (mp) {
1535 		for (; mp->macname; mp++, i++)
1536 			/*VOID*/ ;
1537 	}
1538 	return(i);
1539 }
1540 
1541 struct mactab *
1542 macfill(struct mactab *dst, struct mactab *src)
1543 {
1544 
1545 	if (src) {
1546 		while (src->macname)
1547 			*dst++ = *src++;
1548 	}
1549 	return(dst);
1550 }
1551 
1552 __dead void
1553 usage(void)
1554 {
1555 	extern char *__progname;
1556 
1557 	fprintf(stderr, "usage: %s [-ikpw] [-m a | e | l | m | s] [file ...]\n", __progname);
1558 	exit(1);
1559 }
1560 
1561 void
1562 buildtab(struct mactab **r_back, int *r_size)
1563 {
1564 	int	size;
1565 	struct	mactab	*p, *p1, *p2;
1566 	struct	mactab	*back;
1567 
1568 	size = sizetab(troffmactab) + sizetab(ppmactab);
1569 	p1 = p2 = NULL;
1570 	if (msflag) {
1571 		switch (mac) {
1572 		case ME:
1573 			p1 = memactab;
1574 			break;
1575 		case MM:
1576 			p1 = msmactab;
1577 			p2 = mmmactab;
1578 			break;
1579 		case MS:
1580 			p1 = msmactab;
1581 			break;
1582 		case MA:
1583 			p1 = manmactab;
1584 			break;
1585 		default:
1586 			break;
1587 		}
1588 	}
1589 	size += sizetab(p1);
1590 	size += sizetab(p2);
1591 	back = calloc(size+2, sizeof(struct mactab));
1592 	if (back == NULL)
1593 		err(1, NULL);
1594 
1595 	p = macfill(back, troffmactab);
1596 	p = macfill(p, ppmactab);
1597 	p = macfill(p, p1);
1598 	p = macfill(p, p2);
1599 
1600 	qsort(back, size, sizeof(struct mactab), macsort);
1601 	*r_size = size;
1602 	*r_back = back;
1603 }
1604 
/*
 *	troff requests.
 *
 *	Each M() entry names the condition under which the entry applies,
 *	the two characters of the request, and the handler to call.
 *	NONE entries always apply, NOMAC entries only while inmacro is
 *	clear, and NBLK entries only while keepblock is clear.
 *	The all-zero entry terminates the table (see sizetab/macfill).
 */
struct	mactab	troffmactab[] = {
	M(NONE,		'\\','"',	skip),	/* comment */
	M(NOMAC,	'd','e',	domacro),	/* define */
	M(NOMAC,	'i','g',	domacro),	/* ignore till .. */
	M(NOMAC,	'a','m',	domacro),	/* append macro */
	M(NBLK,		'n','f',	nf),	/* no-fill mode */
	M(NBLK,		'c','e',	ce),	/* centered */

	M(NONE,		's','o',	so),	/* source a file */
	M(NONE,		'n','x',	nx),	/* go to next file */

	M(NONE,		't','m',	skip),	/* print string on tty */
	M(NONE,		'h','w',	skip),	/* exception hyphen words */
	M(NONE,		0,0,		0)	/* end of table */
};
1623 
/*
 *	Macros that delimit preprocessor input (equations, tables,
 *	pictures).
 *
 *	FNEST entries apply only when filesp == files; NONE entries
 *	always apply.  The all-zero entry terminates the table.
 */
struct	mactab	ppmactab[] = {
	M(FNEST,	'E','Q',	EQ),	/* equation starting */
	M(FNEST,	'T','S',	intbl),	/* table starting */
	M(FNEST,	'T','C',	intbl),	/* alternative table? */
	M(FNEST,	'T','&',	intbl),	/* table reformatting */
	M(NONE,		'T','E',	outtbl),/* table ending */
	M(NONE,		'P','S',	PS),	/* picture starting */
	M(NONE,		0,0,		0)	/* end of table */
};
1636 
/*
 *	Macros shared by the ms and mm packages: buildtab loads this
 *	table both when mac is MS and when mac is MM.
 *
 *	PARAG entries apply only while parag is set, NBLK entries only
 *	while keepblock is clear, and NONE entries always.
 *	The all-zero entry terminates the table.
 */
struct	mactab	msmactab[] = {
	M(NONE,		'T','L',	skiptocom),	/* title follows */
	M(NONE,		'F','S',	skiptocom),	/* start footnote */
	M(NONE,		'O','K',	skiptocom),	/* other keywords */

	M(NONE,		'N','R',	skip),	/* undocumented */
	M(NONE,		'N','D',	skip),	/* use supplied date */

	M(PARAG,	'P','P',	PP),	/* begin paragraph */
	M(PARAG,	'I','P',	PP),	/* begin indented paragraph, tag x */
	M(PARAG,	'L','P',	PP),	/* left blocked paragraph */

	M(NONE,		'A','U',	AU),	/* author */
	M(NONE,		'A','I',	AU),	/* author's institution */

	M(NONE,		'S','H',	SH),	/* section heading */
	M(NONE,		'S','N',	SH),	/* undocumented */
	M(NONE,		'U','X',	UX),	/* unix */

	M(NBLK,		'D','S',	mssnblock),	/* start display text */
	M(NBLK,		'K','S',	mssnblock),	/* start keep */
	M(NBLK,		'K','F',	mssnblock),	/* start float keep */
	M(NONE,		0,0,		0)	/* end of table */
};
1664 
/*
 *	Macros specific to the mm package; buildtab adds this table to
 *	msmactab when mac is MM.
 *
 *	Single-letter macros are keyed with a blank as their second
 *	character.  The all-zero entry terminates the table.
 */
struct	mactab	mmmactab[] = {
	M(NONE,		'H',' ',	MMHU),	/* -mm ? */
	M(NONE,		'H','U',	MMHU),	/* -mm ? */
	M(PARAG,	'P',' ',	PP),	/* paragraph for -mm */
	M(NBLK,		'N','S',	mssnblock),	/* undocumented */
	M(NONE,		0,0,		0)	/* end of table */
};
1672 
/*
 *	Macros specific to the me package; buildtab loads this table
 *	when mac is ME.
 *
 *	PARAG entries apply only while parag is set, NBLK entries only
 *	while keepblock is clear, and NONE entries always.
 *	The all-zero entry terminates the table.
 */
struct	mactab	memactab[] = {
	/* paragraph macros */
	M(PARAG,	'p','p',	mepp),
	M(PARAG,	'l','p',	mepp),
	M(PARAG,	'n','p',	mepp),
	M(NONE,		'i','p',	meip),

	/* handled by mesh -- presumably section headings; confirm in mesh */
	M(NONE,		's','h',	mesh),
	M(NONE,		'u','h',	mesh),

	/* macros beginning with '(' that are handed to mesnblock */
	M(NBLK,		'(','l',	mesnblock),
	M(NBLK,		'(','q',	mesnblock),
	M(NBLK,		'(','b',	mesnblock),
	M(NBLK,		'(','z',	mesnblock),
	M(NBLK,		'(','c',	mesnblock),

	M(NBLK,		'(','d',	mesnblock),
	M(NBLK,		'(','f',	mesnblock),
	M(NBLK,		'(','x',	mesnblock),

	/*
	 * Macros handed to mefont.  Single-letter macros are keyed with a
	 * blank second character; in ME mode the lookup code turns a blank
	 * or tab following the first letter into that blank.
	 */
	M(NONE,		'r',' ',	mefont),
	M(NONE,		'i',' ',	mefont),
	M(NONE,		'b',' ',	mefont),
	M(NONE,		'u',' ',	mefont),
	M(NONE,		'q',' ',	mefont),
	M(NONE,		'r','b',	mefont),
	M(NONE,		'b','i',	mefont),
	M(NONE,		'b','x',	mefont),
	M(NONE,		0,0,		0)	/* end of table */
};
1702 
/*
 *	Macros specific to the man package; buildtab loads this table
 *	when mac is MA.
 *
 *	Every real entry is PARAG, i.e. applies only while parag is set.
 *	The all-zero entry terminates the table.
 */
struct	mactab	manmactab[] = {
	/* macros handed to manfont */
	M(PARAG,	'B','I',	manfont),
	M(PARAG,	'B','R',	manfont),
	M(PARAG,	'I','B',	manfont),
	M(PARAG,	'I','R',	manfont),
	M(PARAG,	'R','B',	manfont),
	M(PARAG,	'R','I',	manfont),

	/* paragraph macros handed to manpp */
	M(PARAG,	'P','P',	manpp),
	M(PARAG,	'L','P',	manpp),
	M(PARAG,	'H','P',	manpp),
	M(NONE,		0,0,		0)	/* end of table */
};
1716