xref: /csrg-svn/usr.bin/m4/main.c (revision 42689)
138823Sbostic /*
238823Sbostic  * Copyright (c) 1989 The Regents of the University of California.
338823Sbostic  * All rights reserved.
438823Sbostic  *
538823Sbostic  * This code is derived from software contributed to Berkeley by
638823Sbostic  * Ozan Yigit.
738823Sbostic  *
8*42689Sbostic  * %sccs.include.redist.c%
938823Sbostic  */
1038823Sbostic 
1138823Sbostic #ifndef lint
12*42689Sbostic static char sccsid[] = "@(#)main.c	5.4 (Berkeley) 06/01/90";
1338823Sbostic #endif /* not lint */
1438823Sbostic 
1538823Sbostic /*
1638823Sbostic  * main.c
1738823Sbostic  * Facility: m4 macro processor
1838823Sbostic  * by: oz
1938823Sbostic  */
2038823Sbostic 
2138823Sbostic #include "mdef.h"
2238823Sbostic 
2338823Sbostic /*
2438823Sbostic  * m4 - macro processor
2538823Sbostic  *
2638823Sbostic  * PD m4 is based on the macro tool distributed with the software
2738823Sbostic  * tools (VOS) package, and described in the "SOFTWARE TOOLS" and
2838823Sbostic  * "SOFTWARE TOOLS IN PASCAL" books. It has been expanded to include
2938823Sbostic  * most of the command set of SysV m4, the standard UN*X macro processor.
3038823Sbostic  *
3138823Sbostic  * Since both PD m4 and UN*X m4 are based on SOFTWARE TOOLS macro,
3238823Sbostic  * there may be certain implementation similarities between
3338823Sbostic  * the two. The PD m4 was produced without ANY references to m4
3438823Sbostic  * sources.
3538823Sbostic  *
3638823Sbostic  * References:
3738823Sbostic  *
3838823Sbostic  *	Software Tools distribution: macro
3938823Sbostic  *
4038823Sbostic  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
4138823Sbostic  *	TOOLS IN PASCAL, Addison-Wesley, Mass. 1981
4238823Sbostic  *
4338823Sbostic  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
4438823Sbostic  *	TOOLS, Addison-Wesley, Mass. 1976
4538823Sbostic  *
4638823Sbostic  *	Kernighan, Brian W. and Dennis M. Ritchie,
4738823Sbostic  *	THE M4 MACRO PROCESSOR, Unix Programmer's Manual,
4838823Sbostic  *	Seventh Edition, Vol. 2, Bell Telephone Labs, 1979
4938823Sbostic  *
5038823Sbostic  *	System V man page for M4
5138823Sbostic  *
5238823Sbostic  * Modification History:
5338823Sbostic  *
5438823Sbostic  * Jan 28 1986 Oz	Break the whole thing into little
5538823Sbostic  *			pieces, for easier (?) maintenance.
5638823Sbostic  *
 * Dec 12 1985 Oz	Optimize the code, try to squeeze a
 *			few microseconds out.
5938823Sbostic  *
6038823Sbostic  * Dec 05 1985 Oz	Add getopt interface, define (-D),
6138823Sbostic  *			undefine (-U) options.
6238823Sbostic  *
6338823Sbostic  * Oct 21 1985 Oz	Clean up various bugs, add comment handling.
6438823Sbostic  *
6538823Sbostic  * June 7 1985 Oz	Add some of SysV m4 stuff (m4wrap, pushdef,
6638823Sbostic  *			popdef, decr, shift etc.).
6738823Sbostic  *
6838823Sbostic  * June 5 1985 Oz	Initial cut.
6938823Sbostic  *
7038823Sbostic  * Implementation Notes:
7138823Sbostic  *
7238823Sbostic  * [1]	PD m4 uses a different (and simpler) stack mechanism than the one
7338823Sbostic  *	described in Software Tools and Software Tools in Pascal books.
7438823Sbostic  *	The triple stack nonsense is replaced with a single stack containing
7538823Sbostic  *	the call frames and the arguments. Each frame is back-linked to a
7638823Sbostic  * 	previous stack frame, which enables us to rewind the stack after
7738823Sbostic  * 	each nested call is completed. Each argument is a character pointer
7838823Sbostic  *	to the beginning of the argument string within the string space.
7938823Sbostic  *	The only exceptions to this are (*) arg 0 and arg 1, which are
8038823Sbostic  * 	the macro definition and macro name strings, stored dynamically
8138823Sbostic  *	for the hash table.
8238823Sbostic  *
8338823Sbostic  *	    .					   .
8438823Sbostic  *	|   .	|  <-- sp			|  .  |
8538823Sbostic  *	+-------+				+-----+
8638823Sbostic  *	| arg 3 ------------------------------->| str |
8738823Sbostic  *	+-------+				|  .  |
8838823Sbostic  *	| arg 2 --------------+ 		   .
8938823Sbostic  *	+-------+	      |
9038823Sbostic  *	    *		      |			|     |
9138823Sbostic  *	+-------+	      | 		+-----+
9238823Sbostic  *	| plev	|  <-- fp     +---------------->| str |
9338823Sbostic  *	+-------+				|  .  |
9438823Sbostic  *	| type	|				   .
9538823Sbostic  *	+-------+
9638823Sbostic  *	| prcf	-----------+		plev: paren level
9738823Sbostic  *	+-------+  	   |		type: call type
9838823Sbostic  *	|   .	| 	   |		prcf: prev. call frame
9938823Sbostic  *	    .	   	   |
10038823Sbostic  *	+-------+	   |
10138823Sbostic  *	|	<----------+
10238823Sbostic  *	+-------+
10338823Sbostic  *
10438823Sbostic  * [2]	We have three types of null values:
10538823Sbostic  *
10638823Sbostic  *		nil  - nodeblock pointer type 0
10738823Sbostic  *		null - null string ("")
10838823Sbostic  *		NULL - Stdio-defined NULL
10938823Sbostic  *
11038823Sbostic  */
11138823Sbostic 
11238823Sbostic ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
11338823Sbostic char buf[BUFSIZE];		/* push-back buffer	       */
11438823Sbostic char *bp = buf; 		/* first available character   */
11538823Sbostic char *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */
11638823Sbostic stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
11738823Sbostic char strspace[STRSPMAX+1];	/* string space for evaluation */
11838823Sbostic char *ep = strspace;		/* first free char in strspace */
11938823Sbostic char *endest= strspace+STRSPMAX;/* end of string space	       */
12038823Sbostic int sp; 			/* current m4  stack pointer   */
12138823Sbostic int fp; 			/* m4 call frame pointer       */
12238823Sbostic FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
12338823Sbostic FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
12438823Sbostic FILE *active;			/* active output file pointer  */
12538823Sbostic char *m4temp;			/* filename for diversions     */
12638823Sbostic int ilevel = 0; 		/* input file stack pointer    */
12738823Sbostic int oindex = 0; 		/* diversion index..	       */
12838823Sbostic char *null = "";                /* as it says.. just a null..  */
12938823Sbostic char *m4wraps = "";             /* m4wrap string default..     */
13038823Sbostic char lquote = LQUOTE;		/* left quote character  (`)   */
13138823Sbostic char rquote = RQUOTE;		/* right quote character (')   */
13238823Sbostic char scommt = SCOMMT;		/* start character for comment */
13338823Sbostic char ecommt = ECOMMT;		/* end character for comment   */
13438823Sbostic struct keyblk keywrds[] = {	/* m4 keywords to be installed */
13538823Sbostic 	"include",      INCLTYPE,
13638823Sbostic 	"sinclude",     SINCTYPE,
13738823Sbostic 	"define",       DEFITYPE,
13838823Sbostic 	"defn",         DEFNTYPE,
13938823Sbostic 	"divert",       DIVRTYPE,
14038823Sbostic 	"expr",         EXPRTYPE,
14138823Sbostic 	"eval",         EXPRTYPE,
14238823Sbostic 	"substr",       SUBSTYPE,
14338823Sbostic 	"ifelse",       IFELTYPE,
14438823Sbostic 	"ifdef",        IFDFTYPE,
14538823Sbostic 	"len",          LENGTYPE,
14638823Sbostic 	"incr",         INCRTYPE,
14738823Sbostic 	"decr",         DECRTYPE,
14838823Sbostic 	"dnl",          DNLNTYPE,
14938823Sbostic 	"changequote",  CHNQTYPE,
15038823Sbostic 	"changecom",    CHNCTYPE,
15138823Sbostic 	"index",        INDXTYPE,
15238823Sbostic #ifdef EXTENDED
15338823Sbostic 	"paste",        PASTTYPE,
15438823Sbostic 	"spaste",       SPASTYPE,
15538823Sbostic #endif
15638823Sbostic 	"popdef",       POPDTYPE,
15738823Sbostic 	"pushdef",      PUSDTYPE,
15838823Sbostic 	"dumpdef",      DUMPTYPE,
15938823Sbostic 	"shift",        SHIFTYPE,
16038823Sbostic 	"translit",     TRNLTYPE,
16138823Sbostic 	"undefine",     UNDFTYPE,
16238823Sbostic 	"undivert",     UNDVTYPE,
16338823Sbostic 	"divnum",       DIVNTYPE,
16438823Sbostic 	"maketemp",     MKTMTYPE,
16538823Sbostic 	"errprint",     ERRPTYPE,
16638823Sbostic 	"m4wrap",       M4WRTYPE,
16738823Sbostic 	"m4exit",       EXITTYPE,
16838823Sbostic 	"syscmd",       SYSCTYPE,
16938823Sbostic 	"sysval",       SYSVTYPE,
17038823Sbostic 	"unix",         MACRTYPE,
17138823Sbostic };
17238823Sbostic 
17338823Sbostic #define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
17438823Sbostic 
17538823Sbostic extern ndptr lookup();
17638823Sbostic extern ndptr addent();
17738823Sbostic extern int onintr();
17838823Sbostic 
17938823Sbostic extern char *malloc();
18038823Sbostic extern char *mktemp();
18138823Sbostic 
18238823Sbostic extern int optind;
18338823Sbostic extern char *optarg;
18438823Sbostic 
18538823Sbostic main(argc,argv)
18638823Sbostic char *argv[];
18738823Sbostic {
18838823Sbostic 	register int c;
18938823Sbostic 	register int n;
19038823Sbostic 	char *p;
19138823Sbostic 
19238823Sbostic 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
19338823Sbostic 		signal(SIGINT, onintr);
19438823Sbostic #ifdef NONZEROPAGES
19538823Sbostic 	initm4();
19638823Sbostic #endif
19738823Sbostic 	initkwds();
19838823Sbostic 
19938823Sbostic 	while ((c = getopt(argc, argv, "tD:U:o:")) != EOF)
20038823Sbostic 		switch(c) {
20138823Sbostic 
20238823Sbostic 		case 'D':               /* define something..*/
20338823Sbostic 			for (p = optarg; *p; p++)
20438823Sbostic 				if (*p == '=')
20538823Sbostic 					break;
20638823Sbostic 			if (*p)
20738823Sbostic 				*p++ = EOS;
20838823Sbostic 			dodefine(optarg, p);
20938823Sbostic 			break;
21038823Sbostic 		case 'U':               /* undefine...       */
21138823Sbostic 			remhash(optarg, TOP);
21238823Sbostic 			break;
21338823Sbostic 		case 'o':		/* specific output   */
21438823Sbostic 		case '?':
21538823Sbostic 		default:
21638823Sbostic 			usage();
21738823Sbostic 		}
21838823Sbostic 
21938823Sbostic 	infile[0] = stdin;		/* default input (naturally) */
22038823Sbostic 	active = stdout;		/* default active output     */
22138823Sbostic 	m4temp = mktemp(DIVNAM);	/* filename for diversions   */
22238823Sbostic 
22338823Sbostic 	sp = -1;			/* stack pointer initialized */
22438823Sbostic 	fp = 0; 			/* frame pointer initialized */
22538823Sbostic 
22638823Sbostic 	macro();			/* get some work done here   */
22738823Sbostic 
22838823Sbostic 	if (*m4wraps) { 		/* anything for rundown ??   */
22938823Sbostic 		ilevel = 0;		/* in case m4wrap includes.. */
23038823Sbostic 		putback(EOF);		/* eof is a must !!	     */
23138823Sbostic 		pbstr(m4wraps); 	/* user-defined wrapup act   */
23238823Sbostic 		macro();		/* last will and testament   */
23338823Sbostic 	}
23438823Sbostic 
23539102Sbostic 	if (active != stdout)
23639102Sbostic 		active = stdout;	/* reset output just in case */
23739102Sbostic 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
23839102Sbostic 		if (outfile[n] != NULL)
23939102Sbostic 			getdiv(n);
24038823Sbostic 					/* remove bitbucket if used  */
24138823Sbostic 	if (outfile[0] != NULL) {
24238823Sbostic 		(void) fclose(outfile[0]);
24338823Sbostic 		m4temp[UNIQUE] = '0';
24438823Sbostic 		(void) unlink(m4temp);
24538823Sbostic 	}
24638823Sbostic 
24738823Sbostic 	exit(0);
24838823Sbostic }
24938823Sbostic 
25038823Sbostic ndptr inspect();	/* forward ... */
25138823Sbostic 
25238823Sbostic /*
25338823Sbostic  * macro - the work horse..
25438823Sbostic  *
25538823Sbostic  */
25638823Sbostic macro() {
25738823Sbostic 	char token[MAXTOK];
25838823Sbostic 	register char *s;
25938823Sbostic 	register int t, l;
26038823Sbostic 	register ndptr p;
26138823Sbostic 	register int  nlpar;
26238823Sbostic 
26338823Sbostic 	cycle {
26438823Sbostic 		if ((t = gpbc()) == '_' || isalpha(t)) {
26538823Sbostic 			putback(t);
26638823Sbostic 			if ((p = inspect(s = token)) == nil) {
26738823Sbostic 				if (sp < 0)
26838823Sbostic 					while (*s)
26938823Sbostic 						putc(*s++, active);
27038823Sbostic 				else
27138823Sbostic 					while (*s)
27238823Sbostic 						chrsave(*s++);
27338823Sbostic 			}
27438823Sbostic 			else {
27538823Sbostic 		/*
27638823Sbostic 		 * real thing.. First build a call frame:
27738823Sbostic 		 *
27838823Sbostic 		 */
27938823Sbostic 				pushf(fp);	/* previous call frm */
28038823Sbostic 				pushf(p->type); /* type of the call  */
28138823Sbostic 				pushf(0);	/* parenthesis level */
28238823Sbostic 				fp = sp;	/* new frame pointer */
28338823Sbostic 		/*
28438823Sbostic 		 * now push the string arguments:
28538823Sbostic 		 *
28638823Sbostic 		 */
28738823Sbostic 				pushs(p->defn);	      /* defn string */
28838823Sbostic 				pushs(p->name);	      /* macro name  */
28938823Sbostic 				pushs(ep);	      /* start next..*/
29038823Sbostic 
29138823Sbostic 				putback(l = gpbc());
29238823Sbostic 				if (l != LPAREN)  {   /* add bracks  */
29338823Sbostic 					putback(RPAREN);
29438823Sbostic 					putback(LPAREN);
29538823Sbostic 				}
29638823Sbostic 			}
29738823Sbostic 		}
29838823Sbostic 		else if (t == EOF) {
29938823Sbostic 			if (sp > -1)
30038823Sbostic 				error("m4: unexpected end of input");
30138823Sbostic 			if (--ilevel < 0)
30238823Sbostic 				break;			/* all done thanks.. */
30338823Sbostic 			(void) fclose(infile[ilevel+1]);
30438823Sbostic 			continue;
30538823Sbostic 		}
30638823Sbostic 	/*
30738823Sbostic 	 * non-alpha single-char token seen..
30838823Sbostic 	 * [the order of else if .. stmts is
30938823Sbostic 	 * important.]
31038823Sbostic 	 *
31138823Sbostic 	 */
31238823Sbostic 		else if (t == lquote) { 		/* strip quotes */
31338823Sbostic 			nlpar = 1;
31438823Sbostic 			do {
31538823Sbostic 				if ((l = gpbc()) == rquote)
31638823Sbostic 					nlpar--;
31738823Sbostic 				else if (l == lquote)
31838823Sbostic 					nlpar++;
31938823Sbostic 				else if (l == EOF)
32038823Sbostic 					error("m4: missing right quote");
32138823Sbostic 				if (nlpar > 0) {
32238823Sbostic 					if (sp < 0)
32338823Sbostic 						putc(l, active);
32438823Sbostic 					else
32538823Sbostic 						chrsave(l);
32638823Sbostic 				}
32738823Sbostic 			}
32838823Sbostic 			while (nlpar != 0);
32938823Sbostic 		}
33038823Sbostic 
33138823Sbostic 		else if (sp < 0) {		/* not in a macro at all */
33238823Sbostic 			if (t == scommt) {	/* comment handling here */
33338823Sbostic 				putc(t, active);
33438823Sbostic 				while ((t = gpbc()) != ecommt)
33538823Sbostic 					putc(t, active);
33638823Sbostic 			}
33738823Sbostic 			putc(t, active);	/* output directly..	 */
33838823Sbostic 		}
33938823Sbostic 
34038823Sbostic 		else switch(t) {
34138823Sbostic 
34238823Sbostic 		case LPAREN:
34338823Sbostic 			if (PARLEV > 0)
34438823Sbostic 				chrsave(t);
34538823Sbostic 			while (isspace(l = gpbc()))
34638823Sbostic 				;		/* skip blank, tab, nl.. */
34738823Sbostic 			putback(l);
34838823Sbostic 			PARLEV++;
34938823Sbostic 			break;
35038823Sbostic 
35138823Sbostic 		case RPAREN:
35238823Sbostic 			if (--PARLEV > 0)
35338823Sbostic 				chrsave(t);
35438823Sbostic 			else {			/* end of argument list */
35538823Sbostic 				chrsave(EOS);
35638823Sbostic 
35738823Sbostic 				if (sp == STACKMAX)
35838823Sbostic 					error("m4: internal stack overflow");
35938823Sbostic 
36038823Sbostic 				if (CALTYP == MACRTYPE)
36138823Sbostic 					expand(mstack+fp+1, sp-fp);
36238823Sbostic 				else
36338823Sbostic 					eval(mstack+fp+1, sp-fp, CALTYP);
36438823Sbostic 
36538823Sbostic 				ep = PREVEP;	/* flush strspace */
36638823Sbostic 				sp = PREVSP;	/* previous sp..  */
36738823Sbostic 				fp = PREVFP;	/* rewind stack...*/
36838823Sbostic 			}
36938823Sbostic 			break;
37038823Sbostic 
37138823Sbostic 		case COMMA:
37238823Sbostic 			if (PARLEV == 1)	{
37338823Sbostic 				chrsave(EOS);		/* new argument   */
37438823Sbostic 				while (isspace(l = gpbc()))
37538823Sbostic 					;
37638823Sbostic 				putback(l);
37738823Sbostic 				pushs(ep);
37838823Sbostic 			}
37938823Sbostic 			break;
38038823Sbostic 		default:
38138823Sbostic 			chrsave(t);			/* stack the char */
38238823Sbostic 			break;
38338823Sbostic 		}
38438823Sbostic 	}
38538823Sbostic }
38638823Sbostic 
38738823Sbostic 
38838823Sbostic /*
38938823Sbostic  * build an input token..
39038823Sbostic  * consider only those starting with _ or A-Za-z. This is a
39138823Sbostic  * combo with lookup to speed things up.
39238823Sbostic  */
39338823Sbostic ndptr
39438823Sbostic inspect(tp)
39538823Sbostic register char *tp;
39638823Sbostic {
39738823Sbostic 	register int h = 0;
39838823Sbostic 	register char c;
39938823Sbostic 	register char *name = tp;
40038823Sbostic 	register char *etp = tp+MAXTOK;
40138823Sbostic 	register ndptr p;
40238823Sbostic 
40338823Sbostic 	while (tp < etp && (isalnum(c = gpbc()) || c == '_'))
40438823Sbostic 		h += (*tp++ = c);
40538823Sbostic 	putback(c);
40638823Sbostic 	if (tp == etp)
40738823Sbostic 		error("m4: token too long");
40838823Sbostic 	*tp = EOS;
40938823Sbostic 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
41038823Sbostic 		if (strcmp(name, p->name) == 0)
41138823Sbostic 			break;
41238823Sbostic 	return(p);
41338823Sbostic }
41438823Sbostic 
#ifdef NONZEROPAGES
/*
 * initm4 - clear the macro hash table and the diversion array.
 * Only needed on systems that do not know anything about
 * demand-zero pages.
 *
 */
initm4()
{
	register ndptr *slot;
	register FILE **divp;

	for (slot = hashtab; slot < hashtab + HASHSIZE; slot++)
		*slot = nil;
	for (divp = outfile; divp < outfile + MAXOUT; divp++)
		*divp = NULL;
}
#endif
43138823Sbostic 
43238823Sbostic /*
43338823Sbostic  * initkwds - initialise m4 keywords as fast as possible.
43438823Sbostic  * This very similar to install, but without certain overheads,
43538823Sbostic  * such as calling lookup. Malloc is not used for storing the
43638823Sbostic  * keyword strings, since we simply use the static  pointers
43738823Sbostic  * within keywrds block. We also assume that there is enough memory
43838823Sbostic  * to at least install the keywords (i.e. malloc won't fail).
43938823Sbostic  *
44038823Sbostic  */
44138823Sbostic initkwds() {
44238823Sbostic 	register int i;
44338823Sbostic 	register int h;
44438823Sbostic 	register ndptr p;
44538823Sbostic 
44638823Sbostic 	for (i = 0; i < MAXKEYS; i++) {
44738823Sbostic 		h = hash(keywrds[i].knam);
44838823Sbostic 		p = (ndptr) malloc(sizeof(struct ndblock));
44938823Sbostic 		p->nxtptr = hashtab[h];
45038823Sbostic 		hashtab[h] = p;
45138823Sbostic 		p->name = keywrds[i].knam;
45238823Sbostic 		p->defn = null;
45338823Sbostic 		p->type = keywrds[i].ktyp | STATIC;
45438823Sbostic 	}
45538823Sbostic }
456