xref: /netbsd-src/usr.bin/m4/main.c (revision c41a4eebefede43f6950f838a387dc18c6a431bf)
1 /*	$NetBSD: main.c,v 1.15 1997/12/29 19:52:56 cgd Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Ozan Yigit at York University.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\
42 	The Regents of the University of California.  All rights reserved.\n");
43 #endif /* not lint */
44 
45 #ifndef lint
46 #if 0
47 static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
48 #else
49 __RCSID("$NetBSD: main.c,v 1.15 1997/12/29 19:52:56 cgd Exp $");
50 #endif
51 #endif /* not lint */
52 
53 /*
54  * main.c
55  * Facility: m4 macro processor
56  * by: oz
57  */
58 
59 #include <sys/types.h>
60 #include <ctype.h>
61 #include <err.h>
62 #include <errno.h>
63 #include <signal.h>
64 #include <stdio.h>
65 #include <string.h>
66 #include <unistd.h>
67 #include "mdef.h"
68 #include "stdd.h"
69 #include "extern.h"
70 #include "pathnames.h"
71 
72 ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
73 pbent buf[BUFSIZE];		/* push-back buffer	       */
74 pbent *bufbase = buf;		/* the base for current ilevel */
75 pbent *bbase[MAXINP];		/* the base for each ilevel    */
76 pbent *bp = buf; 		/* first available character   */
77 pbent *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */
78 stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
79 char strspace[STRSPMAX+1];	/* string space for evaluation */
80 char *ep = strspace;		/* first free char in strspace */
81 char *endest= strspace+STRSPMAX;/* end of string space	       */
82 int sp; 			/* current m4  stack pointer   */
83 int fp; 			/* m4 call frame pointer       */
84 FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
85 FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
86 FILE *active;			/* active output file pointer  */
87 char *m4temp;			/* filename for diversions     */
88 int ilevel = 0; 		/* input file stack pointer    */
89 int oindex = 0; 		/* diversion index..	       */
90 char *null = "";                /* as it says.. just a null..  */
91 char *m4wraps = "";             /* m4wrap string default..     */
92 char *progname;			/* name of this program        */
93 char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote character  (`)   */
94 char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote character (')   */
95 char scommt[MAXCCHARS+1] = {SCOMMT};	/* start character for comment */
96 char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end character for comment   */
97 
98 struct keyblk keywrds[] = {	/* m4 keywords to be installed */
99 	{ "include",      INCLTYPE },
100 	{ "sinclude",     SINCTYPE },
101 	{ "define",       DEFITYPE },
102 	{ "defn",         DEFNTYPE },
103 	{ "divert",       DIVRTYPE },
104 	{ "expr",         EXPRTYPE },
105 	{ "eval",         EXPRTYPE },
106 	{ "substr",       SUBSTYPE },
107 	{ "ifelse",       IFELTYPE },
108 	{ "ifdef",        IFDFTYPE },
109 	{ "len",          LENGTYPE },
110 	{ "incr",         INCRTYPE },
111 	{ "decr",         DECRTYPE },
112 	{ "dnl",          DNLNTYPE },
113 	{ "changequote",  CHNQTYPE },
114 	{ "changecom",    CHNCTYPE },
115 	{ "index",        INDXTYPE },
116 #ifdef EXTENDED
117 	{ "paste",        PASTTYPE },
118 	{ "spaste",       SPASTYPE },
119 #endif
120 	{ "popdef",       POPDTYPE },
121 	{ "pushdef",      PUSDTYPE },
122 	{ "dumpdef",      DUMPTYPE },
123 	{ "shift",        SHIFTYPE },
124 	{ "translit",     TRNLTYPE },
125 	{ "undefine",     UNDFTYPE },
126 	{ "undivert",     UNDVTYPE },
127 	{ "divnum",       DIVNTYPE },
128 	{ "maketemp",     MKTMTYPE },
129 	{ "errprint",     ERRPTYPE },
130 	{ "m4wrap",       M4WRTYPE },
131 	{ "m4exit",       EXITTYPE },
132 	{ "syscmd",       SYSCTYPE },
133 	{ "sysval",       SYSVTYPE },
134 
135 #if defined(unix) || defined(__NetBSD__)
136 	{ "unix",         MACRTYPE },
137 #else
138 #ifdef vms
139 	{ "vms",          MACRTYPE },
140 #endif
141 #endif
142 };
143 
144 #define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
145 
146 int	do_look_ahead __P((int, char *));
147 ndptr	inspect __P((char *));
148 void	initkwds __P((void));
149 void	macro __P((void));
150 int	main __P((int, char **));
151 
152 int
153 main(argc,argv)
154 	int argc;
155 	char *argv[];
156 {
157 	int c;
158 	int n;
159 	char *p;
160 	FILE *ifp;
161 
162 	progname = basename(argv[0]);
163 
164 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
165 		signal(SIGINT, onintr);
166 
167 	initkwds();
168 
169 	while ((c = getopt(argc, argv, "tD:U:o:")) != -1)
170 		switch(c) {
171 
172 		case 'D':               /* define something..*/
173 			for (p = optarg; *p; p++)
174 				if (*p == '=')
175 					break;
176 			if (*p)
177 				*p++ = EOS;
178 			dodefine(optarg, p);
179 			break;
180 		case 'U':               /* undefine...       */
181 			remhash(optarg, TOP);
182 			break;
183 		case 'o':		/* specific output   */
184 		case '?':
185 			usage();
186 		}
187 
188         argc -= optind;
189         argv += optind;
190 
191 	active = stdout;		/* default active output     */
192 					/* filename for diversions   */
193 	m4temp = mktemp(xstrdup(_PATH_DIVNAME));
194 
195 	bbase[0] = bufbase;
196         if (!argc) {
197  		sp = -1;		/* stack pointer initialized */
198 		fp = 0; 		/* frame pointer initialized */
199 		infile[0] = stdin;	/* default input (naturally) */
200 		macro();
201 	} else
202 		for (; argc--; ++argv) {
203 			p = *argv;
204 			if (p[0] == '-' && p[1] == '\0')
205 				ifp = stdin;
206 			else if ((ifp = fopen(p, "r")) == NULL)
207 				err(1, "%s", p);
208 			sp = -1;
209 			fp = 0;
210 			infile[0] = ifp;
211 			macro();
212 			if (ifp != stdin)
213 				(void)fclose(ifp);
214 		}
215 
216 	if (*m4wraps) { 		/* anything for rundown ??   */
217 		ilevel = 0;		/* in case m4wrap includes.. */
218 		bufbase = bp = buf;	/* use the entire buffer   */
219 		putbackeof();		/* eof is a must !!	     */
220 		pbstr(m4wraps); 	/* user-defined wrapup act   */
221 		macro();		/* last will and testament   */
222 	}
223 
224 	if (active != stdout)
225 		active = stdout;	/* reset output just in case */
226 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
227 		if (outfile[n] != NULL)
228 			getdiv(n);
229 					/* remove bitbucket if used  */
230 	if (outfile[0] != NULL) {
231 		(void) fclose(outfile[0]);
232 		m4temp[UNIQUE] = '0';
233 #ifdef vms
234 		(void) remove(m4temp);
235 #else
236 		(void) unlink(m4temp);
237 #endif
238 	}
239 
240 	return 0;
241 }
242 
/*
 * do_look_ahead - try to match the multi-character delimiter `token'
 * against the input.  Used for the quote and comment delimiters.
 *
 * On entry `t' is the character already read and must equal token[0];
 * anything else is reported as an internal error.  The remaining
 * characters of `token' are then read one at a time.
 *
 * Returns 1 if the whole token was read (its characters are consumed).
 * Returns 0 on the first mismatch or EOF; the mismatching character
 * (unless it was EOF) and the already matched characters token[1..]
 * are pushed back, so only `t' itself remains consumed.
 */
int
do_look_ahead(t, token)
	int	t;
	char	*token;
{
	int matched;		/* number of leading token chars matched */

	if (t != token[0])
		errx(1, "internal error");

	matched = 1;
	while (token[matched] != '\0') {
		t = gpbc();
		if (t != EOF && t == token[matched]) {
			matched++;
			continue;
		}

		/* mismatch: undo everything read after token[0] */
		if (t != EOF)
			putback(t);
		while (matched > 1)
			putback(token[--matched]);
		return 0;
	}
	return 1;
}

/* cheap first-character test in front of the full do_look_ahead() match */
#define LOOK_AHEAD(t, token) \
	((t) == (token)[0] && do_look_ahead((t), (token)))
274 
275 /*
276  * macro - the work horse..
277  */
278 void
279 macro()
280 {
281 	char token[MAXTOK], chars[2];
282 	char *s;
283 	int t, l;
284 	ndptr p;
285 	int  nlpar;
286 
287 	s = NULL;
288 	cycle {
289 		t = gpbc();
290 		if (t == '_' || isalpha(t)) {
291 			putback(t);
292 			if ((p = inspect(s = token)) == nil) {
293 				if (sp < 0)
294 					while (*s)
295 						putc(*s++, active);
296 				else
297 					while (*s)
298 						chrsave(*s++);
299 			}
300 			else {
301 		/*
302 		 * real thing.. First build a call frame:
303 		 */
304 				pushf(fp);	/* previous call frm */
305 				pushf(p->type); /* type of the call  */
306 				pushf(0);	/* parenthesis level */
307 				fp = sp;	/* new frame pointer */
308 		/*
309 		 * now push the string arguments:
310 		 */
311 				pushs(p->defn);	      /* defn string */
312 				pushs(p->name);	      /* macro name  */
313 				pushs(ep);	      /* start next..*/
314 
315 				putback(l = gpbc());
316 				if (l != LPAREN)  {   /* add bracks  */
317 					putback(RPAREN);
318 					putback(LPAREN);
319 				}
320 			}
321 		}
322 		else if (t == EOF) {
323 			if (sp > -1)
324 				errx(1, "unexpected end of input");
325 			if (ilevel <= 0)
326 				break;			/* all done thanks.. */
327 			--ilevel;
328 			(void) fclose(infile[ilevel+1]);
329 			bufbase = bbase[ilevel];
330 			continue;
331 		}
332 	/*
333 	 * non-alpha token possibly seen..
334 	 * [the order of else if .. stmts is important.]
335 	 */
336 		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
337 			nlpar = 1;
338 			do {
339 
340 				l = gpbc();
341 				if (LOOK_AHEAD(l,rquote)) {
342 					nlpar--;
343 					s = rquote;
344 				} else if (LOOK_AHEAD(l,lquote)) {
345 					nlpar++;
346 					s = lquote;
347 				} else if (l == EOF)
348 					errx(1, "missing right quote");
349 				else {
350 					chars[0] = l;
351 					chars[1] = '\0';
352 					s = chars;
353 				}
354 				if (nlpar > 0) {
355 					if (sp < 0)
356 						while (*s)
357 							putc(*s++, active);
358 					else
359 						while (*s)
360 							chrsave(*s++);
361 				}
362 			}
363 			while (nlpar != 0);
364 		}
365 
366 		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
367 			int i;
368 			for (i = 0; i < MAXCCHARS && scommt[i]; i++)
369 				putc(scommt[i], active);
370 
371 			for(;;) {
372 				t = gpbc();
373 				if (LOOK_AHEAD(t, ecommt)) {
374 					for (i = 0; i < MAXCCHARS && ecommt[i];
375 					     i++)
376 						putc(ecommt[i], active);
377 					break;
378 				}
379 				if (t == EOF)
380 					break;
381 				putc(t, active);
382 			}
383 		}
384 
385 		else if (sp < 0) {		/* not in a macro at all */
386 			putc(t, active);	/* output directly..	 */
387 		}
388 
389 		else switch(t) {
390 
391 		case LPAREN:
392 			if (PARLEV > 0)
393 				chrsave(t);
394 			while (isspace(l = gpbc()))
395 				;		/* skip blank, tab, nl.. */
396 			putback(l);
397 			PARLEV++;
398 			break;
399 
400 		case RPAREN:
401 			if (--PARLEV > 0)
402 				chrsave(t);
403 			else {			/* end of argument list */
404 				chrsave(EOS);
405 
406 				if (sp == STACKMAX)
407 					errx(1, "internal stack overflow");
408 
409 				if (CALTYP == MACRTYPE)
410 					expand((char **) mstack+fp+1, sp-fp);
411 				else
412 					eval((char **) mstack+fp+1, sp-fp, CALTYP);
413 
414 				ep = PREVEP;	/* flush strspace */
415 				sp = PREVSP;	/* previous sp..  */
416 				fp = PREVFP;	/* rewind stack...*/
417 			}
418 			break;
419 
420 		case COMMA:
421 			if (PARLEV == 1) {
422 				chrsave(EOS);		/* new argument   */
423 				while (isspace(l = gpbc()))
424 					;
425 				putback(l);
426 				pushs(ep);
427 			} else
428 				chrsave(t);
429 			break;
430 
431 		default:
432 			chrsave(t);			/* stack the char */
433 			break;
434 		}
435 	}
436 }
437 
438 /*
439  * build an input token..
440  * consider only those starting with _ or A-Za-z. This is a
441  * combo with lookup to speed things up.
442  */
443 ndptr
444 inspect(tp)
445 	char *tp;
446 {
447 	char c;
448 	char *name = tp;
449 	char *etp = tp+MAXTOK;
450 	ndptr p;
451 	unsigned long h = 0;
452 
453 	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
454 		h = (h << 5) + h + (*tp++ = c);
455 	putback(c);
456 	if (tp == etp)
457 		errx(1, "token too long");
458 
459 	*tp = EOS;
460 
461 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
462 		if (STREQ(name, p->name))
463 			break;
464 	return p;
465 }
466 
467 /*
468  * initkwds - initialise m4 keywords as fast as possible.
469  * This very similar to install, but without certain overheads,
470  * such as calling lookup. Malloc is not used for storing the
471  * keyword strings, since we simply use the static pointers
472  * within keywrds block.
473  */
474 void
475 initkwds()
476 {
477 	int i;
478 	int h;
479 	ndptr p;
480 
481 	for (i = 0; i < MAXKEYS; i++) {
482 		h = hash(keywrds[i].knam);
483 		p = (ndptr) xalloc(sizeof(struct ndblock));
484 		p->nxtptr = hashtab[h];
485 		hashtab[h] = p;
486 		p->name = keywrds[i].knam;
487 		p->defn = null;
488 		p->type = keywrds[i].ktyp | STATIC;
489 	}
490 }
491