xref: /netbsd-src/usr.bin/m4/main.c (revision dc306354b0b29af51801a7632f1e95265a68cd81)
1 /*	$NetBSD: main.c,v 1.20 1998/12/19 19:54:26 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Ozan Yigit at York University.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\
42 	The Regents of the University of California.  All rights reserved.\n");
43 #endif /* not lint */
44 
45 #ifndef lint
46 #if 0
47 static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
48 #else
49 __RCSID("$NetBSD: main.c,v 1.20 1998/12/19 19:54:26 christos Exp $");
50 #endif
51 #endif /* not lint */
52 
53 /*
54  * main.c
55  * Facility: m4 macro processor
56  * by: oz
57  */
58 
59 #include <sys/types.h>
60 #include <ctype.h>
61 #include <err.h>
62 #include <errno.h>
63 #include <signal.h>
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <unistd.h>
68 #include "mdef.h"
69 #include "stdd.h"
70 #include "extern.h"
71 #include "pathnames.h"
72 
73 ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
74 pbent buf[BUFSIZE];		/* push-back buffer	       */
75 pbent *bufbase = buf;		/* the base for current ilevel */
76 pbent *bbase[MAXINP];		/* the base for each ilevel    */
77 pbent *bp = buf; 		/* first available character   */
78 pbent *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */
79 stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
80 char strspace[STRSPMAX+1];	/* string space for evaluation */
81 char *ep = strspace;		/* first free char in strspace */
82 char *endest= strspace+STRSPMAX;/* end of string space	       */
83 int sp; 			/* current m4  stack pointer   */
84 int fp; 			/* m4 call frame pointer       */
85 FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
86 FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
87 FILE *active;			/* active output file pointer  */
88 char *m4temp;			/* filename for diversions     */
89 int ilevel = 0; 		/* input file stack pointer    */
90 int oindex = 0; 		/* diversion index..	       */
91 char *null = "";                /* as it says.. just a null..  */
92 char *m4wraps = "";             /* m4wrap string default..     */
93 char *progname;			/* name of this program        */
94 char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote character  (`)   */
95 char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote character (')   */
96 char scommt[MAXCCHARS+1] = {SCOMMT};	/* start character for comment */
97 char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end character for comment   */
98 
99 struct keyblk keywrds[] = {	/* m4 keywords to be installed */
100 	{ "include",      INCLTYPE },
101 	{ "sinclude",     SINCTYPE },
102 	{ "define",       DEFITYPE },
103 	{ "defn",         DEFNTYPE },
104 	{ "divert",       DIVRTYPE },
105 	{ "expr",         EXPRTYPE },
106 	{ "eval",         EXPRTYPE },
107 	{ "substr",       SUBSTYPE },
108 	{ "ifelse",       IFELTYPE },
109 	{ "ifdef",        IFDFTYPE },
110 	{ "len",          LENGTYPE },
111 	{ "incr",         INCRTYPE },
112 	{ "decr",         DECRTYPE },
113 	{ "dnl",          DNLNTYPE },
114 	{ "changequote",  CHNQTYPE },
115 	{ "changecom",    CHNCTYPE },
116 	{ "index",        INDXTYPE },
117 #ifdef EXTENDED
118 	{ "paste",        PASTTYPE },
119 	{ "spaste",       SPASTYPE },
120 #endif
121 	{ "popdef",       POPDTYPE },
122 	{ "pushdef",      PUSDTYPE },
123 	{ "dumpdef",      DUMPTYPE },
124 	{ "shift",        SHIFTYPE },
125 	{ "translit",     TRNLTYPE },
126 	{ "undefine",     UNDFTYPE },
127 	{ "undivert",     UNDVTYPE },
128 	{ "divnum",       DIVNTYPE },
129 	{ "maketemp",     MKTMTYPE },
130 	{ "errprint",     ERRPTYPE },
131 	{ "m4wrap",       M4WRTYPE },
132 	{ "m4exit",       EXITTYPE },
133 	{ "syscmd",       SYSCTYPE },
134 	{ "sysval",       SYSVTYPE },
135 
136 #ifdef unix
137 	{ "unix",         MACRTYPE },
138 #else
139 #ifdef vms
140 	{ "vms",          MACRTYPE },
141 #endif
142 #endif
143 };
144 
145 #define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
146 
147 int	do_look_ahead __P((int, char *));
148 ndptr	inspect __P((char *));
149 void	initkwds __P((void));
150 void	macro __P((void));
151 int	main __P((int, char **));
152 
153 int
154 main(argc,argv)
155 	int argc;
156 	char *argv[];
157 {
158 	int c;
159 	int n;
160 	char *p;
161 	FILE *ifp;
162 
163 	progname = basename(argv[0]);
164 
165 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
166 		signal(SIGINT, onintr);
167 
168 	initkwds();
169 
170 	while ((c = getopt(argc, argv, "tD:U:o:")) != -1)
171 		switch(c) {
172 
173 		case 'D':               /* define something..*/
174 			for (p = optarg; *p; p++)
175 				if (*p == '=')
176 					break;
177 			if (*p)
178 				*p++ = EOS;
179 			dodefine(optarg, p);
180 			break;
181 		case 'U':               /* undefine...       */
182 			remhash(optarg, TOP);
183 			break;
184 		case 'o':		/* specific output   */
185 		case '?':
186 			usage();
187 		}
188 
189         argc -= optind;
190         argv += optind;
191 
192 	active = stdout;		/* default active output     */
193 					/* filename for diversions   */
194 	m4temp = mktemp(xstrdup(_PATH_DIVNAME));
195 
196 	bbase[0] = bufbase;
197         if (!argc) {
198  		sp = -1;		/* stack pointer initialized */
199 		fp = 0; 		/* frame pointer initialized */
200 		infile[0] = stdin;	/* default input (naturally) */
201 		macro();
202 	} else
203 		for (; argc--; ++argv) {
204 			p = *argv;
205 			if (p[0] == '-' && p[1] == '\0')
206 				ifp = stdin;
207 			else if ((ifp = fopen(p, "r")) == NULL)
208 				err(1, "%s", p);
209 			sp = -1;
210 			fp = 0;
211 			infile[0] = ifp;
212 			macro();
213 			if (ifp != stdin)
214 				(void)fclose(ifp);
215 		}
216 
217 	if (*m4wraps) { 		/* anything for rundown ??   */
218 		ilevel = 0;		/* in case m4wrap includes.. */
219 		bufbase = bp = buf;	/* use the entire buffer   */
220 		putbackeof();		/* eof is a must !!	     */
221 		pbstr(m4wraps); 	/* user-defined wrapup act   */
222 		macro();		/* last will and testament   */
223 	}
224 
225 	if (active != stdout)
226 		active = stdout;	/* reset output just in case */
227 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
228 		if (outfile[n] != NULL)
229 			getdiv(n);
230 					/* remove bitbucket if used  */
231 	if (outfile[0] != NULL) {
232 		(void) fclose(outfile[0]);
233 		m4temp[UNIQUE] = '0';
234 #ifdef vms
235 		(void) remove(m4temp);
236 #else
237 		(void) unlink(m4temp);
238 #endif
239 	}
240 
241 	return 0;
242 }
243 
/*
 * do_look_ahead - check whether the rest of `token' follows in the input.
 * Used for the comment and quoting delimiters.
 *
 * The caller has already read `t', which must equal token[0]; anything
 * else is reported as an internal error.
 *
 * Returns 1 if the remaining characters of `token' were all read;
 *         0 otherwise, with every character read here pushed back
 *           (an EOF that ended the search is not pushed back).
 */
int
do_look_ahead(t, token)
	int	t;
	char	*token;
{
	int pos;	/* index of the token character being matched */

	if (t != token[0])
		errx(1, "internal error");

	for (pos = 1; token[pos]; pos++) {
		t = gpbc();
		if (t != EOF && t == token[pos])
			continue;

		/* Mismatch: return the input to how we found it. */
		if (t != EOF)
			putback(t);
		while (--pos)
			putback(token[pos]);
		return 0;
	}
	return 1;
}

/* Cheap first-character test before calling do_look_ahead(). */
#define LOOK_AHEAD(t, token) \
	((t) == (token)[0] && do_look_ahead((t), (token)))
275 
276 /*
277  * macro - the work horse..
278  */
279 void
280 macro()
281 {
282 	char token[MAXTOK], chars[2];
283 	char *s;
284 	int t, l;
285 	ndptr p;
286 	int  nlpar;
287 
288 	s = NULL;
289 	cycle {
290 		t = gpbc();
291 		if (t == '_' || isalpha(t)) {
292 			putback(t);
293 			if ((p = inspect(s = token)) == nil) {
294 				if (sp < 0)
295 					while (*s)
296 						putc(*s++, active);
297 				else
298 					while (*s)
299 						chrsave(*s++);
300 			}
301 			else {
302 		/*
303 		 * real thing.. First build a call frame:
304 		 */
305 				pushf(fp);	/* previous call frm */
306 				pushf(p->type); /* type of the call  */
307 				pushf(0);	/* parenthesis level */
308 				fp = sp;	/* new frame pointer */
309 		/*
310 		 * now push the string arguments:
311 		 */
312 				pushs(p->defn);	      /* defn string */
313 				pushs(p->name);	      /* macro name  */
314 				pushs(ep);	      /* start next..*/
315 
316 				putback(l = gpbc());
317 				if (l != LPAREN)  {   /* add bracks  */
318 					putback(RPAREN);
319 					putback(LPAREN);
320 				}
321 			}
322 		}
323 		else if (t == EOF) {
324 			if (sp > -1)
325 				errx(1, "unexpected end of input");
326 			if (ilevel <= 0)
327 				break;			/* all done thanks.. */
328 			--ilevel;
329 			(void) fclose(infile[ilevel+1]);
330 			bufbase = bbase[ilevel];
331 			continue;
332 		}
333 	/*
334 	 * non-alpha token possibly seen..
335 	 * [the order of else if .. stmts is important.]
336 	 */
337 		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
338 			nlpar = 1;
339 			do {
340 
341 				l = gpbc();
342 				if (LOOK_AHEAD(l,rquote)) {
343 					nlpar--;
344 					s = rquote;
345 				} else if (LOOK_AHEAD(l,lquote)) {
346 					nlpar++;
347 					s = lquote;
348 				} else if (l == EOF)
349 					errx(1, "missing right quote");
350 				else {
351 					chars[0] = l;
352 					chars[1] = '\0';
353 					s = chars;
354 				}
355 				if (nlpar > 0) {
356 					if (sp < 0)
357 						while (*s)
358 							putc(*s++, active);
359 					else
360 						while (*s)
361 							chrsave(*s++);
362 				}
363 			}
364 			while (nlpar != 0);
365 		}
366 
367 		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
368 			int i;
369 			for (i = 0; i < MAXCCHARS && scommt[i]; i++)
370 				putc(scommt[i], active);
371 
372 			for(;;) {
373 				t = gpbc();
374 				if (LOOK_AHEAD(t, ecommt)) {
375 					for (i = 0; i < MAXCCHARS && ecommt[i];
376 					     i++)
377 						putc(ecommt[i], active);
378 					break;
379 				}
380 				if (t == EOF)
381 					break;
382 				putc(t, active);
383 			}
384 		}
385 
386 		else if (sp < 0) {		/* not in a macro at all */
387 			putc(t, active);	/* output directly..	 */
388 		}
389 
390 		else switch(t) {
391 
392 		case LPAREN:
393 			if (PARLEV > 0)
394 				chrsave(t);
395 			while (isspace(l = gpbc()))
396 				;		/* skip blank, tab, nl.. */
397 			putback(l);
398 			PARLEV++;
399 			break;
400 
401 		case RPAREN:
402 			if (--PARLEV > 0)
403 				chrsave(t);
404 			else {			/* end of argument list */
405 				chrsave(EOS);
406 
407 				if (sp == STACKMAX)
408 					errx(1, "internal stack overflow");
409 
410 				if (CALTYP == MACRTYPE)
411 					expand((char **) mstack+fp+1, sp-fp);
412 				else
413 					eval((char **) mstack+fp+1, sp-fp, CALTYP);
414 
415 				ep = PREVEP;	/* flush strspace */
416 				sp = PREVSP;	/* previous sp..  */
417 				fp = PREVFP;	/* rewind stack...*/
418 			}
419 			break;
420 
421 		case COMMA:
422 			if (PARLEV == 1) {
423 				chrsave(EOS);		/* new argument   */
424 				while (isspace(l = gpbc()))
425 					;
426 				putback(l);
427 				pushs(ep);
428 			} else
429 				chrsave(t);
430 			break;
431 
432 		default:
433 			chrsave(t);			/* stack the char */
434 			break;
435 		}
436 	}
437 }
438 
439 /*
440  * build an input token..
441  * consider only those starting with _ or A-Za-z. This is a
442  * combo with lookup to speed things up.
443  */
444 ndptr
445 inspect(tp)
446 	char *tp;
447 {
448 	char c;
449 	char *name = tp;
450 	char *etp = tp+MAXTOK;
451 	ndptr p;
452 	unsigned long h = 0;
453 
454 	while ((isalnum((unsigned char)(c = gpbc())) || c == '_') && tp < etp)
455 		h = (h << 5) + h + (*tp++ = c);
456 	putback(c);
457 	if (tp == etp)
458 		errx(1, "token too long");
459 
460 	*tp = EOS;
461 
462 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
463 		if (STREQ(name, p->name))
464 			break;
465 	return p;
466 }
467 
468 /*
469  * initkwds - initialise m4 keywords as fast as possible.
470  * This very similar to install, but without certain overheads,
471  * such as calling lookup. Malloc is not used for storing the
472  * keyword strings, since we simply use the static pointers
473  * within keywrds block.
474  */
475 void
476 initkwds()
477 {
478 	int i;
479 	int h;
480 	ndptr p;
481 
482 	for (i = 0; i < MAXKEYS; i++) {
483 		h = hash(keywrds[i].knam);
484 		p = (ndptr) xalloc(sizeof(struct ndblock));
485 		p->nxtptr = hashtab[h];
486 		hashtab[h] = p;
487 		p->name = keywrds[i].knam;
488 		p->defn = null;
489 		p->type = keywrds[i].ktyp | STATIC;
490 	}
491 }
492