xref: /netbsd-src/usr.bin/m4/main.c (revision 76dfffe33547c37f8bdd446e3e4ab0f3c16cea4b)
1 /*	$NetBSD: main.c,v 1.11 1996/01/13 23:25:26 pk Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Ozan Yigit at York University.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #ifndef lint
40 static char copyright[] =
41 "@(#) Copyright (c) 1989, 1993\n\
42 	The Regents of the University of California.  All rights reserved.\n";
43 #endif /* not lint */
44 
45 #ifndef lint
46 #if 0
47 static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
48 #else
49 static char rcsid[] = "$NetBSD: main.c,v 1.11 1996/01/13 23:25:26 pk Exp $";
50 #endif
51 #endif /* not lint */
52 
53 /*
54  * main.c
55  * Facility: m4 macro processor
56  * by: oz
57  */
58 
59 #include <sys/types.h>
60 #include <signal.h>
61 #include <errno.h>
62 #include <unistd.h>
63 #include <stdio.h>
64 #include <ctype.h>
65 #include <string.h>
66 #include "mdef.h"
67 #include "stdd.h"
68 #include "extern.h"
69 #include "pathnames.h"
70 
/*
 * Global state of the m4 processor.  None of these objects is declared
 * static, so they have external linkage.
 */
ndptr hashtab[HASHSIZE];	/* hash table for macros etc.; chains linked via nxtptr */
char buf[BUFSIZE];		/* push-back buffer	       */
char *bufbase = buf;		/* the base for current ilevel */
char *bbase[MAXINP];		/* the base for each ilevel    */
char *bp = buf; 		/* first available character   */
char *endpbb = buf+BUFSIZE;	/* one past the end of the push-back buffer */
stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
char strspace[STRSPMAX+1];	/* string space for evaluation */
char *ep = strspace;		/* first free char in strspace */
char *endest= strspace+STRSPMAX;/* end of string space	       */
int sp; 			/* current m4 stack pointer; main() sets it to -1 before each macro() run */
int fp; 			/* m4 call frame pointer; main() sets it to 0 before each macro() run */
FILE *infile[MAXINP];		/* input file stack, indexed by ilevel */
FILE *outfile[MAXOUT];		/* diversion files; entry 0 is the bit bucket */
FILE *active;			/* active output file pointer  */
char *m4temp;			/* filename for diversions     */
int ilevel = 0; 		/* input file stack pointer    */
int oindex = 0; 		/* diversion index..	       */
char *null = "";                /* shared empty string         */
char *m4wraps = "";             /* m4wrap text; empty by default */
char *progname;			/* name of this program        */
/* Quote and comment delimiters: strings of up to MAXCCHARS characters. */
char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote string, initially LQUOTE   */
char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote string, initially RQUOTE  */
char scommt[MAXCCHARS+1] = {SCOMMT};	/* comment start string, initially SCOMMT */
char ecommt[MAXCCHARS+1] = {ECOMMT};	/* comment end string, initially ECOMMT   */
96 
/*
 * Table of the built-in m4 keywords.  Each entry pairs a keyword name
 * with its type code; initkwds() enters every entry into hashtab[].
 * Both "expr" and "eval" map to EXPRTYPE.
 */
struct keyblk keywrds[] = {	/* m4 keywords to be installed */
	"include",      INCLTYPE,
	"sinclude",     SINCTYPE,
	"define",       DEFITYPE,
	"defn",         DEFNTYPE,
	"divert",       DIVRTYPE,
	"expr",         EXPRTYPE,
	"eval",         EXPRTYPE,
	"substr",       SUBSTYPE,
	"ifelse",       IFELTYPE,
	"ifdef",        IFDFTYPE,
	"len",          LENGTYPE,
	"incr",         INCRTYPE,
	"decr",         DECRTYPE,
	"dnl",          DNLNTYPE,
	"changequote",  CHNQTYPE,
	"changecom",    CHNCTYPE,
	"index",        INDXTYPE,
#ifdef EXTENDED
	/* only present when built with EXTENDED defined */
	"paste",        PASTTYPE,
	"spaste",       SPASTYPE,
#endif
	"popdef",       POPDTYPE,
	"pushdef",      PUSDTYPE,
	"dumpdef",      DUMPTYPE,
	"shift",        SHIFTYPE,
	"translit",     TRNLTYPE,
	"undefine",     UNDFTYPE,
	"undivert",     UNDVTYPE,
	"divnum",       DIVNTYPE,
	"maketemp",     MKTMTYPE,
	"errprint",     ERRPTYPE,
	"m4wrap",       M4WRTYPE,
	"m4exit",       EXITTYPE,
	"syscmd",       SYSCTYPE,
	"sysval",       SYSVTYPE,

	/* a name identifying the host system, entered with type MACRTYPE */
#if defined(unix) || defined(__NetBSD__)
	"unix",         MACRTYPE,
#else
#ifdef vms
	"vms",          MACRTYPE,
#endif
#endif
};
142 
/* Number of entries in keywrds[]. */
#define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))

/* getopt() state read by main() while parsing the command line. */
extern int optind;
extern char *optarg;

/* Old-style (unprototyped) declarations of functions used by main(). */
void macro();
void initkwds();
extern int getopt();
151 
152 int
153 main(argc,argv)
154 	int argc;
155 	char *argv[];
156 {
157 	register int c;
158 	register int n;
159 	char *p;
160 	register FILE *ifp;
161 
162 	progname = basename(argv[0]);
163 
164 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
165 		signal(SIGINT, onintr);
166 
167 	initkwds();
168 
169 	while ((c = getopt(argc, argv, "tD:U:o:")) != EOF)
170 		switch(c) {
171 
172 		case 'D':               /* define something..*/
173 			for (p = optarg; *p; p++)
174 				if (*p == '=')
175 					break;
176 			if (*p)
177 				*p++ = EOS;
178 			dodefine(optarg, p);
179 			break;
180 		case 'U':               /* undefine...       */
181 			remhash(optarg, TOP);
182 			break;
183 		case 'o':		/* specific output   */
184 		case '?':
185 			usage();
186 		}
187 
188         argc -= optind;
189         argv += optind;
190 
191 	active = stdout;		/* default active output     */
192 					/* filename for diversions   */
193 	m4temp = mktemp(xstrdup(_PATH_DIVNAME));
194 
195 	bbase[0] = bufbase;
196         if (!argc) {
197  		sp = -1;		/* stack pointer initialized */
198 		fp = 0; 		/* frame pointer initialized */
199 		infile[0] = stdin;	/* default input (naturally) */
200 		macro();
201 	} else
202 		for (; argc--; ++argv) {
203 			p = *argv;
204 			if (p[0] == '-' && p[1] == '\0')
205 				ifp = stdin;
206 			else if ((ifp = fopen(p, "r")) == NULL)
207 				oops("%s: %s", p, strerror(errno));
208 			sp = -1;
209 			fp = 0;
210 			infile[0] = ifp;
211 			macro();
212 			if (ifp != stdin)
213 				(void)fclose(ifp);
214 		}
215 
216 	if (*m4wraps) { 		/* anything for rundown ??   */
217 		ilevel = 0;		/* in case m4wrap includes.. */
218 		bufbase = bp = buf;	/* use the entire buffer   */
219 		putback(EOF);		/* eof is a must !!	     */
220 		pbstr(m4wraps); 	/* user-defined wrapup act   */
221 		macro();		/* last will and testament   */
222 	}
223 
224 	if (active != stdout)
225 		active = stdout;	/* reset output just in case */
226 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
227 		if (outfile[n] != NULL)
228 			getdiv(n);
229 					/* remove bitbucket if used  */
230 	if (outfile[0] != NULL) {
231 		(void) fclose(outfile[0]);
232 		m4temp[UNIQUE] = '0';
233 #ifdef vms
234 		(void) remove(m4temp);
235 #else
236 		(void) unlink(m4temp);
237 #endif
238 	}
239 
240 	return 0;
241 }
242 
243 ndptr inspect();
244 
245 /*
246  * Look ahead (at most MAXCCHARS characters) for `token'.
247  * (on input `t == token[0]')
248  * Used for comment and quoting delimiters.
249  * Returns 1 if `token' present; copied to output.
250  *         0 if `token' not found; all characters pushed back
251  */
252 int
253 do_look_ahead(t, token)
254 	int	t;
255 	char	*token;
256 {
257 	int i;
258 
259 	if (t != token[0])
260 		oops("internal error", "");
261 
262 	for (i = 1; *++token; i++) {
263 		t = gpbc();
264 		if (t == EOF || t != *token) {
265 			if (t != EOF)
266 				putback(t);
267 			while (--i)
268 				putback(*--token);
269 			return 0;
270 		}
271 	}
272 	return 1;
273 }
274 
275 #define LOOK_AHEAD(t, token) ((t)==(token)[0] && do_look_ahead(t,token))
276 
/*
 * macro - the work horse..
 *
 * Reads characters with gpbc() and dispatches on each one until EOF is
 * seen at the outermost input level (ilevel <= 0):
 *   - `_' or a letter starts a name, which inspect() collects and looks
 *     up in the hash table;
 *   - EOF closes the input file of the level being left and resumes the
 *     previous level;
 *   - the lquote string starts quoted text, the scommt string a comment;
 *   - any other character is written to `active' when no macro call is
 *     being collected (sp < 0); otherwise it belongs to the argument
 *     list of the call under construction.
 *
 * PARLEV, CALTYP, PREVEP, PREVSP, PREVFP, pushf, pushs and `cycle' are
 * not defined in this file -- presumably macros from the project
 * headers; confirm there.
 */
void
macro() {
	char token[MAXTOK];	/* text of the name being examined */
	register char *s;
	register int t, l;	/* current character / look-ahead character */
	register ndptr p;	/* hash table entry of the name, or nil */
	register int  nlpar;	/* nesting depth of quotes */

	cycle {
		t = gpbc();
		if (t == '_' || isalpha(t)) {
			/* start of a name: let inspect() re-read it into token */
			putback(t);
			if ((p = inspect(s = token)) == nil) {
				/*
				 * not a known name: pass the text on unchanged,
				 * to the output or to chrsave() while a call is
				 * being collected
				 */
				if (sp < 0)
					while (*s)
						putc(*s++, active);
				else
					while (*s)
						chrsave(*s++);
			}
			else {
		/*
		 * real thing.. First build a call frame:
		 */
				pushf(fp);	/* previous call frm */
				pushf(p->type); /* type of the call  */
				pushf(0);	/* parenthesis level */
				fp = sp;	/* new frame pointer */
		/*
		 * now push the string arguments:
		 */
				pushs(p->defn);	      /* defn string */
				pushs(p->name);	      /* macro name  */
				pushs(ep);	      /* start next..*/

				/*
				 * peek at the next character; a call without
				 * an argument list gets an empty `()' pushed
				 * back so the code below handles both forms
				 */
				putback(l = gpbc());
				if (l != LPAREN)  {   /* add bracks  */
					putback(RPAREN);
					putback(LPAREN);
				}
			}
		}
		else if (t == EOF) {
			/* EOF while a call is still being collected is an error */
			if (sp > -1)
				oops("unexpected end of input", "");
			if (ilevel <= 0)
				break;			/* all done thanks.. */
			/* leave the current input level and resume the previous one */
			--ilevel;
			(void) fclose(infile[ilevel+1]);
			bufbase = bbase[ilevel];
			continue;
		}
	/*
	 * non-alpha single-char token seen..
	 * [the order of else if .. stmts is important.]
	 */
		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
			/*
			 * Copy everything up to the matching rquote; the
			 * outermost pair of quotes is dropped, nested quotes
			 * are tracked in nlpar.
			 *
			 * NOTE(review): when a nested multi-character lquote
			 * or rquote is matched, only its first character `l'
			 * is emitted below -- the rest was consumed by
			 * do_look_ahead().  Confirm whether that is intended.
			 */
			nlpar = 1;
			do {
				l = gpbc();
				if (LOOK_AHEAD(l,rquote))
					nlpar--;
				else if (LOOK_AHEAD(l,lquote))
					nlpar++;
				else if (l == EOF)
					oops("missing right quote", "");
				if (nlpar > 0) {
					if (sp < 0)
						putc(l, active);
					else
						chrsave(l);
				}
			}
			while (nlpar != 0);
		}

		/*
		 * A comment is only recognized outside of a macro call
		 * (sp < 0).  The delimiters and the text between them are
		 * copied to the output unchanged.
		 */
		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
			int i;
			for (i = 0; i < MAXCCHARS && scommt[i]; i++)
				putc(scommt[i], active);

			for(;;) {
				t = gpbc();
				if (LOOK_AHEAD(t, ecommt)) {
					for (i = 0; i < MAXCCHARS && ecommt[i];
					     i++)
						putc(ecommt[i], active);
					break;
				}
				/* EOF inside a comment just ends the comment */
				if (t == EOF)
					break;
				putc(t, active);
			}
		}

		else if (sp < 0) {		/* not in a macro at all */
			putc(t, active);	/* output directly..	 */
		}

		/* from here on a macro call is being collected (sp >= 0) */
		else switch(t) {

		case LPAREN:
			/* a nested parenthesis is part of the argument text */
			if (PARLEV > 0)
				chrsave(t);
			while (isspace(l = gpbc()))
				;		/* skip blank, tab, nl.. */
			putback(l);
			PARLEV++;
			break;

		case RPAREN:
			if (--PARLEV > 0)
				chrsave(t);
			else {			/* end of argument list */
				chrsave(EOS);	/* terminate the last argument */

				if (sp == STACKMAX)
					oops("internal stack overflow", "");

				/* user-defined macros are expanded, all others evaluated */
				if (CALTYP == MACRTYPE)
					expand((char **) mstack+fp+1, sp-fp);
				else
					eval((char **) mstack+fp+1, sp-fp, CALTYP);

				ep = PREVEP;	/* flush strspace */
				sp = PREVSP;	/* previous sp..  */
				fp = PREVFP;	/* rewind stack...*/
			}
			break;

		case COMMA:
			/* only a comma at parenthesis level 1 separates arguments */
			if (PARLEV == 1) {
				chrsave(EOS);		/* new argument   */
				while (isspace(l = gpbc()))
					;		/* skip white space before it */
				putback(l);
				pushs(ep);
			} else
				chrsave(t);
			break;

		default:
			chrsave(t);			/* stack the char */
			break;
		}
	}
}
427 
428 /*
429  * build an input token..
430  * consider only those starting with _ or A-Za-z. This is a
431  * combo with lookup to speed things up.
432  */
433 ndptr
434 inspect(tp)
435 register char *tp;
436 {
437 	register char c;
438 	register char *name = tp;
439 	register char *etp = tp+MAXTOK;
440 	register ndptr p;
441 	register unsigned long h = 0;
442 
443 	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
444 		h = (h << 5) + h + (*tp++ = c);
445 	putback(c);
446 	if (tp == etp)
447 		oops("token too long", "");
448 
449 	*tp = EOS;
450 
451 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
452 		if (STREQ(name, p->name))
453 			break;
454 	return p;
455 }
456 
457 /*
458  * initkwds - initialise m4 keywords as fast as possible.
459  * This very similar to install, but without certain overheads,
460  * such as calling lookup. Malloc is not used for storing the
461  * keyword strings, since we simply use the static  pointers
462  * within keywrds block.
463  */
464 void
465 initkwds() {
466 	register int i;
467 	register int h;
468 	register ndptr p;
469 
470 	for (i = 0; i < MAXKEYS; i++) {
471 		h = hash(keywrds[i].knam);
472 		p = (ndptr) xalloc(sizeof(struct ndblock));
473 		p->nxtptr = hashtab[h];
474 		hashtab[h] = p;
475 		p->name = keywrds[i].knam;
476 		p->defn = null;
477 		p->type = keywrds[i].ktyp | STATIC;
478 	}
479 }
480