xref: /netbsd-src/usr.bin/m4/main.c (revision d0fed6c87ddc40a8bffa6f99e7433ddfc864dd83)
1 /*	$NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Ozan Yigit at York University.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
#ifndef lint
/* Copyright notice kept in the object file. */
static char copyright[] =
	"@(#) Copyright (c) 1989, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";

/* Revision identification; the SCCS id is retained but compiled out. */
#if 0
static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
#else
static char rcsid[] = "$NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $";
#endif
#endif /* not lint */
52 
53 /*
54  * main.c
55  * Facility: m4 macro processor
56  * by: oz
57  */
58 
59 #include <sys/types.h>
60 #include <signal.h>
61 #include <errno.h>
62 #include <unistd.h>
63 #include <stdio.h>
64 #include <ctype.h>
65 #include <string.h>
66 #include "mdef.h"
67 #include "stdd.h"
68 #include "extern.h"
69 #include "pathnames.h"
70 
71 ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
72 char buf[BUFSIZE];		/* push-back buffer	       */
73 char *bufbase = buf;		/* the base for current ilevel */
74 char *bbase[MAXINP];		/* the base for each ilevel    */
75 char *bp = buf; 		/* first available character   */
76 char *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */
77 stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
78 char strspace[STRSPMAX+1];	/* string space for evaluation */
79 char *ep = strspace;		/* first free char in strspace */
80 char *endest= strspace+STRSPMAX;/* end of string space	       */
81 int sp; 			/* current m4  stack pointer   */
82 int fp; 			/* m4 call frame pointer       */
83 FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
84 FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
85 FILE *active;			/* active output file pointer  */
86 char *m4temp;			/* filename for diversions     */
87 int ilevel = 0; 		/* input file stack pointer    */
88 int oindex = 0; 		/* diversion index..	       */
89 char *null = "";                /* as it says.. just a null..  */
90 char *m4wraps = "";             /* m4wrap string default..     */
91 char *progname;			/* name of this program        */
92 char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote character  (`)   */
93 char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote character (')   */
94 char scommt[MAXCCHARS+1] = {SCOMMT};	/* start character for comment */
95 char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end character for comment   */
96 
97 struct keyblk keywrds[] = {	/* m4 keywords to be installed */
98 	"include",      INCLTYPE,
99 	"sinclude",     SINCTYPE,
100 	"define",       DEFITYPE,
101 	"defn",         DEFNTYPE,
102 	"divert",       DIVRTYPE,
103 	"expr",         EXPRTYPE,
104 	"eval",         EXPRTYPE,
105 	"substr",       SUBSTYPE,
106 	"ifelse",       IFELTYPE,
107 	"ifdef",        IFDFTYPE,
108 	"len",          LENGTYPE,
109 	"incr",         INCRTYPE,
110 	"decr",         DECRTYPE,
111 	"dnl",          DNLNTYPE,
112 	"changequote",  CHNQTYPE,
113 	"changecom",    CHNCTYPE,
114 	"index",        INDXTYPE,
115 #ifdef EXTENDED
116 	"paste",        PASTTYPE,
117 	"spaste",       SPASTYPE,
118 #endif
119 	"popdef",       POPDTYPE,
120 	"pushdef",      PUSDTYPE,
121 	"dumpdef",      DUMPTYPE,
122 	"shift",        SHIFTYPE,
123 	"translit",     TRNLTYPE,
124 	"undefine",     UNDFTYPE,
125 	"undivert",     UNDVTYPE,
126 	"divnum",       DIVNTYPE,
127 	"maketemp",     MKTMTYPE,
128 	"errprint",     ERRPTYPE,
129 	"m4wrap",       M4WRTYPE,
130 	"m4exit",       EXITTYPE,
131 	"syscmd",       SYSCTYPE,
132 	"sysval",       SYSVTYPE,
133 
134 #if defined(unix) || defined(__NetBSD__)
135 	"unix",         MACRTYPE,
136 #else
137 #ifdef vms
138 	"vms",          MACRTYPE,
139 #endif
140 #endif
141 };
142 
/* number of rows in keywrds[] */
#define MAXKEYS	(sizeof(keywrds) / sizeof(keywrds[0]))

/* command-line parsing interface (getopt) */
extern int getopt();
extern int optind;
extern char *optarg;

/* functions defined later in this file */
void macro();
void initkwds();
151 
152 int
153 main(argc,argv)
154 	int argc;
155 	char *argv[];
156 {
157 	register int c;
158 	register int n;
159 	char *p;
160 	register FILE *ifp;
161 
162 	progname = basename(argv[0]);
163 
164 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
165 		signal(SIGINT, onintr);
166 
167 	initkwds();
168 
169 	while ((c = getopt(argc, argv, "tD:U:o:")) != EOF)
170 		switch(c) {
171 
172 		case 'D':               /* define something..*/
173 			for (p = optarg; *p; p++)
174 				if (*p == '=')
175 					break;
176 			if (*p)
177 				*p++ = EOS;
178 			dodefine(optarg, p);
179 			break;
180 		case 'U':               /* undefine...       */
181 			remhash(optarg, TOP);
182 			break;
183 		case 'o':		/* specific output   */
184 		case '?':
185 			usage();
186 		}
187 
188         argc -= optind;
189         argv += optind;
190 
191 	active = stdout;		/* default active output     */
192 					/* filename for diversions   */
193 	m4temp = mktemp(xstrdup(_PATH_DIVNAME));
194 
195 	bbase[0] = bufbase;
196         if (!argc) {
197  		sp = -1;		/* stack pointer initialized */
198 		fp = 0; 		/* frame pointer initialized */
199 		infile[0] = stdin;	/* default input (naturally) */
200 		macro();
201 	} else
202 		for (; argc--; ++argv) {
203 			p = *argv;
204 			if (p[0] == '-' && p[1] == '\0')
205 				ifp = stdin;
206 			else if ((ifp = fopen(p, "r")) == NULL)
207 				oops("%s: %s", p, strerror(errno));
208 			sp = -1;
209 			fp = 0;
210 			infile[0] = ifp;
211 			macro();
212 			if (ifp != stdin)
213 				(void)fclose(ifp);
214 		}
215 
216 	if (*m4wraps) { 		/* anything for rundown ??   */
217 		ilevel = 0;		/* in case m4wrap includes.. */
218 		bufbase = bp = buf;	/* use the entire buffer   */
219 		putback(EOF);		/* eof is a must !!	     */
220 		pbstr(m4wraps); 	/* user-defined wrapup act   */
221 		macro();		/* last will and testament   */
222 	}
223 
224 	if (active != stdout)
225 		active = stdout;	/* reset output just in case */
226 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
227 		if (outfile[n] != NULL)
228 			getdiv(n);
229 					/* remove bitbucket if used  */
230 	if (outfile[0] != NULL) {
231 		(void) fclose(outfile[0]);
232 		m4temp[UNIQUE] = '0';
233 #ifdef vms
234 		(void) remove(m4temp);
235 #else
236 		(void) unlink(m4temp);
237 #endif
238 	}
239 
240 	return 0;
241 }
242 
243 ndptr inspect();
244 
/*
 * Look ahead (at most MAXCCHARS characters) for `token'.
 * (on input `t == token[0]', i.e. the caller has already read the
 * first character of the candidate delimiter)
 * Used for comment and quoting delimiters.
 * Returns 1 if `token' is present; its remaining characters have been
 *           consumed from the input.
 *         0 if `token' is not present; everything read here, including
 *           an end-of-file indication, has been pushed back, so only
 *           `t' itself is still held by the caller.
 */
int
do_look_ahead(t, token)
	int	t;
	char	*token;
{
	int i;

	if (t != token[0])
		oops("internal error", "");

	/* i counts the token characters matched so far, t included */
	for (i = 1; *++token; i++) {
		t = gpbc();
		if (t == EOF || t != *token) {
			/*
			 * Push the offending character back even when it is
			 * EOF.  Dropping it would lose the end marker that
			 * main() pushes in front of the m4wrap text and make
			 * the next read fall through to infile[0], which
			 * main() may already have closed.
			 */
			putback(t);
			/* then restore the matched characters after the first */
			while (--i)
				putback(*--token);
			return 0;
		}
	}
	return 1;
}

/*
 * Cheap first-character test in front of do_look_ahead(); note that `t'
 * and `token' are each evaluated twice.
 */
#define LOOK_AHEAD(t, token) ((t)==(token)[0] && do_look_ahead(t,token))
276 
277 /*
278  * macro - the work horse..
279  */
280 void
281 macro() {
282 	char token[MAXTOK], chars[2];
283 	register char *s;
284 	register int t, l;
285 	register ndptr p;
286 	register int  nlpar;
287 
288 	cycle {
289 		t = gpbc();
290 		if (t == '_' || isalpha(t)) {
291 			putback(t);
292 			if ((p = inspect(s = token)) == nil) {
293 				if (sp < 0)
294 					while (*s)
295 						putc(*s++, active);
296 				else
297 					while (*s)
298 						chrsave(*s++);
299 			}
300 			else {
301 		/*
302 		 * real thing.. First build a call frame:
303 		 */
304 				pushf(fp);	/* previous call frm */
305 				pushf(p->type); /* type of the call  */
306 				pushf(0);	/* parenthesis level */
307 				fp = sp;	/* new frame pointer */
308 		/*
309 		 * now push the string arguments:
310 		 */
311 				pushs(p->defn);	      /* defn string */
312 				pushs(p->name);	      /* macro name  */
313 				pushs(ep);	      /* start next..*/
314 
315 				putback(l = gpbc());
316 				if (l != LPAREN)  {   /* add bracks  */
317 					putback(RPAREN);
318 					putback(LPAREN);
319 				}
320 			}
321 		}
322 		else if (t == EOF) {
323 			if (sp > -1)
324 				oops("unexpected end of input", "");
325 			if (ilevel <= 0)
326 				break;			/* all done thanks.. */
327 			--ilevel;
328 			(void) fclose(infile[ilevel+1]);
329 			bufbase = bbase[ilevel];
330 			continue;
331 		}
332 	/*
333 	 * non-alpha token possibly seen..
334 	 * [the order of else if .. stmts is important.]
335 	 */
336 		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
337 			nlpar = 1;
338 			do {
339 
340 				l = gpbc();
341 				if (LOOK_AHEAD(l,rquote)) {
342 					nlpar--;
343 					s = rquote;
344 				} else if (LOOK_AHEAD(l,lquote)) {
345 					nlpar++;
346 					s = lquote;
347 				} else if (l == EOF)
348 					oops("missing right quote", "");
349 				else {
350 					chars[0] = l;
351 					chars[1] = '\0';
352 					s = chars;
353 				}
354 				if (nlpar > 0) {
355 					if (sp < 0)
356 						while (*s)
357 							putc(*s++, active);
358 					else
359 						while (*s)
360 							chrsave(*s++);
361 				}
362 			}
363 			while (nlpar != 0);
364 		}
365 
366 		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
367 			int i;
368 			for (i = 0; i < MAXCCHARS && scommt[i]; i++)
369 				putc(scommt[i], active);
370 
371 			for(;;) {
372 				t = gpbc();
373 				if (LOOK_AHEAD(t, ecommt)) {
374 					for (i = 0; i < MAXCCHARS && ecommt[i];
375 					     i++)
376 						putc(ecommt[i], active);
377 					break;
378 				}
379 				if (t == EOF)
380 					break;
381 				putc(t, active);
382 			}
383 		}
384 
385 		else if (sp < 0) {		/* not in a macro at all */
386 			putc(t, active);	/* output directly..	 */
387 		}
388 
389 		else switch(t) {
390 
391 		case LPAREN:
392 			if (PARLEV > 0)
393 				chrsave(t);
394 			while (isspace(l = gpbc()))
395 				;		/* skip blank, tab, nl.. */
396 			putback(l);
397 			PARLEV++;
398 			break;
399 
400 		case RPAREN:
401 			if (--PARLEV > 0)
402 				chrsave(t);
403 			else {			/* end of argument list */
404 				chrsave(EOS);
405 
406 				if (sp == STACKMAX)
407 					oops("internal stack overflow", "");
408 
409 				if (CALTYP == MACRTYPE)
410 					expand((char **) mstack+fp+1, sp-fp);
411 				else
412 					eval((char **) mstack+fp+1, sp-fp, CALTYP);
413 
414 				ep = PREVEP;	/* flush strspace */
415 				sp = PREVSP;	/* previous sp..  */
416 				fp = PREVFP;	/* rewind stack...*/
417 			}
418 			break;
419 
420 		case COMMA:
421 			if (PARLEV == 1) {
422 				chrsave(EOS);		/* new argument   */
423 				while (isspace(l = gpbc()))
424 					;
425 				putback(l);
426 				pushs(ep);
427 			} else
428 				chrsave(t);
429 			break;
430 
431 		default:
432 			chrsave(t);			/* stack the char */
433 			break;
434 		}
435 	}
436 }
437 
438 /*
439  * build an input token..
440  * consider only those starting with _ or A-Za-z. This is a
441  * combo with lookup to speed things up.
442  */
443 ndptr
444 inspect(tp)
445 register char *tp;
446 {
447 	register char c;
448 	register char *name = tp;
449 	register char *etp = tp+MAXTOK;
450 	register ndptr p;
451 	register unsigned long h = 0;
452 
453 	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
454 		h = (h << 5) + h + (*tp++ = c);
455 	putback(c);
456 	if (tp == etp)
457 		oops("token too long", "");
458 
459 	*tp = EOS;
460 
461 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
462 		if (STREQ(name, p->name))
463 			break;
464 	return p;
465 }
466 
467 /*
468  * initkwds - initialise m4 keywords as fast as possible.
469  * This very similar to install, but without certain overheads,
470  * such as calling lookup. Malloc is not used for storing the
471  * keyword strings, since we simply use the static  pointers
472  * within keywrds block.
473  */
474 void
475 initkwds() {
476 	register int i;
477 	register int h;
478 	register ndptr p;
479 
480 	for (i = 0; i < MAXKEYS; i++) {
481 		h = hash(keywrds[i].knam);
482 		p = (ndptr) xalloc(sizeof(struct ndblock));
483 		p->nxtptr = hashtab[h];
484 		hashtab[h] = p;
485 		p->name = keywrds[i].knam;
486 		p->defn = null;
487 		p->type = keywrds[i].ktyp | STATIC;
488 	}
489 }
490