138823Sbostic /* 238823Sbostic * Copyright (c) 1989 The Regents of the University of California. 338823Sbostic * All rights reserved. 438823Sbostic * 538823Sbostic * This code is derived from software contributed to Berkeley by 638823Sbostic * Ozan Yigit. 738823Sbostic * 8*42689Sbostic * %sccs.include.redist.c% 938823Sbostic */ 1038823Sbostic 1138823Sbostic #ifndef lint 12*42689Sbostic static char sccsid[] = "@(#)main.c 5.4 (Berkeley) 06/01/90"; 1338823Sbostic #endif /* not lint */ 1438823Sbostic 1538823Sbostic /* 1638823Sbostic * main.c 1738823Sbostic * Facility: m4 macro processor 1838823Sbostic * by: oz 1938823Sbostic */ 2038823Sbostic 2138823Sbostic #include "mdef.h" 2238823Sbostic 2338823Sbostic /* 2438823Sbostic * m4 - macro processor 2538823Sbostic * 2638823Sbostic * PD m4 is based on the macro tool distributed with the software 2738823Sbostic * tools (VOS) package, and described in the "SOFTWARE TOOLS" and 2838823Sbostic * "SOFTWARE TOOLS IN PASCAL" books. It has been expanded to include 2938823Sbostic * most of the command set of SysV m4, the standard UN*X macro processor. 3038823Sbostic * 3138823Sbostic * Since both PD m4 and UN*X m4 are based on SOFTWARE TOOLS macro, 3238823Sbostic * there may be certain implementation similarities between 3338823Sbostic * the two. The PD m4 was produced without ANY references to m4 3438823Sbostic * sources. 3538823Sbostic * 3638823Sbostic * References: 3738823Sbostic * 3838823Sbostic * Software Tools distribution: macro 3938823Sbostic * 4038823Sbostic * Kernighan, Brian W. and P. J. Plauger, SOFTWARE 4138823Sbostic * TOOLS IN PASCAL, Addison-Wesley, Mass. 1981 4238823Sbostic * 4338823Sbostic * Kernighan, Brian W. and P. J. Plauger, SOFTWARE 4438823Sbostic * TOOLS, Addison-Wesley, Mass. 1976 4538823Sbostic * 4638823Sbostic * Kernighan, Brian W. and Dennis M. Ritchie, 4738823Sbostic * THE M4 MACRO PROCESSOR, Unix Programmer's Manual, 4838823Sbostic * Seventh Edition, Vol. 2, Bell Telephone Labs, 1979 4938823Sbostic * 5038823Sbostic * System V man page for M4 5138823Sbostic * 5238823Sbostic * Modification History: 5338823Sbostic * 5438823Sbostic * Jan 28 1986 Oz Break the whole thing into little 5538823Sbostic * pieces, for easier (?) maintenance. 5638823Sbostic * 5738823Sbostic * Dec 12 1985 Oz Optimize the code, try to squeeze 5838823Sbostic * few microseconds out.. 5938823Sbostic * 6038823Sbostic * Dec 05 1985 Oz Add getopt interface, define (-D), 6138823Sbostic * undefine (-U) options. 6238823Sbostic * 6338823Sbostic * Oct 21 1985 Oz Clean up various bugs, add comment handling. 6438823Sbostic * 6538823Sbostic * June 7 1985 Oz Add some of SysV m4 stuff (m4wrap, pushdef, 6638823Sbostic * popdef, decr, shift etc.). 6738823Sbostic * 6838823Sbostic * June 5 1985 Oz Initial cut. 6938823Sbostic * 7038823Sbostic * Implementation Notes: 7138823Sbostic * 7238823Sbostic * [1] PD m4 uses a different (and simpler) stack mechanism than the one 7338823Sbostic * described in Software Tools and Software Tools in Pascal books. 7438823Sbostic * The triple stack nonsense is replaced with a single stack containing 7538823Sbostic * the call frames and the arguments. Each frame is back-linked to a 7638823Sbostic * previous stack frame, which enables us to rewind the stack after 7738823Sbostic * each nested call is completed. Each argument is a character pointer 7838823Sbostic * to the beginning of the argument string within the string space. 7938823Sbostic * The only exceptions to this are (*) arg 0 and arg 1, which are 8038823Sbostic * the macro definition and macro name strings, stored dynamically 8138823Sbostic * for the hash table. 8238823Sbostic * 8338823Sbostic * . . 8438823Sbostic * | . | <-- sp | . | 8538823Sbostic * +-------+ +-----+ 8638823Sbostic * | arg 3 ------------------------------->| str | 8738823Sbostic * +-------+ | . | 8838823Sbostic * | arg 2 --------------+ . 8938823Sbostic * +-------+ | 9038823Sbostic * * | | | 9138823Sbostic * +-------+ | +-----+ 9238823Sbostic * | plev | <-- fp +---------------->| str | 9338823Sbostic * +-------+ | . | 9438823Sbostic * | type | . 9538823Sbostic * +-------+ 9638823Sbostic * | prcf -----------+ plev: paren level 9738823Sbostic * +-------+ | type: call type 9838823Sbostic * | . | | prcf: prev. call frame 9938823Sbostic * . | 10038823Sbostic * +-------+ | 10138823Sbostic * | <----------+ 10238823Sbostic * +-------+ 10338823Sbostic * 10438823Sbostic * [2] We have three types of null values: 10538823Sbostic * 10638823Sbostic * nil - nodeblock pointer type 0 10738823Sbostic * null - null string ("") 10838823Sbostic * NULL - Stdio-defined NULL 10938823Sbostic * 11038823Sbostic */ 11138823Sbostic 11238823Sbostic ndptr hashtab[HASHSIZE]; /* hash table for macros etc. */ 11338823Sbostic char buf[BUFSIZE]; /* push-back buffer */ 11438823Sbostic char *bp = buf; /* first available character */ 11538823Sbostic char *endpbb = buf+BUFSIZE; /* end of push-back buffer */ 11638823Sbostic stae mstack[STACKMAX+1]; /* stack of m4 machine */ 11738823Sbostic char strspace[STRSPMAX+1]; /* string space for evaluation */ 11838823Sbostic char *ep = strspace; /* first free char in strspace */ 11938823Sbostic char *endest= strspace+STRSPMAX;/* end of string space */ 12038823Sbostic int sp; /* current m4 stack pointer */ 12138823Sbostic int fp; /* m4 call frame pointer */ 12238823Sbostic FILE *infile[MAXINP]; /* input file stack (0=stdin) */ 12338823Sbostic FILE *outfile[MAXOUT]; /* diversion array(0=bitbucket)*/ 12438823Sbostic FILE *active; /* active output file pointer */ 12538823Sbostic char *m4temp; /* filename for diversions */ 12638823Sbostic int ilevel = 0; /* input file stack pointer */ 12738823Sbostic int oindex = 0; /* diversion index.. */ 12838823Sbostic char *null = ""; /* as it says.. just a null.. */ 12938823Sbostic char *m4wraps = ""; /* m4wrap string default.. */ 13038823Sbostic char lquote = LQUOTE; /* left quote character (`) */ 13138823Sbostic char rquote = RQUOTE; /* right quote character (') */ 13238823Sbostic char scommt = SCOMMT; /* start character for comment */ 13338823Sbostic char ecommt = ECOMMT; /* end character for comment */ 13438823Sbostic struct keyblk keywrds[] = { /* m4 keywords to be installed */ 13538823Sbostic "include", INCLTYPE, 13638823Sbostic "sinclude", SINCTYPE, 13738823Sbostic "define", DEFITYPE, 13838823Sbostic "defn", DEFNTYPE, 13938823Sbostic "divert", DIVRTYPE, 14038823Sbostic "expr", EXPRTYPE, 14138823Sbostic "eval", EXPRTYPE, 14238823Sbostic "substr", SUBSTYPE, 14338823Sbostic "ifelse", IFELTYPE, 14438823Sbostic "ifdef", IFDFTYPE, 14538823Sbostic "len", LENGTYPE, 14638823Sbostic "incr", INCRTYPE, 14738823Sbostic "decr", DECRTYPE, 14838823Sbostic "dnl", DNLNTYPE, 14938823Sbostic "changequote", CHNQTYPE, 15038823Sbostic "changecom", CHNCTYPE, 15138823Sbostic "index", INDXTYPE, 15238823Sbostic #ifdef EXTENDED 15338823Sbostic "paste", PASTTYPE, 15438823Sbostic "spaste", SPASTYPE, 15538823Sbostic #endif 15638823Sbostic "popdef", POPDTYPE, 15738823Sbostic "pushdef", PUSDTYPE, 15838823Sbostic "dumpdef", DUMPTYPE, 15938823Sbostic "shift", SHIFTYPE, 16038823Sbostic "translit", TRNLTYPE, 16138823Sbostic "undefine", UNDFTYPE, 16238823Sbostic "undivert", UNDVTYPE, 16338823Sbostic "divnum", DIVNTYPE, 16438823Sbostic "maketemp", MKTMTYPE, 16538823Sbostic "errprint", ERRPTYPE, 16638823Sbostic "m4wrap", M4WRTYPE, 16738823Sbostic "m4exit", EXITTYPE, 16838823Sbostic "syscmd", SYSCTYPE, 16938823Sbostic "sysval", SYSVTYPE, 17038823Sbostic "unix", MACRTYPE, 17138823Sbostic }; 17238823Sbostic 17338823Sbostic #define MAXKEYS (sizeof(keywrds)/sizeof(struct keyblk)) 17438823Sbostic 17538823Sbostic extern ndptr lookup(); 17638823Sbostic extern ndptr addent(); 17738823Sbostic extern int onintr(); 17838823Sbostic 17938823Sbostic extern char *malloc(); 18038823Sbostic extern char *mktemp(); 18138823Sbostic 18238823Sbostic extern int optind; 18338823Sbostic extern char *optarg; 18438823Sbostic 18538823Sbostic main(argc,argv) 18638823Sbostic char *argv[]; 18738823Sbostic { 18838823Sbostic register int c; 18938823Sbostic register int n; 19038823Sbostic char *p; 19138823Sbostic 19238823Sbostic if (signal(SIGINT, SIG_IGN) != SIG_IGN) 19338823Sbostic signal(SIGINT, onintr); 19438823Sbostic #ifdef NONZEROPAGES 19538823Sbostic initm4(); 19638823Sbostic #endif 19738823Sbostic initkwds(); 19838823Sbostic 19938823Sbostic while ((c = getopt(argc, argv, "tD:U:o:")) != EOF) 20038823Sbostic switch(c) { 20138823Sbostic 20238823Sbostic case 'D': /* define something..*/ 20338823Sbostic for (p = optarg; *p; p++) 20438823Sbostic if (*p == '=') 20538823Sbostic break; 20638823Sbostic if (*p) 20738823Sbostic *p++ = EOS; 20838823Sbostic dodefine(optarg, p); 20938823Sbostic break; 21038823Sbostic case 'U': /* undefine... */ 21138823Sbostic remhash(optarg, TOP); 21238823Sbostic break; 21338823Sbostic case 'o': /* specific output */ 21438823Sbostic case '?': 21538823Sbostic default: 21638823Sbostic usage(); 21738823Sbostic } 21838823Sbostic 21938823Sbostic infile[0] = stdin; /* default input (naturally) */ 22038823Sbostic active = stdout; /* default active output */ 22138823Sbostic m4temp = mktemp(DIVNAM); /* filename for diversions */ 22238823Sbostic 22338823Sbostic sp = -1; /* stack pointer initialized */ 22438823Sbostic fp = 0; /* frame pointer initialized */ 22538823Sbostic 22638823Sbostic macro(); /* get some work done here */ 22738823Sbostic 22838823Sbostic if (*m4wraps) { /* anything for rundown ?? */ 22938823Sbostic ilevel = 0; /* in case m4wrap includes.. */ 23038823Sbostic putback(EOF); /* eof is a must !! */ 23138823Sbostic pbstr(m4wraps); /* user-defined wrapup act */ 23238823Sbostic macro(); /* last will and testament */ 23338823Sbostic } 23438823Sbostic 23539102Sbostic if (active != stdout) 23639102Sbostic active = stdout; /* reset output just in case */ 23739102Sbostic for (n = 1; n < MAXOUT; n++) /* default wrap-up: undivert */ 23839102Sbostic if (outfile[n] != NULL) 23939102Sbostic getdiv(n); 24038823Sbostic /* remove bitbucket if used */ 24138823Sbostic if (outfile[0] != NULL) { 24238823Sbostic (void) fclose(outfile[0]); 24338823Sbostic m4temp[UNIQUE] = '0'; 24438823Sbostic (void) unlink(m4temp); 24538823Sbostic } 24638823Sbostic 24738823Sbostic exit(0); 24838823Sbostic } 24938823Sbostic 25038823Sbostic ndptr inspect(); /* forward ... */ 25138823Sbostic 25238823Sbostic /* 25338823Sbostic * macro - the work horse.. 25438823Sbostic * 25538823Sbostic */ 25638823Sbostic macro() { 25738823Sbostic char token[MAXTOK]; 25838823Sbostic register char *s; 25938823Sbostic register int t, l; 26038823Sbostic register ndptr p; 26138823Sbostic register int nlpar; 26238823Sbostic 26338823Sbostic cycle { 26438823Sbostic if ((t = gpbc()) == '_' || isalpha(t)) { 26538823Sbostic putback(t); 26638823Sbostic if ((p = inspect(s = token)) == nil) { 26738823Sbostic if (sp < 0) 26838823Sbostic while (*s) 26938823Sbostic putc(*s++, active); 27038823Sbostic else 27138823Sbostic while (*s) 27238823Sbostic chrsave(*s++); 27338823Sbostic } 27438823Sbostic else { 27538823Sbostic /* 27638823Sbostic * real thing.. First build a call frame: 27738823Sbostic * 27838823Sbostic */ 27938823Sbostic pushf(fp); /* previous call frm */ 28038823Sbostic pushf(p->type); /* type of the call */ 28138823Sbostic pushf(0); /* parenthesis level */ 28238823Sbostic fp = sp; /* new frame pointer */ 28338823Sbostic /* 28438823Sbostic * now push the string arguments: 28538823Sbostic * 28638823Sbostic */ 28738823Sbostic pushs(p->defn); /* defn string */ 28838823Sbostic pushs(p->name); /* macro name */ 28938823Sbostic pushs(ep); /* start next..*/ 29038823Sbostic 29138823Sbostic putback(l = gpbc()); 29238823Sbostic if (l != LPAREN) { /* add bracks */ 29338823Sbostic putback(RPAREN); 29438823Sbostic putback(LPAREN); 29538823Sbostic } 29638823Sbostic } 29738823Sbostic } 29838823Sbostic else if (t == EOF) { 29938823Sbostic if (sp > -1) 30038823Sbostic error("m4: unexpected end of input"); 30138823Sbostic if (--ilevel < 0) 30238823Sbostic break; /* all done thanks.. */ 30338823Sbostic (void) fclose(infile[ilevel+1]); 30438823Sbostic continue; 30538823Sbostic } 30638823Sbostic /* 30738823Sbostic * non-alpha single-char token seen.. 30838823Sbostic * [the order of else if .. stmts is 30938823Sbostic * important.] 31038823Sbostic * 31138823Sbostic */ 31238823Sbostic else if (t == lquote) { /* strip quotes */ 31338823Sbostic nlpar = 1; 31438823Sbostic do { 31538823Sbostic if ((l = gpbc()) == rquote) 31638823Sbostic nlpar--; 31738823Sbostic else if (l == lquote) 31838823Sbostic nlpar++; 31938823Sbostic else if (l == EOF) 32038823Sbostic error("m4: missing right quote"); 32138823Sbostic if (nlpar > 0) { 32238823Sbostic if (sp < 0) 32338823Sbostic putc(l, active); 32438823Sbostic else 32538823Sbostic chrsave(l); 32638823Sbostic } 32738823Sbostic } 32838823Sbostic while (nlpar != 0); 32938823Sbostic } 33038823Sbostic 33138823Sbostic else if (sp < 0) { /* not in a macro at all */ 33238823Sbostic if (t == scommt) { /* comment handling here */ 33338823Sbostic putc(t, active); 33438823Sbostic while ((t = gpbc()) != ecommt) 33538823Sbostic putc(t, active); 33638823Sbostic } 33738823Sbostic putc(t, active); /* output directly.. */ 33838823Sbostic } 33938823Sbostic 34038823Sbostic else switch(t) { 34138823Sbostic 34238823Sbostic case LPAREN: 34338823Sbostic if (PARLEV > 0) 34438823Sbostic chrsave(t); 34538823Sbostic while (isspace(l = gpbc())) 34638823Sbostic ; /* skip blank, tab, nl.. */ 34738823Sbostic putback(l); 34838823Sbostic PARLEV++; 34938823Sbostic break; 35038823Sbostic 35138823Sbostic case RPAREN: 35238823Sbostic if (--PARLEV > 0) 35338823Sbostic chrsave(t); 35438823Sbostic else { /* end of argument list */ 35538823Sbostic chrsave(EOS); 35638823Sbostic 35738823Sbostic if (sp == STACKMAX) 35838823Sbostic error("m4: internal stack overflow"); 35938823Sbostic 36038823Sbostic if (CALTYP == MACRTYPE) 36138823Sbostic expand(mstack+fp+1, sp-fp); 36238823Sbostic else 36338823Sbostic eval(mstack+fp+1, sp-fp, CALTYP); 36438823Sbostic 36538823Sbostic ep = PREVEP; /* flush strspace */ 36638823Sbostic sp = PREVSP; /* previous sp.. */ 36738823Sbostic fp = PREVFP; /* rewind stack...*/ 36838823Sbostic } 36938823Sbostic break; 37038823Sbostic 37138823Sbostic case COMMA: 37238823Sbostic if (PARLEV == 1) { 37338823Sbostic chrsave(EOS); /* new argument */ 37438823Sbostic while (isspace(l = gpbc())) 37538823Sbostic ; 37638823Sbostic putback(l); 37738823Sbostic pushs(ep); 37838823Sbostic } 37938823Sbostic break; 38038823Sbostic default: 38138823Sbostic chrsave(t); /* stack the char */ 38238823Sbostic break; 38338823Sbostic } 38438823Sbostic } 38538823Sbostic } 38638823Sbostic 38738823Sbostic 38838823Sbostic /* 38938823Sbostic * build an input token.. 39038823Sbostic * consider only those starting with _ or A-Za-z. This is a 39138823Sbostic * combo with lookup to speed things up. 39238823Sbostic */ 39338823Sbostic ndptr 39438823Sbostic inspect(tp) 39538823Sbostic register char *tp; 39638823Sbostic { 39738823Sbostic register int h = 0; 39838823Sbostic register char c; 39938823Sbostic register char *name = tp; 40038823Sbostic register char *etp = tp+MAXTOK; 40138823Sbostic register ndptr p; 40238823Sbostic 40338823Sbostic while (tp < etp && (isalnum(c = gpbc()) || c == '_')) 40438823Sbostic h += (*tp++ = c); 40538823Sbostic putback(c); 40638823Sbostic if (tp == etp) 40738823Sbostic error("m4: token too long"); 40838823Sbostic *tp = EOS; 40938823Sbostic for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr) 41038823Sbostic if (strcmp(name, p->name) == 0) 41138823Sbostic break; 41238823Sbostic return(p); 41338823Sbostic } 41438823Sbostic 41538823Sbostic #ifdef NONZEROPAGES 41638823Sbostic /* 41738823Sbostic * initm4 - initialize various tables. Useful only if your system 41838823Sbostic * does not know anything about demand-zero pages. 41938823Sbostic * 42038823Sbostic */ 42138823Sbostic initm4() 42238823Sbostic { 42338823Sbostic register int i; 42438823Sbostic 42538823Sbostic for (i = 0; i < HASHSIZE; i++) 42638823Sbostic hashtab[i] = nil; 42738823Sbostic for (i = 0; i < MAXOUT; i++) 42838823Sbostic outfile[i] = NULL; 42938823Sbostic } 43038823Sbostic #endif 43138823Sbostic 43238823Sbostic /* 43338823Sbostic * initkwds - initialise m4 keywords as fast as possible. 43438823Sbostic * This very similar to install, but without certain overheads, 43538823Sbostic * such as calling lookup. Malloc is not used for storing the 43638823Sbostic * keyword strings, since we simply use the static pointers 43738823Sbostic * within keywrds block. We also assume that there is enough memory 43838823Sbostic * to at least install the keywords (i.e. malloc won't fail). 43938823Sbostic * 44038823Sbostic */ 44138823Sbostic initkwds() { 44238823Sbostic register int i; 44338823Sbostic register int h; 44438823Sbostic register ndptr p; 44538823Sbostic 44638823Sbostic for (i = 0; i < MAXKEYS; i++) { 44738823Sbostic h = hash(keywrds[i].knam); 44838823Sbostic p = (ndptr) malloc(sizeof(struct ndblock)); 44938823Sbostic p->nxtptr = hashtab[h]; 45038823Sbostic hashtab[h] = p; 45138823Sbostic p->name = keywrds[i].knam; 45238823Sbostic p->defn = null; 45338823Sbostic p->type = keywrds[i].ktyp | STATIC; 45438823Sbostic } 45538823Sbostic } 456