xref: /freebsd-src/contrib/one-true-awk/lex.c (revision 8d457988a72487b35ee3922671775d73169339e3)
12a55deb1SDavid E. O'Brien /****************************************************************
22a55deb1SDavid E. O'Brien Copyright (C) Lucent Technologies 1997
32a55deb1SDavid E. O'Brien All Rights Reserved
42a55deb1SDavid E. O'Brien 
52a55deb1SDavid E. O'Brien Permission to use, copy, modify, and distribute this software and
62a55deb1SDavid E. O'Brien its documentation for any purpose and without fee is hereby
72a55deb1SDavid E. O'Brien granted, provided that the above copyright notice appear in all
82a55deb1SDavid E. O'Brien copies and that both that the copyright notice and this
92a55deb1SDavid E. O'Brien permission notice and warranty disclaimer appear in supporting
102a55deb1SDavid E. O'Brien documentation, and that the name Lucent Technologies or any of
112a55deb1SDavid E. O'Brien its entities not be used in advertising or publicity pertaining
122a55deb1SDavid E. O'Brien to distribution of the software without specific, written prior
132a55deb1SDavid E. O'Brien permission.
142a55deb1SDavid E. O'Brien 
152a55deb1SDavid E. O'Brien LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
162a55deb1SDavid E. O'Brien INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
172a55deb1SDavid E. O'Brien IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
182a55deb1SDavid E. O'Brien SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
192a55deb1SDavid E. O'Brien WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
202a55deb1SDavid E. O'Brien IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
212a55deb1SDavid E. O'Brien ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
222a55deb1SDavid E. O'Brien THIS SOFTWARE.
232a55deb1SDavid E. O'Brien ****************************************************************/
242a55deb1SDavid E. O'Brien 
252a55deb1SDavid E. O'Brien #include <stdio.h>
262a55deb1SDavid E. O'Brien #include <stdlib.h>
272a55deb1SDavid E. O'Brien #include <string.h>
282a55deb1SDavid E. O'Brien #include <ctype.h>
292a55deb1SDavid E. O'Brien #include "awk.h"
30f39dd6a9SWarner Losh #include "awkgram.tab.h"
312a55deb1SDavid E. O'Brien 
322a55deb1SDavid E. O'Brien extern YYSTYPE	yylval;
33f39dd6a9SWarner Losh extern bool	infunc;
342a55deb1SDavid E. O'Brien 
352a55deb1SDavid E. O'Brien int	lineno	= 1;
362a55deb1SDavid E. O'Brien int	bracecnt = 0;
372a55deb1SDavid E. O'Brien int	brackcnt  = 0;
382a55deb1SDavid E. O'Brien int	parencnt = 0;
392a55deb1SDavid E. O'Brien 
/* One entry of the reserved-word table searched by word(). */
typedef struct Keyword Keyword;

struct Keyword {
	const char *word;	/* spelling as it appears in the program */
	int sub;		/* value stored in yylval.i when matched */
	int type;		/* token code returned to the parser */
};
452a55deb1SDavid E. O'Brien 
46f39dd6a9SWarner Losh const Keyword keywords[] = {	/* keep sorted: binary searched */
472a55deb1SDavid E. O'Brien 	{ "BEGIN",	XBEGIN,		XBEGIN },
482a55deb1SDavid E. O'Brien 	{ "END",	XEND,		XEND },
492a55deb1SDavid E. O'Brien 	{ "NF",		VARNF,		VARNF },
50eb690a05SWarner Losh 	{ "and",	FAND,		BLTIN },
512a55deb1SDavid E. O'Brien 	{ "atan2",	FATAN,		BLTIN },
522a55deb1SDavid E. O'Brien 	{ "break",	BREAK,		BREAK },
532a55deb1SDavid E. O'Brien 	{ "close",	CLOSE,		CLOSE },
54eb690a05SWarner Losh 	{ "compl",	FCOMPL,		BLTIN },
552a55deb1SDavid E. O'Brien 	{ "continue",	CONTINUE,	CONTINUE },
562a55deb1SDavid E. O'Brien 	{ "cos",	FCOS,		BLTIN },
572a55deb1SDavid E. O'Brien 	{ "delete",	DELETE,		DELETE },
582a55deb1SDavid E. O'Brien 	{ "do",		DO,		DO },
592a55deb1SDavid E. O'Brien 	{ "else",	ELSE,		ELSE },
602a55deb1SDavid E. O'Brien 	{ "exit",	EXIT,		EXIT },
612a55deb1SDavid E. O'Brien 	{ "exp",	FEXP,		BLTIN },
622a55deb1SDavid E. O'Brien 	{ "fflush",	FFLUSH,		BLTIN },
632a55deb1SDavid E. O'Brien 	{ "for",	FOR,		FOR },
642a55deb1SDavid E. O'Brien 	{ "func",	FUNC,		FUNC },
652a55deb1SDavid E. O'Brien 	{ "function",	FUNC,		FUNC },
66eb690a05SWarner Losh 	{ "gensub",	GENSUB,		GENSUB },
672a55deb1SDavid E. O'Brien 	{ "getline",	GETLINE,	GETLINE },
682a55deb1SDavid E. O'Brien 	{ "gsub",	GSUB,		GSUB },
692a55deb1SDavid E. O'Brien 	{ "if",		IF,		IF },
702a55deb1SDavid E. O'Brien 	{ "in",		IN,		IN },
712a55deb1SDavid E. O'Brien 	{ "index",	INDEX,		INDEX },
722a55deb1SDavid E. O'Brien 	{ "int",	FINT,		BLTIN },
732a55deb1SDavid E. O'Brien 	{ "length",	FLENGTH,	BLTIN },
742a55deb1SDavid E. O'Brien 	{ "log",	FLOG,		BLTIN },
75eb690a05SWarner Losh 	{ "lshift",	FLSHIFT,	BLTIN },
762a55deb1SDavid E. O'Brien 	{ "match",	MATCHFCN,	MATCHFCN },
77*8d457988SWarner Losh 	{ "mktime",	FMKTIME,	BLTIN },
782a55deb1SDavid E. O'Brien 	{ "next",	NEXT,		NEXT },
792a55deb1SDavid E. O'Brien 	{ "nextfile",	NEXTFILE,	NEXTFILE },
80eb690a05SWarner Losh 	{ "or",		FFOR,		BLTIN },
812a55deb1SDavid E. O'Brien 	{ "print",	PRINT,		PRINT },
822a55deb1SDavid E. O'Brien 	{ "printf",	PRINTF,		PRINTF },
832a55deb1SDavid E. O'Brien 	{ "rand",	FRAND,		BLTIN },
842a55deb1SDavid E. O'Brien 	{ "return",	RETURN,		RETURN },
85eb690a05SWarner Losh 	{ "rshift",	FRSHIFT,	BLTIN },
862a55deb1SDavid E. O'Brien 	{ "sin",	FSIN,		BLTIN },
872a55deb1SDavid E. O'Brien 	{ "split",	SPLIT,		SPLIT },
882a55deb1SDavid E. O'Brien 	{ "sprintf",	SPRINTF,	SPRINTF },
892a55deb1SDavid E. O'Brien 	{ "sqrt",	FSQRT,		BLTIN },
902a55deb1SDavid E. O'Brien 	{ "srand",	FSRAND,		BLTIN },
91eb690a05SWarner Losh 	{ "strftime",	FSTRFTIME,	BLTIN },
922a55deb1SDavid E. O'Brien 	{ "sub",	SUB,		SUB },
932a55deb1SDavid E. O'Brien 	{ "substr",	SUBSTR,		SUBSTR },
942a55deb1SDavid E. O'Brien 	{ "system",	FSYSTEM,	BLTIN },
95eb690a05SWarner Losh 	{ "systime",	FSYSTIME,	BLTIN },
962a55deb1SDavid E. O'Brien 	{ "tolower",	FTOLOWER,	BLTIN },
972a55deb1SDavid E. O'Brien 	{ "toupper",	FTOUPPER,	BLTIN },
982a55deb1SDavid E. O'Brien 	{ "while",	WHILE,		WHILE },
99eb690a05SWarner Losh 	{ "xor",	FXOR,		BLTIN },
1002a55deb1SDavid E. O'Brien };
1012a55deb1SDavid E. O'Brien 
/*
 * RET(x): return token x from the enclosing function, printing its name
 * first (via tokname) when dbg is set.
 *
 * Wrapped in do { } while (0) so that "RET(x);" is a single statement and
 * is safe in an unbraced if/else; x is evaluated exactly once.
 */
#define	RET(x)	do { \
		int ret_tok_ = (x); \
		if (dbg) \
			printf("lex %s\n", tokname(ret_tok_)); \
		return ret_tok_; \
	} while (0)
1032a55deb1SDavid E. O'Brien 
/*
 * peek: report the next input character without consuming it.
 *
 * Implemented as input() followed by unput(), so it carries unput()'s side
 * effects: peeking at a newline decrements lineno.
 */
static int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
1102a55deb1SDavid E. O'Brien 
111f39dd6a9SWarner Losh static int gettok(char **pbuf, int *psz)	/* get next input token */
1122a55deb1SDavid E. O'Brien {
113007c6572SDag-Erling Smørgrav 	int c, retc;
1142a55deb1SDavid E. O'Brien 	char *buf = *pbuf;
1152a55deb1SDavid E. O'Brien 	int sz = *psz;
1162a55deb1SDavid E. O'Brien 	char *bp = buf;
1172a55deb1SDavid E. O'Brien 
1182a55deb1SDavid E. O'Brien 	c = input();
1192a55deb1SDavid E. O'Brien 	if (c == 0)
1202a55deb1SDavid E. O'Brien 		return 0;
1212a55deb1SDavid E. O'Brien 	buf[0] = c;
1222a55deb1SDavid E. O'Brien 	buf[1] = 0;
1232a55deb1SDavid E. O'Brien 	if (!isalnum(c) && c != '.' && c != '_')
1242a55deb1SDavid E. O'Brien 		return c;
1252a55deb1SDavid E. O'Brien 
1262a55deb1SDavid E. O'Brien 	*bp++ = c;
1272a55deb1SDavid E. O'Brien 	if (isalpha(c) || c == '_') {	/* it's a varname */
1282a55deb1SDavid E. O'Brien 		for ( ; (c = input()) != 0; ) {
1292a55deb1SDavid E. O'Brien 			if (bp-buf >= sz)
130addad6afSRong-En Fan 				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
1312a55deb1SDavid E. O'Brien 					FATAL( "out of space for name %.10s...", buf );
1322a55deb1SDavid E. O'Brien 			if (isalnum(c) || c == '_')
1332a55deb1SDavid E. O'Brien 				*bp++ = c;
1342a55deb1SDavid E. O'Brien 			else {
1352a55deb1SDavid E. O'Brien 				*bp = 0;
1362a55deb1SDavid E. O'Brien 				unput(c);
1372a55deb1SDavid E. O'Brien 				break;
1382a55deb1SDavid E. O'Brien 			}
1392a55deb1SDavid E. O'Brien 		}
1402a55deb1SDavid E. O'Brien 		*bp = 0;
141007c6572SDag-Erling Smørgrav 		retc = 'a';	/* alphanumeric */
142c263f9bfSRuslan Ermilov 	} else {	/* maybe it's a number, but could be . */
1432a55deb1SDavid E. O'Brien 		char *rem;
1442a55deb1SDavid E. O'Brien 		/* read input until can't be a number */
1452a55deb1SDavid E. O'Brien 		for ( ; (c = input()) != 0; ) {
1462a55deb1SDavid E. O'Brien 			if (bp-buf >= sz)
147addad6afSRong-En Fan 				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
1482a55deb1SDavid E. O'Brien 					FATAL( "out of space for number %.10s...", buf );
1492a55deb1SDavid E. O'Brien 			if (isdigit(c) || c == 'e' || c == 'E'
1502a55deb1SDavid E. O'Brien 			  || c == '.' || c == '+' || c == '-')
1512a55deb1SDavid E. O'Brien 				*bp++ = c;
1522a55deb1SDavid E. O'Brien 			else {
1532a55deb1SDavid E. O'Brien 				unput(c);
1542a55deb1SDavid E. O'Brien 				break;
1552a55deb1SDavid E. O'Brien 			}
1562a55deb1SDavid E. O'Brien 		}
1572a55deb1SDavid E. O'Brien 		*bp = 0;
1582a55deb1SDavid E. O'Brien 		strtod(buf, &rem);	/* parse the number */
159007c6572SDag-Erling Smørgrav 		if (rem == buf) {	/* it wasn't a valid number at all */
160c263f9bfSRuslan Ermilov 			buf[1] = 0;	/* return one character as token */
161f39dd6a9SWarner Losh 			retc = (uschar)buf[0];	/* character is its own type */
162c263f9bfSRuslan Ermilov 			unputstr(rem+1); /* put rest back for later */
163007c6572SDag-Erling Smørgrav 		} else {	/* some prefix was a number */
164c263f9bfSRuslan Ermilov 			unputstr(rem);	/* put rest back for later */
165c263f9bfSRuslan Ermilov 			rem[0] = 0;	/* truncate buf after number part */
166c263f9bfSRuslan Ermilov 			retc = '0';	/* type is number */
167007c6572SDag-Erling Smørgrav 		}
1682a55deb1SDavid E. O'Brien 	}
1692a55deb1SDavid E. O'Brien 	*pbuf = buf;
1702a55deb1SDavid E. O'Brien 	*psz = sz;
171007c6572SDag-Erling Smørgrav 	return retc;
1722a55deb1SDavid E. O'Brien }
1732a55deb1SDavid E. O'Brien 
/* Helpers defined further down in this file. */
int word(char *);
int string(void);
int regexpr(void);

/* Set when yylex has just returned the ';' it inserts before '}'; the next call returns the '}'. */
bool sc = false;
/* Set by startreg(); the next yylex call reads a regular expression. */
bool reg = false;
1792a55deb1SDavid E. O'Brien 
1802a55deb1SDavid E. O'Brien int yylex(void)
1812a55deb1SDavid E. O'Brien {
1822a55deb1SDavid E. O'Brien 	int c;
18310ce5b99SWarner Losh 	static char *buf = NULL;
184addad6afSRong-En Fan 	static int bufsize = 5; /* BUG: setting this small causes core dump! */
1852a55deb1SDavid E. O'Brien 
18610ce5b99SWarner Losh 	if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
1872a55deb1SDavid E. O'Brien 		FATAL( "out of space in yylex" );
1882a55deb1SDavid E. O'Brien 	if (sc) {
189f39dd6a9SWarner Losh 		sc = false;
1902a55deb1SDavid E. O'Brien 		RET('}');
1912a55deb1SDavid E. O'Brien 	}
1922a55deb1SDavid E. O'Brien 	if (reg) {
193f39dd6a9SWarner Losh 		reg = false;
1942a55deb1SDavid E. O'Brien 		return regexpr();
1952a55deb1SDavid E. O'Brien 	}
1962a55deb1SDavid E. O'Brien 	for (;;) {
1972a55deb1SDavid E. O'Brien 		c = gettok(&buf, &bufsize);
1982a55deb1SDavid E. O'Brien 		if (c == 0)
1992a55deb1SDavid E. O'Brien 			return 0;
2002a55deb1SDavid E. O'Brien 		if (isalpha(c) || c == '_')
2012a55deb1SDavid E. O'Brien 			return word(buf);
202007c6572SDag-Erling Smørgrav 		if (isdigit(c)) {
203f39dd6a9SWarner Losh 			char *cp = tostring(buf);
204f39dd6a9SWarner Losh 			double result;
205f39dd6a9SWarner Losh 
206f39dd6a9SWarner Losh 			if (is_number(cp, & result))
207f39dd6a9SWarner Losh 				yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
208f39dd6a9SWarner Losh 			else
209f39dd6a9SWarner Losh 				yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
210f39dd6a9SWarner Losh 			free(cp);
2112a55deb1SDavid E. O'Brien 			/* should this also have STR set? */
2122a55deb1SDavid E. O'Brien 			RET(NUMBER);
2132a55deb1SDavid E. O'Brien 		}
2142a55deb1SDavid E. O'Brien 
2152a55deb1SDavid E. O'Brien 		yylval.i = c;
2162a55deb1SDavid E. O'Brien 		switch (c) {
2172a55deb1SDavid E. O'Brien 		case '\n':	/* {EOL} */
218b5253557SWarner Losh 			lineno++;
2192a55deb1SDavid E. O'Brien 			RET(NL);
2202a55deb1SDavid E. O'Brien 		case '\r':	/* assume \n is coming */
2212a55deb1SDavid E. O'Brien 		case ' ':	/* {WS}+ */
2222a55deb1SDavid E. O'Brien 		case '\t':
2232a55deb1SDavid E. O'Brien 			break;
2242a55deb1SDavid E. O'Brien 		case '#':	/* #.* strip comments */
2252a55deb1SDavid E. O'Brien 			while ((c = input()) != '\n' && c != 0)
2262a55deb1SDavid E. O'Brien 				;
2272a55deb1SDavid E. O'Brien 			unput(c);
228f39dd6a9SWarner Losh 			/*
229f39dd6a9SWarner Losh 			 * Next line is a hack, it compensates for
230f39dd6a9SWarner Losh 			 * unput's treatment of \n.
231f39dd6a9SWarner Losh 			 */
232f39dd6a9SWarner Losh 			lineno++;
2332a55deb1SDavid E. O'Brien 			break;
2342a55deb1SDavid E. O'Brien 		case ';':
2352a55deb1SDavid E. O'Brien 			RET(';');
2362a55deb1SDavid E. O'Brien 		case '\\':
2372a55deb1SDavid E. O'Brien 			if (peek() == '\n') {
2382a55deb1SDavid E. O'Brien 				input();
239b5253557SWarner Losh 				lineno++;
2402a55deb1SDavid E. O'Brien 			} else if (peek() == '\r') {
2412a55deb1SDavid E. O'Brien 				input(); input();	/* \n */
2422a55deb1SDavid E. O'Brien 				lineno++;
2432a55deb1SDavid E. O'Brien 			} else {
2442a55deb1SDavid E. O'Brien 				RET(c);
2452a55deb1SDavid E. O'Brien 			}
2462a55deb1SDavid E. O'Brien 			break;
2472a55deb1SDavid E. O'Brien 		case '&':
2482a55deb1SDavid E. O'Brien 			if (peek() == '&') {
2492a55deb1SDavid E. O'Brien 				input(); RET(AND);
2502a55deb1SDavid E. O'Brien 			} else
2512a55deb1SDavid E. O'Brien 				RET('&');
2522a55deb1SDavid E. O'Brien 		case '|':
2532a55deb1SDavid E. O'Brien 			if (peek() == '|') {
2542a55deb1SDavid E. O'Brien 				input(); RET(BOR);
2552a55deb1SDavid E. O'Brien 			} else
2562a55deb1SDavid E. O'Brien 				RET('|');
2572a55deb1SDavid E. O'Brien 		case '!':
2582a55deb1SDavid E. O'Brien 			if (peek() == '=') {
2592a55deb1SDavid E. O'Brien 				input(); yylval.i = NE; RET(NE);
2602a55deb1SDavid E. O'Brien 			} else if (peek() == '~') {
2612a55deb1SDavid E. O'Brien 				input(); yylval.i = NOTMATCH; RET(MATCHOP);
2622a55deb1SDavid E. O'Brien 			} else
2632a55deb1SDavid E. O'Brien 				RET(NOT);
2642a55deb1SDavid E. O'Brien 		case '~':
2652a55deb1SDavid E. O'Brien 			yylval.i = MATCH;
2662a55deb1SDavid E. O'Brien 			RET(MATCHOP);
2672a55deb1SDavid E. O'Brien 		case '<':
2682a55deb1SDavid E. O'Brien 			if (peek() == '=') {
2692a55deb1SDavid E. O'Brien 				input(); yylval.i = LE; RET(LE);
2702a55deb1SDavid E. O'Brien 			} else {
2712a55deb1SDavid E. O'Brien 				yylval.i = LT; RET(LT);
2722a55deb1SDavid E. O'Brien 			}
2732a55deb1SDavid E. O'Brien 		case '=':
2742a55deb1SDavid E. O'Brien 			if (peek() == '=') {
2752a55deb1SDavid E. O'Brien 				input(); yylval.i = EQ; RET(EQ);
2762a55deb1SDavid E. O'Brien 			} else {
2772a55deb1SDavid E. O'Brien 				yylval.i = ASSIGN; RET(ASGNOP);
2782a55deb1SDavid E. O'Brien 			}
2792a55deb1SDavid E. O'Brien 		case '>':
2802a55deb1SDavid E. O'Brien 			if (peek() == '=') {
2812a55deb1SDavid E. O'Brien 				input(); yylval.i = GE; RET(GE);
2822a55deb1SDavid E. O'Brien 			} else if (peek() == '>') {
2832a55deb1SDavid E. O'Brien 				input(); yylval.i = APPEND; RET(APPEND);
2842a55deb1SDavid E. O'Brien 			} else {
2852a55deb1SDavid E. O'Brien 				yylval.i = GT; RET(GT);
2862a55deb1SDavid E. O'Brien 			}
2872a55deb1SDavid E. O'Brien 		case '+':
2882a55deb1SDavid E. O'Brien 			if (peek() == '+') {
2892a55deb1SDavid E. O'Brien 				input(); yylval.i = INCR; RET(INCR);
2902a55deb1SDavid E. O'Brien 			} else if (peek() == '=') {
2912a55deb1SDavid E. O'Brien 				input(); yylval.i = ADDEQ; RET(ASGNOP);
2922a55deb1SDavid E. O'Brien 			} else
2932a55deb1SDavid E. O'Brien 				RET('+');
2942a55deb1SDavid E. O'Brien 		case '-':
2952a55deb1SDavid E. O'Brien 			if (peek() == '-') {
2962a55deb1SDavid E. O'Brien 				input(); yylval.i = DECR; RET(DECR);
2972a55deb1SDavid E. O'Brien 			} else if (peek() == '=') {
2982a55deb1SDavid E. O'Brien 				input(); yylval.i = SUBEQ; RET(ASGNOP);
2992a55deb1SDavid E. O'Brien 			} else
3002a55deb1SDavid E. O'Brien 				RET('-');
3012a55deb1SDavid E. O'Brien 		case '*':
3022a55deb1SDavid E. O'Brien 			if (peek() == '=') {	/* *= */
3032a55deb1SDavid E. O'Brien 				input(); yylval.i = MULTEQ; RET(ASGNOP);
3042a55deb1SDavid E. O'Brien 			} else if (peek() == '*') {	/* ** or **= */
3052a55deb1SDavid E. O'Brien 				input();	/* eat 2nd * */
3062a55deb1SDavid E. O'Brien 				if (peek() == '=') {
3072a55deb1SDavid E. O'Brien 					input(); yylval.i = POWEQ; RET(ASGNOP);
3082a55deb1SDavid E. O'Brien 				} else {
3092a55deb1SDavid E. O'Brien 					RET(POWER);
3102a55deb1SDavid E. O'Brien 				}
3112a55deb1SDavid E. O'Brien 			} else
3122a55deb1SDavid E. O'Brien 				RET('*');
3132a55deb1SDavid E. O'Brien 		case '/':
3142a55deb1SDavid E. O'Brien 			RET('/');
3152a55deb1SDavid E. O'Brien 		case '%':
3162a55deb1SDavid E. O'Brien 			if (peek() == '=') {
3172a55deb1SDavid E. O'Brien 				input(); yylval.i = MODEQ; RET(ASGNOP);
3182a55deb1SDavid E. O'Brien 			} else
3192a55deb1SDavid E. O'Brien 				RET('%');
3202a55deb1SDavid E. O'Brien 		case '^':
3212a55deb1SDavid E. O'Brien 			if (peek() == '=') {
3222a55deb1SDavid E. O'Brien 				input(); yylval.i = POWEQ; RET(ASGNOP);
3232a55deb1SDavid E. O'Brien 			} else
3242a55deb1SDavid E. O'Brien 				RET(POWER);
3252a55deb1SDavid E. O'Brien 
3262a55deb1SDavid E. O'Brien 		case '$':
3272a55deb1SDavid E. O'Brien 			/* BUG: awkward, if not wrong */
3282a55deb1SDavid E. O'Brien 			c = gettok(&buf, &bufsize);
3292a55deb1SDavid E. O'Brien 			if (isalpha(c)) {
3302a55deb1SDavid E. O'Brien 				if (strcmp(buf, "NF") == 0) {	/* very special */
3312a55deb1SDavid E. O'Brien 					unputstr("(NF)");
3322a55deb1SDavid E. O'Brien 					RET(INDIRECT);
3332a55deb1SDavid E. O'Brien 				}
3342a55deb1SDavid E. O'Brien 				c = peek();
3352a55deb1SDavid E. O'Brien 				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
3362a55deb1SDavid E. O'Brien 					unputstr(buf);
3372a55deb1SDavid E. O'Brien 					RET(INDIRECT);
3382a55deb1SDavid E. O'Brien 				}
3392a55deb1SDavid E. O'Brien 				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
3402a55deb1SDavid E. O'Brien 				RET(IVAR);
341007c6572SDag-Erling Smørgrav 			} else if (c == 0) {	/*  */
342007c6572SDag-Erling Smørgrav 				SYNTAX( "unexpected end of input after $" );
343007c6572SDag-Erling Smørgrav 				RET(';');
3442a55deb1SDavid E. O'Brien 			} else {
3452a55deb1SDavid E. O'Brien 				unputstr(buf);
3462a55deb1SDavid E. O'Brien 				RET(INDIRECT);
3472a55deb1SDavid E. O'Brien 			}
3482a55deb1SDavid E. O'Brien 
3492a55deb1SDavid E. O'Brien 		case '}':
3502a55deb1SDavid E. O'Brien 			if (--bracecnt < 0)
3512a55deb1SDavid E. O'Brien 				SYNTAX( "extra }" );
352f39dd6a9SWarner Losh 			sc = true;
3532a55deb1SDavid E. O'Brien 			RET(';');
3542a55deb1SDavid E. O'Brien 		case ']':
3552a55deb1SDavid E. O'Brien 			if (--brackcnt < 0)
3562a55deb1SDavid E. O'Brien 				SYNTAX( "extra ]" );
3572a55deb1SDavid E. O'Brien 			RET(']');
3582a55deb1SDavid E. O'Brien 		case ')':
3592a55deb1SDavid E. O'Brien 			if (--parencnt < 0)
3602a55deb1SDavid E. O'Brien 				SYNTAX( "extra )" );
3612a55deb1SDavid E. O'Brien 			RET(')');
3622a55deb1SDavid E. O'Brien 		case '{':
3632a55deb1SDavid E. O'Brien 			bracecnt++;
3642a55deb1SDavid E. O'Brien 			RET('{');
3652a55deb1SDavid E. O'Brien 		case '[':
3662a55deb1SDavid E. O'Brien 			brackcnt++;
3672a55deb1SDavid E. O'Brien 			RET('[');
3682a55deb1SDavid E. O'Brien 		case '(':
3692a55deb1SDavid E. O'Brien 			parencnt++;
3702a55deb1SDavid E. O'Brien 			RET('(');
3712a55deb1SDavid E. O'Brien 
3722a55deb1SDavid E. O'Brien 		case '"':
3732a55deb1SDavid E. O'Brien 			return string();	/* BUG: should be like tran.c ? */
3742a55deb1SDavid E. O'Brien 
3752a55deb1SDavid E. O'Brien 		default:
3762a55deb1SDavid E. O'Brien 			RET(c);
3772a55deb1SDavid E. O'Brien 		}
3782a55deb1SDavid E. O'Brien 	}
3792a55deb1SDavid E. O'Brien }
3802a55deb1SDavid E. O'Brien 
381f32a6403SWarner Losh extern int runetochar(char *str, int c);
382f32a6403SWarner Losh 
3832a55deb1SDavid E. O'Brien int string(void)
3842a55deb1SDavid E. O'Brien {
3852a55deb1SDavid E. O'Brien 	int c, n;
3862a55deb1SDavid E. O'Brien 	char *s, *bp;
38710ce5b99SWarner Losh 	static char *buf = NULL;
3882a55deb1SDavid E. O'Brien 	static int bufsz = 500;
3892a55deb1SDavid E. O'Brien 
39010ce5b99SWarner Losh 	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
3912a55deb1SDavid E. O'Brien 		FATAL("out of space for strings");
3922a55deb1SDavid E. O'Brien 	for (bp = buf; (c = input()) != '"'; ) {
393addad6afSRong-En Fan 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
3942a55deb1SDavid E. O'Brien 			FATAL("out of space for string %.10s...", buf);
3952a55deb1SDavid E. O'Brien 		switch (c) {
3962a55deb1SDavid E. O'Brien 		case '\n':
3972a55deb1SDavid E. O'Brien 		case '\r':
3982a55deb1SDavid E. O'Brien 		case 0:
399b5253557SWarner Losh 			*bp = '\0';
4002a55deb1SDavid E. O'Brien 			SYNTAX( "non-terminated string %.10s...", buf );
401007c6572SDag-Erling Smørgrav 			if (c == 0)	/* hopeless */
402007c6572SDag-Erling Smørgrav 				FATAL( "giving up" );
403b5253557SWarner Losh 			lineno++;
4042a55deb1SDavid E. O'Brien 			break;
4052a55deb1SDavid E. O'Brien 		case '\\':
4062a55deb1SDavid E. O'Brien 			c = input();
4072a55deb1SDavid E. O'Brien 			switch (c) {
408f39dd6a9SWarner Losh 			case '\n': break;
4092a55deb1SDavid E. O'Brien 			case '"': *bp++ = '"'; break;
4102a55deb1SDavid E. O'Brien 			case 'n': *bp++ = '\n'; break;
4112a55deb1SDavid E. O'Brien 			case 't': *bp++ = '\t'; break;
4122a55deb1SDavid E. O'Brien 			case 'f': *bp++ = '\f'; break;
4132a55deb1SDavid E. O'Brien 			case 'r': *bp++ = '\r'; break;
4142a55deb1SDavid E. O'Brien 			case 'b': *bp++ = '\b'; break;
4152a55deb1SDavid E. O'Brien 			case 'v': *bp++ = '\v'; break;
416f39dd6a9SWarner Losh 			case 'a': *bp++ = '\a'; break;
4172a55deb1SDavid E. O'Brien 			case '\\': *bp++ = '\\'; break;
4182a55deb1SDavid E. O'Brien 
4192a55deb1SDavid E. O'Brien 			case '0': case '1': case '2': /* octal: \d \dd \ddd */
4202a55deb1SDavid E. O'Brien 			case '3': case '4': case '5': case '6': case '7':
4212a55deb1SDavid E. O'Brien 				n = c - '0';
4222a55deb1SDavid E. O'Brien 				if ((c = peek()) >= '0' && c < '8') {
4232a55deb1SDavid E. O'Brien 					n = 8 * n + input() - '0';
4242a55deb1SDavid E. O'Brien 					if ((c = peek()) >= '0' && c < '8')
4252a55deb1SDavid E. O'Brien 						n = 8 * n + input() - '0';
4262a55deb1SDavid E. O'Brien 				}
4272a55deb1SDavid E. O'Brien 				*bp++ = n;
4282a55deb1SDavid E. O'Brien 				break;
4292a55deb1SDavid E. O'Brien 
430f32a6403SWarner Losh 			case 'x':	/* hex  \x0-9a-fA-F (exactly two) */
431f32a6403SWarner Losh 			    {
432f32a6403SWarner Losh 				int i;
433f32a6403SWarner Losh 
434f32a6403SWarner Losh 				if (!isxdigit(peek())) {
435f32a6403SWarner Losh 					unput(c);
4362a55deb1SDavid E. O'Brien 					break;
4372a55deb1SDavid E. O'Brien 				}
438f32a6403SWarner Losh 				n = 0;
439f32a6403SWarner Losh 				for (i = 0; i < 2; i++) {
440f32a6403SWarner Losh 					c = input();
441f32a6403SWarner Losh 					if (c == 0)
442f32a6403SWarner Losh 						break;
443f32a6403SWarner Losh 					if (isxdigit(c)) {
444f32a6403SWarner Losh 						c = tolower(c);
445f32a6403SWarner Losh 						n *= 16;
446f32a6403SWarner Losh 						if (isdigit(c))
447f32a6403SWarner Losh 							n += (c - '0');
448f32a6403SWarner Losh 						else
449f32a6403SWarner Losh 							n += 10 + (c - 'a');
450f32a6403SWarner Losh 					} else {
451b2376a5fSWarner Losh 						unput(c);
452f32a6403SWarner Losh 						break;
453f32a6403SWarner Losh 					}
454f32a6403SWarner Losh 				}
455f32a6403SWarner Losh 				if (i)
4562a55deb1SDavid E. O'Brien 					*bp++ = n;
4572a55deb1SDavid E. O'Brien 				break;
4582a55deb1SDavid E. O'Brien 			    }
4592a55deb1SDavid E. O'Brien 
460f32a6403SWarner Losh 			case 'u':	/* utf  \u0-9a-fA-F (1..8) */
461f32a6403SWarner Losh 			    {
462f32a6403SWarner Losh 				int i;
463f32a6403SWarner Losh 
464f32a6403SWarner Losh 				n = 0;
465f32a6403SWarner Losh 				for (i = 0; i < 8; i++) {
466f32a6403SWarner Losh 					c = input();
467f32a6403SWarner Losh 					if (!isxdigit(c) || c == 0)
468f32a6403SWarner Losh 						break;
469f32a6403SWarner Losh 					c = tolower(c);
470f32a6403SWarner Losh 					n *= 16;
471f32a6403SWarner Losh 					if (isdigit(c))
472f32a6403SWarner Losh 						n += (c - '0');
473f32a6403SWarner Losh 					else
474f32a6403SWarner Losh 						n += 10 + (c - 'a');
475f32a6403SWarner Losh 				}
476f32a6403SWarner Losh 				unput(c);
477f32a6403SWarner Losh 				bp += runetochar(bp, n);
478f32a6403SWarner Losh 				break;
479f32a6403SWarner Losh 			    }
480f32a6403SWarner Losh 
4812a55deb1SDavid E. O'Brien 			default:
4822a55deb1SDavid E. O'Brien 				*bp++ = c;
4832a55deb1SDavid E. O'Brien 				break;
4842a55deb1SDavid E. O'Brien 			}
4852a55deb1SDavid E. O'Brien 			break;
4862a55deb1SDavid E. O'Brien 		default:
4872a55deb1SDavid E. O'Brien 			*bp++ = c;
4882a55deb1SDavid E. O'Brien 			break;
4892a55deb1SDavid E. O'Brien 		}
4902a55deb1SDavid E. O'Brien 	}
4912a55deb1SDavid E. O'Brien 	*bp = 0;
4922a55deb1SDavid E. O'Brien 	s = tostring(buf);
493f39dd6a9SWarner Losh 	*bp++ = ' '; *bp++ = '\0';
4942a55deb1SDavid E. O'Brien 	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
495f39dd6a9SWarner Losh 	free(s);
4962a55deb1SDavid E. O'Brien 	RET(STRING);
4972a55deb1SDavid E. O'Brien }
4982a55deb1SDavid E. O'Brien 
4992a55deb1SDavid E. O'Brien 
500f39dd6a9SWarner Losh static int binsearch(char *w, const Keyword *kp, int n)
5012a55deb1SDavid E. O'Brien {
5022a55deb1SDavid E. O'Brien 	int cond, low, mid, high;
5032a55deb1SDavid E. O'Brien 
5042a55deb1SDavid E. O'Brien 	low = 0;
5052a55deb1SDavid E. O'Brien 	high = n - 1;
5062a55deb1SDavid E. O'Brien 	while (low <= high) {
5072a55deb1SDavid E. O'Brien 		mid = (low + high) / 2;
5082a55deb1SDavid E. O'Brien 		if ((cond = strcmp(w, kp[mid].word)) < 0)
5092a55deb1SDavid E. O'Brien 			high = mid - 1;
5102a55deb1SDavid E. O'Brien 		else if (cond > 0)
5112a55deb1SDavid E. O'Brien 			low = mid + 1;
5122a55deb1SDavid E. O'Brien 		else
5132a55deb1SDavid E. O'Brien 			return mid;
5142a55deb1SDavid E. O'Brien 	}
5152a55deb1SDavid E. O'Brien 	return -1;
5162a55deb1SDavid E. O'Brien }
5172a55deb1SDavid E. O'Brien 
5182a55deb1SDavid E. O'Brien int word(char *w)
5192a55deb1SDavid E. O'Brien {
520f39dd6a9SWarner Losh 	const Keyword *kp;
5212a55deb1SDavid E. O'Brien 	int c, n;
5222a55deb1SDavid E. O'Brien 
5232a55deb1SDavid E. O'Brien 	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
5242a55deb1SDavid E. O'Brien 	if (n != -1) {	/* found in table */
52511169460SAlex Richardson 		kp = keywords + n;
5262a55deb1SDavid E. O'Brien 		yylval.i = kp->sub;
5272a55deb1SDavid E. O'Brien 		switch (kp->type) {	/* special handling */
528addad6afSRong-En Fan 		case BLTIN:
529addad6afSRong-En Fan 			if (kp->sub == FSYSTEM && safe)
5302a55deb1SDavid E. O'Brien 				SYNTAX( "system is unsafe" );
5312a55deb1SDavid E. O'Brien 			RET(kp->type);
5322a55deb1SDavid E. O'Brien 		case FUNC:
5332a55deb1SDavid E. O'Brien 			if (infunc)
5342a55deb1SDavid E. O'Brien 				SYNTAX( "illegal nested function" );
5352a55deb1SDavid E. O'Brien 			RET(kp->type);
5362a55deb1SDavid E. O'Brien 		case RETURN:
5372a55deb1SDavid E. O'Brien 			if (!infunc)
5382a55deb1SDavid E. O'Brien 				SYNTAX( "return not in function" );
5392a55deb1SDavid E. O'Brien 			RET(kp->type);
5402a55deb1SDavid E. O'Brien 		case VARNF:
5412a55deb1SDavid E. O'Brien 			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
5422a55deb1SDavid E. O'Brien 			RET(VARNF);
5432a55deb1SDavid E. O'Brien 		default:
5442a55deb1SDavid E. O'Brien 			RET(kp->type);
5452a55deb1SDavid E. O'Brien 		}
5462a55deb1SDavid E. O'Brien 	}
5472a55deb1SDavid E. O'Brien 	c = peek();	/* look for '(' */
5482a55deb1SDavid E. O'Brien 	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
5492a55deb1SDavid E. O'Brien 		yylval.i = n;
5502a55deb1SDavid E. O'Brien 		RET(ARG);
5512a55deb1SDavid E. O'Brien 	} else {
5522a55deb1SDavid E. O'Brien 		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
5532a55deb1SDavid E. O'Brien 		if (c == '(') {
5542a55deb1SDavid E. O'Brien 			RET(CALL);
5552a55deb1SDavid E. O'Brien 		} else {
5562a55deb1SDavid E. O'Brien 			RET(VAR);
5572a55deb1SDavid E. O'Brien 		}
5582a55deb1SDavid E. O'Brien 	}
5592a55deb1SDavid E. O'Brien }
5602a55deb1SDavid E. O'Brien 
561813da98dSDavid E. O'Brien void startreg(void)	/* next call to yylex will return a regular expression */
5622a55deb1SDavid E. O'Brien {
563f39dd6a9SWarner Losh 	reg = true;
5642a55deb1SDavid E. O'Brien }
5652a55deb1SDavid E. O'Brien 
5662a55deb1SDavid E. O'Brien int regexpr(void)
5672a55deb1SDavid E. O'Brien {
5682a55deb1SDavid E. O'Brien 	int c;
56910ce5b99SWarner Losh 	static char *buf = NULL;
5702a55deb1SDavid E. O'Brien 	static int bufsz = 500;
5712a55deb1SDavid E. O'Brien 	char *bp;
5722a55deb1SDavid E. O'Brien 
57310ce5b99SWarner Losh 	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
574f32a6403SWarner Losh 		FATAL("out of space for reg expr");
5752a55deb1SDavid E. O'Brien 	bp = buf;
5762a55deb1SDavid E. O'Brien 	for ( ; (c = input()) != '/' && c != 0; ) {
577addad6afSRong-En Fan 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
5782a55deb1SDavid E. O'Brien 			FATAL("out of space for reg expr %.10s...", buf);
5792a55deb1SDavid E. O'Brien 		if (c == '\n') {
580b5253557SWarner Losh 			*bp = '\0';
5812a55deb1SDavid E. O'Brien 			SYNTAX( "newline in regular expression %.10s...", buf );
5822a55deb1SDavid E. O'Brien 			unput('\n');
5832a55deb1SDavid E. O'Brien 			break;
5842a55deb1SDavid E. O'Brien 		} else if (c == '\\') {
5852a55deb1SDavid E. O'Brien 			*bp++ = '\\';
5862a55deb1SDavid E. O'Brien 			*bp++ = input();
5872a55deb1SDavid E. O'Brien 		} else {
5882a55deb1SDavid E. O'Brien 			*bp++ = c;
5892a55deb1SDavid E. O'Brien 		}
5902a55deb1SDavid E. O'Brien 	}
5912a55deb1SDavid E. O'Brien 	*bp = 0;
59288b8d487SRuslan Ermilov 	if (c == 0)
59388b8d487SRuslan Ermilov 		SYNTAX("non-terminated regular expression %.10s...", buf);
5942a55deb1SDavid E. O'Brien 	yylval.s = tostring(buf);
5952a55deb1SDavid E. O'Brien 	unput('/');
5962a55deb1SDavid E. O'Brien 	RET(REGEXPR);
5972a55deb1SDavid E. O'Brien }
5982a55deb1SDavid E. O'Brien 
/* Character-level input layer (descended from lex). */

/* Ring of recently read characters, filled by input() and stepped back by unput(). */
char ebuf[300];
/* Next slot of ebuf that input() will write. */
char *ep = ebuf;
/* Stack of characters pushed back by unput(); input() pops from it first. */
char yysbuf[100];
/* One past the top of the pushback stack. */
char *yysptr = yysbuf;
/* NOTE(review): not referenced in this file; presumably kept for the parser/lex interface -- confirm. */
FILE *yyin = NULL;
6062a55deb1SDavid E. O'Brien 
6072a55deb1SDavid E. O'Brien int input(void)	/* get next lexical input character */
6082a55deb1SDavid E. O'Brien {
6092a55deb1SDavid E. O'Brien 	int c;
6102a55deb1SDavid E. O'Brien 	extern char *lexprog;
6112a55deb1SDavid E. O'Brien 
6122a55deb1SDavid E. O'Brien 	if (yysptr > yysbuf)
6132e454f23SRuslan Ermilov 		c = (uschar)*--yysptr;
6142a55deb1SDavid E. O'Brien 	else if (lexprog != NULL) {	/* awk '...' */
6152e454f23SRuslan Ermilov 		if ((c = (uschar)*lexprog) != 0)
6162a55deb1SDavid E. O'Brien 			lexprog++;
6172a55deb1SDavid E. O'Brien 	} else				/* awk -f ... */
6182a55deb1SDavid E. O'Brien 		c = pgetc();
619b5253557SWarner Losh 	if (c == EOF)
6202a55deb1SDavid E. O'Brien 		c = 0;
6212a55deb1SDavid E. O'Brien 	if (ep >= ebuf + sizeof ebuf)
6222a55deb1SDavid E. O'Brien 		ep = ebuf;
623b5253557SWarner Losh 	*ep = c;
624b5253557SWarner Losh 	if (c != 0) {
625b5253557SWarner Losh 		ep++;
626b5253557SWarner Losh 	}
627b5253557SWarner Losh 	return (c);
6282a55deb1SDavid E. O'Brien }
6292a55deb1SDavid E. O'Brien 
6302a55deb1SDavid E. O'Brien void unput(int c)	/* put lexical character back on input */
6312a55deb1SDavid E. O'Brien {
632f39dd6a9SWarner Losh 	if (c == '\n')
633f39dd6a9SWarner Losh 		lineno--;
6342a55deb1SDavid E. O'Brien 	if (yysptr >= yysbuf + sizeof(yysbuf))
6352a55deb1SDavid E. O'Brien 		FATAL("pushed back too much: %.20s...", yysbuf);
6362a55deb1SDavid E. O'Brien 	*yysptr++ = c;
6372a55deb1SDavid E. O'Brien 	if (--ep < ebuf)
6382a55deb1SDavid E. O'Brien 		ep = ebuf + sizeof(ebuf) - 1;
6392a55deb1SDavid E. O'Brien }
6402a55deb1SDavid E. O'Brien 
/*
 * unputstr: push the whole string s back onto the input.
 *
 * Characters are pushed last-to-first so that subsequent input() calls
 * return them in their original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i;

	/* count down in size_t; avoids narrowing strlen()'s result to int */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}
648