/* xref: /inferno-os/utils/awk/lex.c (revision 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a) */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
24*74a4d8c2SCharles.Forsyth 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "awk.h"
#include "ytab.h"
31*74a4d8c2SCharles.Forsyth 
32*74a4d8c2SCharles.Forsyth extern YYSTYPE	yylval;
33*74a4d8c2SCharles.Forsyth extern int	infunc;
34*74a4d8c2SCharles.Forsyth 
35*74a4d8c2SCharles.Forsyth int	lineno	= 1;
36*74a4d8c2SCharles.Forsyth int	bracecnt = 0;
37*74a4d8c2SCharles.Forsyth int	brackcnt  = 0;
38*74a4d8c2SCharles.Forsyth int	parencnt = 0;
39*74a4d8c2SCharles.Forsyth 
40*74a4d8c2SCharles.Forsyth typedef struct Keyword {
41*74a4d8c2SCharles.Forsyth 	char	*word;
42*74a4d8c2SCharles.Forsyth 	int	sub;
43*74a4d8c2SCharles.Forsyth 	int	type;
44*74a4d8c2SCharles.Forsyth } Keyword;
45*74a4d8c2SCharles.Forsyth 
46*74a4d8c2SCharles.Forsyth Keyword keywords[] ={	/* keep sorted: binary searched */
47*74a4d8c2SCharles.Forsyth 	{ "BEGIN",	XBEGIN,		XBEGIN },
48*74a4d8c2SCharles.Forsyth 	{ "END",	XEND,		XEND },
49*74a4d8c2SCharles.Forsyth 	{ "NF",		VARNF,		VARNF },
50*74a4d8c2SCharles.Forsyth 	{ "atan2",	FATAN,		BLTIN },
51*74a4d8c2SCharles.Forsyth 	{ "break",	BREAK,		BREAK },
52*74a4d8c2SCharles.Forsyth 	{ "close",	CLOSE,		CLOSE },
53*74a4d8c2SCharles.Forsyth 	{ "continue",	CONTINUE,	CONTINUE },
54*74a4d8c2SCharles.Forsyth 	{ "cos",	FCOS,		BLTIN },
55*74a4d8c2SCharles.Forsyth 	{ "delete",	DELETE,		DELETE },
56*74a4d8c2SCharles.Forsyth 	{ "do",		DO,		DO },
57*74a4d8c2SCharles.Forsyth 	{ "else",	ELSE,		ELSE },
58*74a4d8c2SCharles.Forsyth 	{ "exit",	EXIT,		EXIT },
59*74a4d8c2SCharles.Forsyth 	{ "exp",	FEXP,		BLTIN },
60*74a4d8c2SCharles.Forsyth 	{ "fflush",	FFLUSH,		BLTIN },
61*74a4d8c2SCharles.Forsyth 	{ "for",	FOR,		FOR },
62*74a4d8c2SCharles.Forsyth 	{ "func",	FUNC,		FUNC },
63*74a4d8c2SCharles.Forsyth 	{ "function",	FUNC,		FUNC },
64*74a4d8c2SCharles.Forsyth 	{ "getline",	GETLINE,	GETLINE },
65*74a4d8c2SCharles.Forsyth 	{ "gsub",	GSUB,		GSUB },
66*74a4d8c2SCharles.Forsyth 	{ "if",		IF,		IF },
67*74a4d8c2SCharles.Forsyth 	{ "in",		IN,		IN },
68*74a4d8c2SCharles.Forsyth 	{ "index",	INDEX,		INDEX },
69*74a4d8c2SCharles.Forsyth 	{ "int",	FINT,		BLTIN },
70*74a4d8c2SCharles.Forsyth 	{ "length",	FLENGTH,	BLTIN },
71*74a4d8c2SCharles.Forsyth 	{ "log",	FLOG,		BLTIN },
72*74a4d8c2SCharles.Forsyth 	{ "match",	MATCHFCN,	MATCHFCN },
73*74a4d8c2SCharles.Forsyth 	{ "next",	NEXT,		NEXT },
74*74a4d8c2SCharles.Forsyth 	{ "nextfile",	NEXTFILE,	NEXTFILE },
75*74a4d8c2SCharles.Forsyth 	{ "print",	PRINT,		PRINT },
76*74a4d8c2SCharles.Forsyth 	{ "printf",	PRINTF,		PRINTF },
77*74a4d8c2SCharles.Forsyth 	{ "rand",	FRAND,		BLTIN },
78*74a4d8c2SCharles.Forsyth 	{ "return",	RETURN,		RETURN },
79*74a4d8c2SCharles.Forsyth 	{ "sin",	FSIN,		BLTIN },
80*74a4d8c2SCharles.Forsyth 	{ "split",	SPLIT,		SPLIT },
81*74a4d8c2SCharles.Forsyth 	{ "sprintf",	SPRINTF,	SPRINTF },
82*74a4d8c2SCharles.Forsyth 	{ "sqrt",	FSQRT,		BLTIN },
83*74a4d8c2SCharles.Forsyth 	{ "srand",	FSRAND,		BLTIN },
84*74a4d8c2SCharles.Forsyth 	{ "sub",	SUB,		SUB },
85*74a4d8c2SCharles.Forsyth 	{ "substr",	SUBSTR,		SUBSTR },
86*74a4d8c2SCharles.Forsyth 	{ "system",	FSYSTEM,	BLTIN },
87*74a4d8c2SCharles.Forsyth 	{ "tolower",	FTOLOWER,	BLTIN },
88*74a4d8c2SCharles.Forsyth 	{ "toupper",	FTOUPPER,	BLTIN },
89*74a4d8c2SCharles.Forsyth 	{ "while",	WHILE,		WHILE },
90*74a4d8c2SCharles.Forsyth };
91*74a4d8c2SCharles.Forsyth 
/*
 * RET(x): return token x from the lexer.  With DEBUG defined it first
 * prints the token name when dbg is set.  Wrapped in do/while(0) so the
 * macro behaves as a single statement wherever it is used.
 * Note that x is evaluated twice in the DEBUG form.
 */
#define DEBUG
#ifdef	DEBUG
#define	RET(x)	do { if(dbg)printf("lex %s\n", tokname(x)); return(x); } while (0)
#else
#define	RET(x)	return(x)
#endif
98*74a4d8c2SCharles.Forsyth 
/* peek: return the next input character without consuming it */
int peek(void)
{
	int c = input();
	unput(c);
	return c;
}
105*74a4d8c2SCharles.Forsyth 
/*
 * gettok: read the next raw token into the buffer *pbuf (size *psz).
 *
 * The buffer may be grown through adjbuf(); *pbuf and *psz are updated
 * when a name or number has been read.  The buffer must have room for
 * at least two bytes on entry.
 *
 * Returns 0 at end of input (the buffer is not touched in that case).
 * Otherwise returns the first character of the token:
 *   - any character that cannot start a name or number is returned as a
 *     one-character token;
 *   - a letter or '_' starts a name, collected while characters are
 *     alphanumeric or '_';
 *   - a digit or '.' starts a number; the longest prefix accepted by
 *     strtod() is kept and the rest is pushed back on the input.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	/* <ctype.h> functions need an unsigned char value, not a negative char */
	if (!isalnum((unsigned char)c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha((unsigned char)c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/* keep room for this character and the terminating NUL */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum((unsigned char)c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
	} else {	/* it's a number */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* keep room for this character and the terminating NUL */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit((unsigned char)c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {
			/*
			 * Nothing was a valid number (e.g. a lone ".").
			 * Keep the first character as the token rather than
			 * returning an empty token that looks like end of input.
			 */
			unputstr(buf+1);	/* put rest back for later */
			buf[1] = 0;
		} else {
			unputstr(rem);		/* put rest back for later */
			rem[0] = 0;		/* truncate after the numeric part */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return buf[0];
}
160*74a4d8c2SCharles.Forsyth 
int	word(char *);
int	string(void);
int	regexpr(void);
int	sc	= 0;	/* 1 => return a } right now */
int	reg	= 0;	/* 1 => return a REGEXPR now */
166*74a4d8c2SCharles.Forsyth 
yylex(void)167*74a4d8c2SCharles.Forsyth int yylex(void)
168*74a4d8c2SCharles.Forsyth {
169*74a4d8c2SCharles.Forsyth 	int c;
170*74a4d8c2SCharles.Forsyth 	static char *buf = 0;
171*74a4d8c2SCharles.Forsyth 	static int bufsize = 500;
172*74a4d8c2SCharles.Forsyth 
173*74a4d8c2SCharles.Forsyth 	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
174*74a4d8c2SCharles.Forsyth 		FATAL( "out of space in yylex" );
175*74a4d8c2SCharles.Forsyth 	if (sc) {
176*74a4d8c2SCharles.Forsyth 		sc = 0;
177*74a4d8c2SCharles.Forsyth 		RET('}');
178*74a4d8c2SCharles.Forsyth 	}
179*74a4d8c2SCharles.Forsyth 	if (reg) {
180*74a4d8c2SCharles.Forsyth 		reg = 0;
181*74a4d8c2SCharles.Forsyth 		return regexpr();
182*74a4d8c2SCharles.Forsyth 	}
183*74a4d8c2SCharles.Forsyth 	for (;;) {
184*74a4d8c2SCharles.Forsyth 		c = gettok(&buf, &bufsize);
185*74a4d8c2SCharles.Forsyth 		if (c == 0)
186*74a4d8c2SCharles.Forsyth 			return 0;
187*74a4d8c2SCharles.Forsyth 		if (isalpha(c) || c == '_')
188*74a4d8c2SCharles.Forsyth 			return word(buf);
189*74a4d8c2SCharles.Forsyth 		if (isdigit(c) || c == '.') {
190*74a4d8c2SCharles.Forsyth 			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
191*74a4d8c2SCharles.Forsyth 			/* should this also have STR set? */
192*74a4d8c2SCharles.Forsyth 			RET(NUMBER);
193*74a4d8c2SCharles.Forsyth 		}
194*74a4d8c2SCharles.Forsyth 
195*74a4d8c2SCharles.Forsyth 		yylval.i = c;
196*74a4d8c2SCharles.Forsyth 		switch (c) {
197*74a4d8c2SCharles.Forsyth 		case '\n':	/* {EOL} */
198*74a4d8c2SCharles.Forsyth 			RET(NL);
199*74a4d8c2SCharles.Forsyth 		case '\r':	/* assume \n is coming */
200*74a4d8c2SCharles.Forsyth 		case ' ':	/* {WS}+ */
201*74a4d8c2SCharles.Forsyth 		case '\t':
202*74a4d8c2SCharles.Forsyth 			break;
203*74a4d8c2SCharles.Forsyth 		case '#':	/* #.* strip comments */
204*74a4d8c2SCharles.Forsyth 			while ((c = input()) != '\n' && c != 0)
205*74a4d8c2SCharles.Forsyth 				;
206*74a4d8c2SCharles.Forsyth 			unput(c);
207*74a4d8c2SCharles.Forsyth 			break;
208*74a4d8c2SCharles.Forsyth 		case ';':
209*74a4d8c2SCharles.Forsyth 			RET(';');
210*74a4d8c2SCharles.Forsyth 		case '\\':
211*74a4d8c2SCharles.Forsyth 			if (peek() == '\n') {
212*74a4d8c2SCharles.Forsyth 				input();
213*74a4d8c2SCharles.Forsyth 			} else if (peek() == '\r') {
214*74a4d8c2SCharles.Forsyth 				input(); input();	/* \n */
215*74a4d8c2SCharles.Forsyth 				lineno++;
216*74a4d8c2SCharles.Forsyth 			} else {
217*74a4d8c2SCharles.Forsyth 				RET(c);
218*74a4d8c2SCharles.Forsyth 			}
219*74a4d8c2SCharles.Forsyth 			break;
220*74a4d8c2SCharles.Forsyth 		case '&':
221*74a4d8c2SCharles.Forsyth 			if (peek() == '&') {
222*74a4d8c2SCharles.Forsyth 				input(); RET(AND);
223*74a4d8c2SCharles.Forsyth 			} else
224*74a4d8c2SCharles.Forsyth 				RET('&');
225*74a4d8c2SCharles.Forsyth 		case '|':
226*74a4d8c2SCharles.Forsyth 			if (peek() == '|') {
227*74a4d8c2SCharles.Forsyth 				input(); RET(BOR);
228*74a4d8c2SCharles.Forsyth 			} else
229*74a4d8c2SCharles.Forsyth 				RET('|');
230*74a4d8c2SCharles.Forsyth 		case '!':
231*74a4d8c2SCharles.Forsyth 			if (peek() == '=') {
232*74a4d8c2SCharles.Forsyth 				input(); yylval.i = NE; RET(NE);
233*74a4d8c2SCharles.Forsyth 			} else if (peek() == '~') {
234*74a4d8c2SCharles.Forsyth 				input(); yylval.i = NOTMATCH; RET(MATCHOP);
235*74a4d8c2SCharles.Forsyth 			} else
236*74a4d8c2SCharles.Forsyth 				RET(NOT);
237*74a4d8c2SCharles.Forsyth 		case '~':
238*74a4d8c2SCharles.Forsyth 			yylval.i = MATCH;
239*74a4d8c2SCharles.Forsyth 			RET(MATCHOP);
240*74a4d8c2SCharles.Forsyth 		case '<':
241*74a4d8c2SCharles.Forsyth 			if (peek() == '=') {
242*74a4d8c2SCharles.Forsyth 				input(); yylval.i = LE; RET(LE);
243*74a4d8c2SCharles.Forsyth 			} else {
244*74a4d8c2SCharles.Forsyth 				yylval.i = LT; RET(LT);
245*74a4d8c2SCharles.Forsyth 			}
246*74a4d8c2SCharles.Forsyth 		case '=':
247*74a4d8c2SCharles.Forsyth 			if (peek() == '=') {
248*74a4d8c2SCharles.Forsyth 				input(); yylval.i = EQ; RET(EQ);
249*74a4d8c2SCharles.Forsyth 			} else {
250*74a4d8c2SCharles.Forsyth 				yylval.i = ASSIGN; RET(ASGNOP);
251*74a4d8c2SCharles.Forsyth 			}
252*74a4d8c2SCharles.Forsyth 		case '>':
253*74a4d8c2SCharles.Forsyth 			if (peek() == '=') {
254*74a4d8c2SCharles.Forsyth 				input(); yylval.i = GE; RET(GE);
255*74a4d8c2SCharles.Forsyth 			} else if (peek() == '>') {
256*74a4d8c2SCharles.Forsyth 				input(); yylval.i = APPEND; RET(APPEND);
257*74a4d8c2SCharles.Forsyth 			} else {
258*74a4d8c2SCharles.Forsyth 				yylval.i = GT; RET(GT);
259*74a4d8c2SCharles.Forsyth 			}
260*74a4d8c2SCharles.Forsyth 		case '+':
261*74a4d8c2SCharles.Forsyth 			if (peek() == '+') {
262*74a4d8c2SCharles.Forsyth 				input(); yylval.i = INCR; RET(INCR);
263*74a4d8c2SCharles.Forsyth 			} else if (peek() == '=') {
264*74a4d8c2SCharles.Forsyth 				input(); yylval.i = ADDEQ; RET(ASGNOP);
265*74a4d8c2SCharles.Forsyth 			} else
266*74a4d8c2SCharles.Forsyth 				RET('+');
267*74a4d8c2SCharles.Forsyth 		case '-':
268*74a4d8c2SCharles.Forsyth 			if (peek() == '-') {
269*74a4d8c2SCharles.Forsyth 				input(); yylval.i = DECR; RET(DECR);
270*74a4d8c2SCharles.Forsyth 			} else if (peek() == '=') {
271*74a4d8c2SCharles.Forsyth 				input(); yylval.i = SUBEQ; RET(ASGNOP);
272*74a4d8c2SCharles.Forsyth 			} else
273*74a4d8c2SCharles.Forsyth 				RET('-');
274*74a4d8c2SCharles.Forsyth 		case '*':
275*74a4d8c2SCharles.Forsyth 			if (peek() == '=') {	/* *= */
276*74a4d8c2SCharles.Forsyth 				input(); yylval.i = MULTEQ; RET(ASGNOP);
277*74a4d8c2SCharles.Forsyth 			} else if (peek() == '*') {	/* ** or **= */
278*74a4d8c2SCharles.Forsyth 				input();	/* eat 2nd * */
279*74a4d8c2SCharles.Forsyth 				if (peek() == '=') {
280*74a4d8c2SCharles.Forsyth 					input(); yylval.i = POWEQ; RET(ASGNOP);
281*74a4d8c2SCharles.Forsyth 				} else {
282*74a4d8c2SCharles.Forsyth 					RET(POWER);
283*74a4d8c2SCharles.Forsyth 				}
284*74a4d8c2SCharles.Forsyth 			} else
285*74a4d8c2SCharles.Forsyth 				RET('*');
286*74a4d8c2SCharles.Forsyth 		case '/':
287*74a4d8c2SCharles.Forsyth 			RET('/');
288*74a4d8c2SCharles.Forsyth 		case '%':
289*74a4d8c2SCharles.Forsyth 			if (peek() == '=') {
290*74a4d8c2SCharles.Forsyth 				input(); yylval.i = MODEQ; RET(ASGNOP);
291*74a4d8c2SCharles.Forsyth 			} else
292*74a4d8c2SCharles.Forsyth 				RET('%');
293*74a4d8c2SCharles.Forsyth 		case '^':
294*74a4d8c2SCharles.Forsyth 			if (peek() == '=') {
295*74a4d8c2SCharles.Forsyth 				input(); yylval.i = POWEQ; RET(ASGNOP);
296*74a4d8c2SCharles.Forsyth 			} else
297*74a4d8c2SCharles.Forsyth 				RET(POWER);
298*74a4d8c2SCharles.Forsyth 
299*74a4d8c2SCharles.Forsyth 		case '$':
300*74a4d8c2SCharles.Forsyth 			/* BUG: awkward, if not wrong */
301*74a4d8c2SCharles.Forsyth 			c = gettok(&buf, &bufsize);
302*74a4d8c2SCharles.Forsyth 			if (isalpha(c)) {
303*74a4d8c2SCharles.Forsyth 				if (strcmp(buf, "NF") == 0) {	/* very special */
304*74a4d8c2SCharles.Forsyth 					unputstr("(NF)");
305*74a4d8c2SCharles.Forsyth 					RET(INDIRECT);
306*74a4d8c2SCharles.Forsyth 				}
307*74a4d8c2SCharles.Forsyth 				c = peek();
308*74a4d8c2SCharles.Forsyth 				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
309*74a4d8c2SCharles.Forsyth 					unputstr(buf);
310*74a4d8c2SCharles.Forsyth 					RET(INDIRECT);
311*74a4d8c2SCharles.Forsyth 				}
312*74a4d8c2SCharles.Forsyth 				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
313*74a4d8c2SCharles.Forsyth 				RET(IVAR);
314*74a4d8c2SCharles.Forsyth 			} else {
315*74a4d8c2SCharles.Forsyth 				unputstr(buf);
316*74a4d8c2SCharles.Forsyth 				RET(INDIRECT);
317*74a4d8c2SCharles.Forsyth 			}
318*74a4d8c2SCharles.Forsyth 
319*74a4d8c2SCharles.Forsyth 		case '}':
320*74a4d8c2SCharles.Forsyth 			if (--bracecnt < 0)
321*74a4d8c2SCharles.Forsyth 				SYNTAX( "extra }" );
322*74a4d8c2SCharles.Forsyth 			sc = 1;
323*74a4d8c2SCharles.Forsyth 			RET(';');
324*74a4d8c2SCharles.Forsyth 		case ']':
325*74a4d8c2SCharles.Forsyth 			if (--brackcnt < 0)
326*74a4d8c2SCharles.Forsyth 				SYNTAX( "extra ]" );
327*74a4d8c2SCharles.Forsyth 			RET(']');
328*74a4d8c2SCharles.Forsyth 		case ')':
329*74a4d8c2SCharles.Forsyth 			if (--parencnt < 0)
330*74a4d8c2SCharles.Forsyth 				SYNTAX( "extra )" );
331*74a4d8c2SCharles.Forsyth 			RET(')');
332*74a4d8c2SCharles.Forsyth 		case '{':
333*74a4d8c2SCharles.Forsyth 			bracecnt++;
334*74a4d8c2SCharles.Forsyth 			RET('{');
335*74a4d8c2SCharles.Forsyth 		case '[':
336*74a4d8c2SCharles.Forsyth 			brackcnt++;
337*74a4d8c2SCharles.Forsyth 			RET('[');
338*74a4d8c2SCharles.Forsyth 		case '(':
339*74a4d8c2SCharles.Forsyth 			parencnt++;
340*74a4d8c2SCharles.Forsyth 			RET('(');
341*74a4d8c2SCharles.Forsyth 
342*74a4d8c2SCharles.Forsyth 		case '"':
343*74a4d8c2SCharles.Forsyth 			return string();	/* BUG: should be like tran.c ? */
344*74a4d8c2SCharles.Forsyth 
345*74a4d8c2SCharles.Forsyth 		default:
346*74a4d8c2SCharles.Forsyth 			RET(c);
347*74a4d8c2SCharles.Forsyth 		}
348*74a4d8c2SCharles.Forsyth 	}
349*74a4d8c2SCharles.Forsyth }
350*74a4d8c2SCharles.Forsyth 
string(void)351*74a4d8c2SCharles.Forsyth int string(void)
352*74a4d8c2SCharles.Forsyth {
353*74a4d8c2SCharles.Forsyth 	int c, n;
354*74a4d8c2SCharles.Forsyth 	char *s, *bp;
355*74a4d8c2SCharles.Forsyth 	static char *buf = 0;
356*74a4d8c2SCharles.Forsyth 	static int bufsz = 500;
357*74a4d8c2SCharles.Forsyth 
358*74a4d8c2SCharles.Forsyth 	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
359*74a4d8c2SCharles.Forsyth 		FATAL("out of space for strings");
360*74a4d8c2SCharles.Forsyth 	for (bp = buf; (c = input()) != '"'; ) {
361*74a4d8c2SCharles.Forsyth 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
362*74a4d8c2SCharles.Forsyth 			FATAL("out of space for string %.10s...", buf);
363*74a4d8c2SCharles.Forsyth 		switch (c) {
364*74a4d8c2SCharles.Forsyth 		case '\n':
365*74a4d8c2SCharles.Forsyth 		case '\r':
366*74a4d8c2SCharles.Forsyth 		case 0:
367*74a4d8c2SCharles.Forsyth 			SYNTAX( "non-terminated string %.10s...", buf );
368*74a4d8c2SCharles.Forsyth 			lineno++;
369*74a4d8c2SCharles.Forsyth 			break;
370*74a4d8c2SCharles.Forsyth 		case '\\':
371*74a4d8c2SCharles.Forsyth 			c = input();
372*74a4d8c2SCharles.Forsyth 			switch (c) {
373*74a4d8c2SCharles.Forsyth 			case '"': *bp++ = '"'; break;
374*74a4d8c2SCharles.Forsyth 			case 'n': *bp++ = '\n'; break;
375*74a4d8c2SCharles.Forsyth 			case 't': *bp++ = '\t'; break;
376*74a4d8c2SCharles.Forsyth 			case 'f': *bp++ = '\f'; break;
377*74a4d8c2SCharles.Forsyth 			case 'r': *bp++ = '\r'; break;
378*74a4d8c2SCharles.Forsyth 			case 'b': *bp++ = '\b'; break;
379*74a4d8c2SCharles.Forsyth 			case 'v': *bp++ = '\v'; break;
380*74a4d8c2SCharles.Forsyth 			case 'a': *bp++ = '\007'; break;
381*74a4d8c2SCharles.Forsyth 			case '\\': *bp++ = '\\'; break;
382*74a4d8c2SCharles.Forsyth 
383*74a4d8c2SCharles.Forsyth 			case '0': case '1': case '2': /* octal: \d \dd \ddd */
384*74a4d8c2SCharles.Forsyth 			case '3': case '4': case '5': case '6': case '7':
385*74a4d8c2SCharles.Forsyth 				n = c - '0';
386*74a4d8c2SCharles.Forsyth 				if ((c = peek()) >= '0' && c < '8') {
387*74a4d8c2SCharles.Forsyth 					n = 8 * n + input() - '0';
388*74a4d8c2SCharles.Forsyth 					if ((c = peek()) >= '0' && c < '8')
389*74a4d8c2SCharles.Forsyth 						n = 8 * n + input() - '0';
390*74a4d8c2SCharles.Forsyth 				}
391*74a4d8c2SCharles.Forsyth 				*bp++ = n;
392*74a4d8c2SCharles.Forsyth 				break;
393*74a4d8c2SCharles.Forsyth 
394*74a4d8c2SCharles.Forsyth 			case 'x':	/* hex  \x0-9a-fA-F + */
395*74a4d8c2SCharles.Forsyth 			    {	char xbuf[100], *px;
396*74a4d8c2SCharles.Forsyth 				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
397*74a4d8c2SCharles.Forsyth 					if (isdigit(c)
398*74a4d8c2SCharles.Forsyth 					 || (c >= 'a' && c <= 'f')
399*74a4d8c2SCharles.Forsyth 					 || (c >= 'A' && c <= 'F'))
400*74a4d8c2SCharles.Forsyth 						*px++ = c;
401*74a4d8c2SCharles.Forsyth 					else
402*74a4d8c2SCharles.Forsyth 						break;
403*74a4d8c2SCharles.Forsyth 				}
404*74a4d8c2SCharles.Forsyth 				*px = 0;
405*74a4d8c2SCharles.Forsyth 				unput(c);
406*74a4d8c2SCharles.Forsyth 	  			sscanf(xbuf, "%x", &n);
407*74a4d8c2SCharles.Forsyth 				*bp++ = n;
408*74a4d8c2SCharles.Forsyth 				break;
409*74a4d8c2SCharles.Forsyth 			    }
410*74a4d8c2SCharles.Forsyth 
411*74a4d8c2SCharles.Forsyth 			default:
412*74a4d8c2SCharles.Forsyth 				*bp++ = c;
413*74a4d8c2SCharles.Forsyth 				break;
414*74a4d8c2SCharles.Forsyth 			}
415*74a4d8c2SCharles.Forsyth 			break;
416*74a4d8c2SCharles.Forsyth 		default:
417*74a4d8c2SCharles.Forsyth 			*bp++ = c;
418*74a4d8c2SCharles.Forsyth 			break;
419*74a4d8c2SCharles.Forsyth 		}
420*74a4d8c2SCharles.Forsyth 	}
421*74a4d8c2SCharles.Forsyth 	*bp = 0;
422*74a4d8c2SCharles.Forsyth 	s = tostring(buf);
423*74a4d8c2SCharles.Forsyth 	*bp++ = ' '; *bp++ = 0;
424*74a4d8c2SCharles.Forsyth 	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
425*74a4d8c2SCharles.Forsyth 	RET(STRING);
426*74a4d8c2SCharles.Forsyth }
427*74a4d8c2SCharles.Forsyth 
428*74a4d8c2SCharles.Forsyth 
/*
 * binsearch: look up w among the n entries of kp, which must be sorted
 * by word in strcmp order.  Returns the index of the matching entry,
 * or -1 if w is not present.
 */
int binsearch(char *w, Keyword *kp, int n)
{
	int cond, low, mid, high;

	low = 0;
	high = n - 1;
	while (low <= high) {
		mid = low + (high - low) / 2;	/* cannot overflow, unlike (low+high)/2 */
		if ((cond = strcmp(w, kp[mid].word)) < 0)
			high = mid - 1;
		else if (cond > 0)
			low = mid + 1;
		else
			return mid;
	}
	return -1;
}
446*74a4d8c2SCharles.Forsyth 
word(char * w)447*74a4d8c2SCharles.Forsyth int word(char *w)
448*74a4d8c2SCharles.Forsyth {
449*74a4d8c2SCharles.Forsyth 	Keyword *kp;
450*74a4d8c2SCharles.Forsyth 	int c, n;
451*74a4d8c2SCharles.Forsyth 
452*74a4d8c2SCharles.Forsyth 	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
453*74a4d8c2SCharles.Forsyth 	kp = keywords + n;
454*74a4d8c2SCharles.Forsyth 	if (n != -1) {	/* found in table */
455*74a4d8c2SCharles.Forsyth 		yylval.i = kp->sub;
456*74a4d8c2SCharles.Forsyth 		switch (kp->type) {	/* special handling */
457*74a4d8c2SCharles.Forsyth 		case FSYSTEM:
458*74a4d8c2SCharles.Forsyth 			if (safe)
459*74a4d8c2SCharles.Forsyth 				SYNTAX( "system is unsafe" );
460*74a4d8c2SCharles.Forsyth 			RET(kp->type);
461*74a4d8c2SCharles.Forsyth 		case FUNC:
462*74a4d8c2SCharles.Forsyth 			if (infunc)
463*74a4d8c2SCharles.Forsyth 				SYNTAX( "illegal nested function" );
464*74a4d8c2SCharles.Forsyth 			RET(kp->type);
465*74a4d8c2SCharles.Forsyth 		case RETURN:
466*74a4d8c2SCharles.Forsyth 			if (!infunc)
467*74a4d8c2SCharles.Forsyth 				SYNTAX( "return not in function" );
468*74a4d8c2SCharles.Forsyth 			RET(kp->type);
469*74a4d8c2SCharles.Forsyth 		case VARNF:
470*74a4d8c2SCharles.Forsyth 			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
471*74a4d8c2SCharles.Forsyth 			RET(VARNF);
472*74a4d8c2SCharles.Forsyth 		default:
473*74a4d8c2SCharles.Forsyth 			RET(kp->type);
474*74a4d8c2SCharles.Forsyth 		}
475*74a4d8c2SCharles.Forsyth 	}
476*74a4d8c2SCharles.Forsyth 	c = peek();	/* look for '(' */
477*74a4d8c2SCharles.Forsyth 	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
478*74a4d8c2SCharles.Forsyth 		yylval.i = n;
479*74a4d8c2SCharles.Forsyth 		RET(ARG);
480*74a4d8c2SCharles.Forsyth 	} else {
481*74a4d8c2SCharles.Forsyth 		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
482*74a4d8c2SCharles.Forsyth 		if (c == '(') {
483*74a4d8c2SCharles.Forsyth 			RET(CALL);
484*74a4d8c2SCharles.Forsyth 		} else {
485*74a4d8c2SCharles.Forsyth 			RET(VAR);
486*74a4d8c2SCharles.Forsyth 		}
487*74a4d8c2SCharles.Forsyth 	}
488*74a4d8c2SCharles.Forsyth }
489*74a4d8c2SCharles.Forsyth 
startreg(void)490*74a4d8c2SCharles.Forsyth void startreg(void)	/* next call to yyles will return a regular expression */
491*74a4d8c2SCharles.Forsyth {
492*74a4d8c2SCharles.Forsyth 	reg = 1;
493*74a4d8c2SCharles.Forsyth }
494*74a4d8c2SCharles.Forsyth 
regexpr(void)495*74a4d8c2SCharles.Forsyth int regexpr(void)
496*74a4d8c2SCharles.Forsyth {
497*74a4d8c2SCharles.Forsyth 	int c;
498*74a4d8c2SCharles.Forsyth 	static char *buf = 0;
499*74a4d8c2SCharles.Forsyth 	static int bufsz = 500;
500*74a4d8c2SCharles.Forsyth 	char *bp;
501*74a4d8c2SCharles.Forsyth 
502*74a4d8c2SCharles.Forsyth 	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
503*74a4d8c2SCharles.Forsyth 		FATAL("out of space for rex expr");
504*74a4d8c2SCharles.Forsyth 	bp = buf;
505*74a4d8c2SCharles.Forsyth 	for ( ; (c = input()) != '/' && c != 0; ) {
506*74a4d8c2SCharles.Forsyth 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0))
507*74a4d8c2SCharles.Forsyth 			FATAL("out of space for reg expr %.10s...", buf);
508*74a4d8c2SCharles.Forsyth 		if (c == '\n') {
509*74a4d8c2SCharles.Forsyth 			SYNTAX( "newline in regular expression %.10s...", buf );
510*74a4d8c2SCharles.Forsyth 			unput('\n');
511*74a4d8c2SCharles.Forsyth 			break;
512*74a4d8c2SCharles.Forsyth 		} else if (c == '\\') {
513*74a4d8c2SCharles.Forsyth 			*bp++ = '\\';
514*74a4d8c2SCharles.Forsyth 			*bp++ = input();
515*74a4d8c2SCharles.Forsyth 		} else {
516*74a4d8c2SCharles.Forsyth 			*bp++ = c;
517*74a4d8c2SCharles.Forsyth 		}
518*74a4d8c2SCharles.Forsyth 	}
519*74a4d8c2SCharles.Forsyth 	*bp = 0;
520*74a4d8c2SCharles.Forsyth 	yylval.s = tostring(buf);
521*74a4d8c2SCharles.Forsyth 	unput('/');
522*74a4d8c2SCharles.Forsyth 	RET(REGEXPR);
523*74a4d8c2SCharles.Forsyth }
524*74a4d8c2SCharles.Forsyth 
/* low-level lexical stuff, sort of inherited from lex */

char	ebuf[300];	/* circular record of the characters returned by input() */
char	*ep = ebuf;	/* next slot to fill in ebuf */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed-back character */
FILE	*yyin = 0;
532*74a4d8c2SCharles.Forsyth 
input(void)533*74a4d8c2SCharles.Forsyth int input(void)	/* get next lexical input character */
534*74a4d8c2SCharles.Forsyth {
535*74a4d8c2SCharles.Forsyth 	int c;
536*74a4d8c2SCharles.Forsyth 	extern char *lexprog;
537*74a4d8c2SCharles.Forsyth 
538*74a4d8c2SCharles.Forsyth 	if (yysptr > yysbuf)
539*74a4d8c2SCharles.Forsyth 		c = *--yysptr;
540*74a4d8c2SCharles.Forsyth 	else if (lexprog != NULL) {	/* awk '...' */
541*74a4d8c2SCharles.Forsyth 		if ((c = *lexprog) != 0)
542*74a4d8c2SCharles.Forsyth 			lexprog++;
543*74a4d8c2SCharles.Forsyth 	} else				/* awk -f ... */
544*74a4d8c2SCharles.Forsyth 		c = pgetc();
545*74a4d8c2SCharles.Forsyth 	if (c == '\n')
546*74a4d8c2SCharles.Forsyth 		lineno++;
547*74a4d8c2SCharles.Forsyth 	else if (c == EOF)
548*74a4d8c2SCharles.Forsyth 		c = 0;
549*74a4d8c2SCharles.Forsyth 	if (ep >= ebuf + sizeof ebuf)
550*74a4d8c2SCharles.Forsyth 		ep = ebuf;
551*74a4d8c2SCharles.Forsyth 	return *ep++ = c;
552*74a4d8c2SCharles.Forsyth }
553*74a4d8c2SCharles.Forsyth 
unput(int c)554*74a4d8c2SCharles.Forsyth void unput(int c)	/* put lexical character back on input */
555*74a4d8c2SCharles.Forsyth {
556*74a4d8c2SCharles.Forsyth 	if (c == '\n')
557*74a4d8c2SCharles.Forsyth 		lineno--;
558*74a4d8c2SCharles.Forsyth 	if (yysptr >= yysbuf + sizeof(yysbuf))
559*74a4d8c2SCharles.Forsyth 		FATAL("pushed back too much: %.20s...", yysbuf);
560*74a4d8c2SCharles.Forsyth 	*yysptr++ = c;
561*74a4d8c2SCharles.Forsyth 	if (--ep < ebuf)
562*74a4d8c2SCharles.Forsyth 		ep = ebuf + sizeof(ebuf) - 1;
563*74a4d8c2SCharles.Forsyth }
564*74a4d8c2SCharles.Forsyth 
/*
 * unputstr: push the string s back on the input.  Characters are pushed
 * last-to-first so that they are read again in their original order.
 * An empty string pushes nothing.
 */
void unputstr(char *s)	/* put a string back on input */
{
	size_t n;

	for (n = strlen(s); n > 0; n--)
		unput(s[n-1]);
}
572