1*65389Sbostic %Start A str sc reg comment
	/* Start conditions used by the rules below: */
	/*   A    ordinary awk program text */
	/*   str  inside a "..." string constant (entered by the <A>\" rule) */
	/*   reg  inside a /.../ regular expression (entered by startreg()) */
	/*   sc   a "}" has been read and ';' returned; the '}' token is still owed */
	/* No rule in this file is tagged with "comment". */
2*65389Sbostic 
3*65389Sbostic %{
4*65389Sbostic /****************************************************************
5*65389Sbostic Copyright (C) AT&T 1993
6*65389Sbostic All Rights Reserved
7*65389Sbostic 
8*65389Sbostic Permission to use, copy, modify, and distribute this software and
9*65389Sbostic its documentation for any purpose and without fee is hereby
10*65389Sbostic granted, provided that the above copyright notice appear in all
11*65389Sbostic copies and that both that the copyright notice and this
12*65389Sbostic permission notice and warranty disclaimer appear in supporting
13*65389Sbostic documentation, and that the name of AT&T or any of its entities
14*65389Sbostic not be used in advertising or publicity pertaining to
15*65389Sbostic distribution of the software without specific, written prior
16*65389Sbostic permission.
17*65389Sbostic 
18*65389Sbostic AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
19*65389Sbostic INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
20*65389Sbostic IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
21*65389Sbostic SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
22*65389Sbostic WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
23*65389Sbostic IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
24*65389Sbostic ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
25*65389Sbostic THIS SOFTWARE.
26*65389Sbostic ****************************************************************/
27*65389Sbostic 
28*65389Sbostic /* some of this depends on behavior of lex that
29*65389Sbostic    may not be preserved in other implementations of lex.
30*65389Sbostic */
31*65389Sbostic 
32*65389Sbostic #undef	input	/* defeat lex */
33*65389Sbostic #undef	unput
34*65389Sbostic 
35*65389Sbostic #include <stdlib.h>
36*65389Sbostic #include <string.h>
37*65389Sbostic #include "awk.h"
38*65389Sbostic #include "y.tab.h"
39*65389Sbostic 
40*65389Sbostic extern YYSTYPE	yylval;
41*65389Sbostic extern int	infunc;
42*65389Sbostic 
/* Lexer state shared with the rest of awk. */
int	lineno	= 1;	/* source line number; bumped by every rule that consumes a newline */
/* Nesting counters: the catch-all <A>. rule increments them on an opener,
   and the "}" "]" ")" rules decrement them and report an error when a
   count goes negative. */
int	bracecnt = 0;
int	brackcnt  = 0;
int	parencnt = 0;

/* RET(x): return token x from yylex().  DEBUG is defined unconditionally
   here, so the tracing form is always compiled in; it prints the token
   name and matched text when dbg is non-zero. */
#define DEBUG
#ifdef	DEBUG
#	define	RET(x)	{if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
#else
#	define	RET(x)	return(x)
#endif

/* CADD: append the first character of the current match (yytext[0]) to
   cbuf.  When the buffer has only its last slot left, report the text as
   too long and return the lexer to start condition A.  Wrapped in
   do { } while (0) so the macro is a single statement wherever it is used
   (previously "if (x) CADD;" would have guarded only the store). */
#define	CADD	do { \
		cbuf[clen++] = yytext[0]; \
		if (clen >= CBUFLEN-1) { \
			ERROR "string/reg expr %.30s... too long", cbuf SYNTAX; \
			BEGIN A; \
		} \
	} while (0)

uchar	cbuf[CBUFLEN];	/* text of the string or regular expression being collected */
uchar	*s;		/* scratch pointer; the <str>\" rule keeps the tostring() copy here */
int	clen, cflag;	/* clen: bytes used in cbuf; cflag is not referenced in this file */
63*65389Sbostic %}
64*65389Sbostic 
65*65389Sbostic A	[a-zA-Z_]
66*65389Sbostic B	[a-zA-Z0-9_]
67*65389Sbostic D	[0-9]
68*65389Sbostic O	[0-7]
69*65389Sbostic H	[0-9a-fA-F]
70*65389Sbostic WS	[ \t]
	/* Named patterns, written {A}, {B}, ... in the rules: */
	/*   A first character of a name, B any later character of a name, */
	/*   D decimal digit, O octal digit, H hexadecimal digit, WS blank or tab. */
	/* The pattern name A is a different thing from the start condition <A>. */
71*65389Sbostic 
72*65389Sbostic %%
73*65389Sbostic 	switch (yybgin-yysvec-1) {	/* witchcraft: code placed before the first rule runs on each call of yylex(); presumably yybgin-yysvec-1 is the current start condition in this lex -- confirm */
74*65389Sbostic 	case 0:	/* presumably the initial state, before any BEGIN: start in A */
75*65389Sbostic 		BEGIN A;
76*65389Sbostic 		break;
77*65389Sbostic 	case sc:	/* left behind by the "}" rule, which returned ';' on the previous call */
78*65389Sbostic 		BEGIN A;
79*65389Sbostic 		RET('}');	/* now deliver the '}' itself */
80*65389Sbostic 	}
81*65389Sbostic 
82*65389Sbostic <A>\n		{ lineno++; RET(NL); }	/* newline is a token (NL); keep lineno current */
83*65389Sbostic <A>#.*		{ ; }	/* strip comments */
84*65389Sbostic <A>{WS}+	{ ; }	/* runs of blanks and tabs produce no token */
85*65389Sbostic <A>;		{ RET(';'); }
86*65389Sbostic 
87*65389Sbostic <A>"\\"\n	{ lineno++; }	/* backslash-newline: count the line but return no token */
88*65389Sbostic <A>BEGIN	{ RET(XBEGIN); }
89*65389Sbostic <A>END		{ RET(XEND); }
90*65389Sbostic <A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }	/* "func" or "function"; FUNC is returned even after the error */
91*65389Sbostic <A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }	/* RETURN is returned even after the error */
92*65389Sbostic <A>"&&"		{ RET(AND); }
93*65389Sbostic <A>"||"		{ RET(BOR); }
94*65389Sbostic <A>"!"		{ RET(NOT); }
95*65389Sbostic <A>"!="		{ yylval.i = NE; RET(NE); }	/* from here on, operators also store their own code in yylval.i */
96*65389Sbostic <A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }	/* ~ and !~ share the MATCHOP token; yylval.i tells them apart */
97*65389Sbostic <A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
98*65389Sbostic <A>"<"		{ yylval.i = LT; RET(LT); }
99*65389Sbostic <A>"<="		{ yylval.i = LE; RET(LE); }
100*65389Sbostic <A>"=="		{ yylval.i = EQ; RET(EQ); }
101*65389Sbostic <A>">="		{ yylval.i = GE; RET(GE); }
102*65389Sbostic <A>">"		{ yylval.i = GT; RET(GT); }
103*65389Sbostic <A>">>"		{ yylval.i = APPEND; RET(APPEND); }
104*65389Sbostic <A>"++"		{ yylval.i = INCR; RET(INCR); }
105*65389Sbostic <A>"--"		{ yylval.i = DECR; RET(DECR); }
106*65389Sbostic <A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }	/* every assignment operator returns ASGNOP; yylval.i selects the operation */
107*65389Sbostic <A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
108*65389Sbostic <A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
109*65389Sbostic <A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
110*65389Sbostic <A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
111*65389Sbostic <A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
112*65389Sbostic <A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }	/* same result as ^= */
113*65389Sbostic <A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
114*65389Sbostic <A>"**"		{ RET(POWER); }	/* same token as ^ */
115*65389Sbostic <A>"^"		{ RET(POWER); }
116*65389Sbostic 
117*65389Sbostic <A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }	/* $digits: the digits after '$' select the field via fieldadr() */
118*65389Sbostic <A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }	/* pushes back "(NF)" so $NF is rescanned as $ followed by (NF); plain return, so no RET trace */
119*65389Sbostic <A>"$"{A}{B}*	{ int c, n;	/* $name; n is assigned below but not used afterwards */
120*65389Sbostic 		  c = input(); unput(c);	/* peek at the character after the name */
121*65389Sbostic 		  if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) {	/* followed by ( or [, or isarg() accepts the name while infunc is set */
122*65389Sbostic 			unputstr(yytext+1);	/* push the name back to be scanned on its own ... */
123*65389Sbostic 			return(INDIRECT);	/* ... after this token for the '$' */
124*65389Sbostic 		  } else {
125*65389Sbostic 			yylval.cp = setsymtab(yytext+1,"",0.0,STR|NUM,symtab);	/* otherwise enter the name, without the '$', in symtab */
126*65389Sbostic 			RET(IVAR);
127*65389Sbostic 		  }
128*65389Sbostic 		}
129*65389Sbostic <A>"$"		{ RET(INDIRECT); }	/* a lone '$' */
130*65389Sbostic <A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }	/* NF gets its own token, VARNF */
131*65389Sbostic 
132*65389Sbostic <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
133*65389Sbostic 		  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);	/* numeric constant: symtab entry named by its source text, value from atof() */
134*65389Sbostic 		/* should this also have STR set? */
135*65389Sbostic 		  RET(NUMBER); }
136*65389Sbostic 
137*65389Sbostic <A>while	{ RET(WHILE); }	/* reserved words; these rules precede the general {A}{B}* rule, and lex picks the earlier rule when two match the same length */
138*65389Sbostic <A>for		{ RET(FOR); }
139*65389Sbostic <A>do		{ RET(DO); }
140*65389Sbostic <A>if		{ RET(IF); }
141*65389Sbostic <A>else		{ RET(ELSE); }
142*65389Sbostic <A>next		{ RET(NEXT); }
143*65389Sbostic <A>exit		{ RET(EXIT); }
144*65389Sbostic <A>break	{ RET(BREAK); }
145*65389Sbostic <A>continue	{ RET(CONTINUE); }
146*65389Sbostic <A>print	{ yylval.i = PRINT; RET(PRINT); }	/* these also store their token code in yylval.i */
147*65389Sbostic <A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
148*65389Sbostic <A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
149*65389Sbostic <A>split	{ yylval.i = SPLIT; RET(SPLIT); }
150*65389Sbostic <A>substr	{ RET(SUBSTR); }
151*65389Sbostic <A>sub		{ yylval.i = SUB; RET(SUB); }
152*65389Sbostic <A>gsub		{ yylval.i = GSUB; RET(GSUB); }
153*65389Sbostic <A>index	{ RET(INDEX); }
154*65389Sbostic <A>match	{ RET(MATCHFCN); }
155*65389Sbostic <A>in		{ RET(IN); }
156*65389Sbostic <A>getline	{ RET(GETLINE); }
157*65389Sbostic <A>close	{ RET(CLOSE); }
158*65389Sbostic <A>delete	{ RET(DELETE); }
159*65389Sbostic <A>length	{ yylval.i = FLENGTH; RET(BLTIN); }	/* the rest all return BLTIN; yylval.i holds the F... code that identifies the function */
160*65389Sbostic <A>log		{ yylval.i = FLOG; RET(BLTIN); }
161*65389Sbostic <A>int		{ yylval.i = FINT; RET(BLTIN); }
162*65389Sbostic <A>exp		{ yylval.i = FEXP; RET(BLTIN); }
163*65389Sbostic <A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
164*65389Sbostic <A>sin		{ yylval.i = FSIN; RET(BLTIN); }
165*65389Sbostic <A>cos		{ yylval.i = FCOS; RET(BLTIN); }
166*65389Sbostic <A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
167*65389Sbostic <A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
168*65389Sbostic <A>rand		{ yylval.i = FRAND; RET(BLTIN); }
169*65389Sbostic <A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
170*65389Sbostic <A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
171*65389Sbostic <A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
172*65389Sbostic <A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }
173*65389Sbostic 
174*65389Sbostic <A>{A}{B}*	{ int n, c;	/* any other name */
175*65389Sbostic 		  c = input(); unput(c);	/* look for '(' */
176*65389Sbostic 		  if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {	/* not a call, and isarg() accepts the name while infunc is set */
177*65389Sbostic 			yylval.i = n;	/* ARG carries the value isarg() returned */
178*65389Sbostic 			RET(ARG);
179*65389Sbostic 		  } else {
180*65389Sbostic 			yylval.cp = setsymtab(yytext,"",0.0,STR|NUM,symtab);	/* enter the name in symtab whether it is a call or a variable */
181*65389Sbostic 			if (c == '(') {	/* name immediately followed by '(' */
182*65389Sbostic 				RET(CALL);
183*65389Sbostic 			} else {
184*65389Sbostic 				RET(VAR);
185*65389Sbostic 			}
186*65389Sbostic 		  }
187*65389Sbostic 		}
188*65389Sbostic <A>\"		{ BEGIN str; clen = 0; }	/* opening quote: collect the string in cbuf from the start; no token yet */
189*65389Sbostic 
190*65389Sbostic <A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }	/* returns ';' now; state sc makes the next yylex() call return the '}' */
191*65389Sbostic <A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
192*65389Sbostic <A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
193*65389Sbostic 
194*65389Sbostic <A>.		{ if (yytext[0] == '{') bracecnt++;	/* single characters no earlier rule claimed; count the openers */
195*65389Sbostic 		  else if (yytext[0] == '[') brackcnt++;
196*65389Sbostic 		  else if (yytext[0] == '(') parencnt++;
197*65389Sbostic 		  RET(yylval.i = yytext[0]); /* everything else */ }
198*65389Sbostic 
<reg>\\.	{ CADD;			/* backslash pair: keep the backslash ... */
		  if (clen < CBUFLEN-1) {	/* ... and, unless CADD just reported overflow, */
			yytext[0] = yytext[1];	/* the escaped character too, through the */
			CADD;			/* same bounds-checked path as plain text */
		  }
		}
<reg>\n		{ cbuf[clen] = 0;	/* terminate so the message shows only this expression */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;		/* closing slash: back to ordinary program text */
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);	/* REGEXPR carries a copy of the collected text */
		  unput('/');		/* the '/' itself is rescanned in state A */
		  RET(REGEXPR); }
<reg>.		{ CADD; }
207*65389Sbostic 
<str>\"		{ BEGIN A;		/* closing quote: back to ordinary program text */
		  cbuf[clen] = 0; s = tostring(cbuf);	/* s: copy of the string's value */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;	/* symtab name is the text plus a blank; presumably to keep it distinct from a name or number spelled the same -- confirm */
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* terminate so the message shows only this string */
		  ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ yytext[0] = '"'; CADD; }	/* escapes: put the decoded byte in yytext[0] and let */
<str>"\\"n	{ yytext[0] = '\n'; CADD; }	/* CADD store it, so every byte added to cbuf goes */
<str>"\\"t	{ yytext[0] = '\t'; CADD; }	/* through the same length check */
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"v	{ yytext[0] = '\v'; CADD; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ yytext[0] = '\007'; CADD; }	/* your compiler. hence 007 for bell */
<str>"\\\\"	{ CADD; }	/* yytext[0] is already the backslash */
<str>"\\"({O}{O}{O}|{O}{O}|{O}) {	/* one to three octal digits; low 8 bits are kept */
		  yytext[0] = strtoul((char *)yytext+1, NULL, 8); CADD; }
<str>"\\"x({H}+) {	/* ANSI permits any number! strtoul is defined even when the digits overflow */
		  yytext[0] = strtoul((char *)yytext+2, NULL, 16); CADD; }
<str>"\\".	{ yytext[0] = yytext[1]; CADD; }	/* unknown escape: drop the backslash, keep the character */
<str>.		{ CADD; }
229*65389Sbostic 
230*65389Sbostic %%
231*65389Sbostic 
/*
 * startreg - put the lexer into regular-expression mode.
 * Empties the shared collection buffer (clen = 0) and selects the reg
 * start condition, so the following input is gathered by the <reg> rules
 * until an unescaped '/' ends it.  No caller appears in this file;
 * presumably the parser invokes it.
 */
void startreg(void)
{
	clen = 0;
	BEGIN reg;
}
237*65389Sbostic 
238*65389Sbostic /* input() and unput() are transcriptions of the standard lex
239*65389Sbostic    macros for input and output with additions for error message
240*65389Sbostic    printing.  God help us all if someone changes how lex works.
241*65389Sbostic */
242*65389Sbostic 
/* Circular record of the most recent input characters, kept for error
   message printing; whatever reads it is not in this file.  ep is the
   slot the next character will be stored in. */
uchar	ebuf[300];
uchar	*ep = ebuf;

/*
 * input - get next lexical input character.
 * Sources, in priority order: lex's pushback stack (yysbuf/yysptr), the
 * in-memory program text lexprog (awk '...'), or pgetc() (awk -f ...).
 * Counts newlines in yylineno, maps EOF to 0, records the character in
 * ebuf and returns it; 0 therefore means end of input.
 */
int input(void)
{
	int c;
	extern uchar *lexprog;

	if (yysptr > yysbuf)		/* pushed-back characters come first */
		c = U(*--yysptr);
	else if (lexprog != NULL) {	/* awk '...' */
		if ((c = *lexprog) != 0)	/* stay on the terminating NUL once reached */
			lexprog++;
	} else				/* awk -f ... */
		c = pgetc();
	if (c == '\n')
		yylineno++;
	else if (c == EOF)
		c = 0;
	if (ep >= ebuf + sizeof ebuf)	/* wrap the circular record */
		ep = ebuf;
	return *ep++ = c;
}
266*65389Sbostic 
/*
 * unput - put lexical character back on input.
 * Pushes c onto lex's pushback stack (yysptr), undoes the yylineno
 * count for a newline, and steps ep back one slot so the ebuf record
 * stays in step with input().
 * NOTE(review): nothing here checks yysptr against the end of yysbuf.
 */
void unput(int c)
{
	yytchar = c;
	if (yytchar == '\n')
		yylineno--;
	*yysptr++ = yytchar;
	/* Step back circularly without ever forming a pointer below ebuf,
	   which "--ep < ebuf" did and which is undefined in C. */
	if (ep == ebuf)
		ep = ebuf + sizeof(ebuf) - 1;
	else
		ep--;
}
276*65389Sbostic 
277*65389Sbostic 
/*
 * unputstr - put a string back on input.
 * Characters are handed to unput() from last to first, so that they are
 * read again in their original order.
 */
void unputstr(char *s)
{
	char *p = s + strlen(s);	/* one past the last character */

	while (p > s)
		unput(*--p);
}
285