165389Sbostic %Start A str sc reg comment
265389Sbostic 
365389Sbostic %{
465389Sbostic /****************************************************************
565389Sbostic Copyright (C) AT&T 1993
665389Sbostic All Rights Reserved
765389Sbostic 
865389Sbostic Permission to use, copy, modify, and distribute this software and
965389Sbostic its documentation for any purpose and without fee is hereby
1065389Sbostic granted, provided that the above copyright notice appear in all
1165389Sbostic copies and that both that the copyright notice and this
1265389Sbostic permission notice and warranty disclaimer appear in supporting
1365389Sbostic documentation, and that the name of AT&T or any of its entities
1465389Sbostic not be used in advertising or publicity pertaining to
1565389Sbostic distribution of the software without specific, written prior
1665389Sbostic permission.
1765389Sbostic 
1865389Sbostic AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1965389Sbostic INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
2065389Sbostic IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
2165389Sbostic SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
2265389Sbostic WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
2365389Sbostic IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
2465389Sbostic ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
2565389Sbostic THIS SOFTWARE.
2665389Sbostic ****************************************************************/
2765389Sbostic 
2865389Sbostic #include <stdlib.h>
2965389Sbostic #include <string.h>
3065389Sbostic #include "awk.h"
3165389Sbostic #include "y.tab.h"
3265389Sbostic 
extern YYSTYPE	yylval;		/* token value passed to the parser; rules below set its .i, .cp or .s member */
extern int	infunc;		/* tested below to reject a nested "func" and a "return" outside a function */

int	lineno	= 1;		/* current source line; bumped by every rule that consumes a newline */
int	bracecnt = 0;		/* '{' seen minus '}' seen; going negative reports "extra }" */
int	brackcnt  = 0;		/* same bookkeeping for '[' and ']' */
int	parencnt = 0;		/* same bookkeeping for '(' and ')' */
#define DEBUG
/*
 * RET(x): return token x from the scanner.  When DEBUG is defined and the
 * dbg flag is set, the token name and the current yytext are printed first.
 * The body is wrapped in do { } while (0) so that "RET(x);" is exactly one
 * statement and stays safe inside an unbraced if/else.
 * Note that x is expanded twice in the DEBUG form.
 */
#ifdef	DEBUG
#	define	RET(x)	do { if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); } while (0)
#else
#	define	RET(x)	return(x)
#endif

/*
 * CADD: append the first character of the current match to cbuf.
 * Once the buffer is within one byte of full, a syntax error is reported
 * and the scanner returns to start condition A, which stops further
 * appends.  Use as "CADD;" -- the do { } while (0) wrapper makes the
 * expansion a single statement.
 */
#define	CADD	do { \
			cbuf[clen++] = yytext[0]; \
			if (clen >= CBUFLEN-1) { \
				ERROR "string/reg expr %.30s... too long", cbuf SYNTAX; \
				BEGIN A; \
			} \
		} while (0)
5265389Sbostic 
uchar	cbuf[CBUFLEN];	/* text of the string or regular expression currently being collected */
uchar	*s;		/* scratch: holds the tostring() copy of cbuf when a string literal ends */
int	clen, cflag;	/* clen: number of bytes used in cbuf; cflag is not referenced by the rules in this file */
56*65396Sbostic 
/* Some of this depends on behavior of the lex implementation in use
   (the YY_INPUT / YY_USER_INIT hooks and the yy_start variable) that
   may not be preserved in other implementations of lex.
*/

/* Reads program text into buf, at most max_size characters; defined after the rules. */
static	int	my_input( YY_CHAR *buf, int max_size );

/* Route all scanner input through my_input() instead of the generator's default reader. */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);

/* Hook that calls init_input_source(), which is not defined in this file;
   presumably run once before the first scan -- confirm against the generator's documentation. */
#undef YY_USER_INIT
#define YY_USER_INIT init_input_source();
6865389Sbostic %}
6965389Sbostic 
A	[a-zA-Z_]
	/* {A} (above): a letter or underscore, the first character of a name.
	   This pattern name is separate from the start condition also called A.
	   {B}: any later character of a name.
	   {D}, {O}, {H}: one decimal, octal or hexadecimal digit.
	   {WS}: a blank or a tab. */
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]
7665389Sbostic 
7765389Sbostic %%
	/*
	 * Code placed ahead of the first rule; it is meant to run on entry to
	 * the scanner, before any pattern is matched.  It inspects the
	 * current start condition:
	 *   0   no start condition chosen yet: switch to A, the normal state.
	 *   sc  the previous call matched "}" in state A, returned ';' and
	 *       switched to sc; now hand the parser the '}' itself and go
	 *       back to A.  Each "}" therefore reaches the parser as ';' '}'.
	 * NOTE(review): (yy_start - 1) / 2 presumably undoes the generator's
	 * internal encoding of the start condition in yy_start -- confirm
	 * before moving to another lex implementation.
	 */
	switch ((yy_start - 1) / 2) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');	/* yytext printed by a DEBUG trace here is left over from the previous match */
	}
8665389Sbostic 
<A>\n		{ lineno++; RET(NL); }	/* newline is a token; keep the line count current */
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }	/* skip blanks and tabs */
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }	/* backslash-newline: count the line but return no token */
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }	/* "func" or "function" */
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }	/* comparison operators also store their code in yylval.i */
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }	/* both match operators return MATCHOP; yylval.i tells them apart */
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }	/* every assignment form returns ASGNOP; yylval.i says which one */
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }	/* "**=" is handled exactly like "^=" */
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }	/* "**" and "^" are the same token */
<A>"^"		{ RET(POWER); }
12165389Sbostic 
<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }	/* $N with a literal number: look the field up now */
<A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }	/* rescan as "$" "(NF)"; plain return, so no DEBUG trace */
<A>"$"{A}{B}*	{
		  /* "$name": either an indirect field reference whose operand
		     is rescanned, or a single IVAR token.
		     The match is copied before input()/unput() are called;
		     presumably those calls may disturb yytext -- TODO confirm.
		     NOTE(review): the strdup() result is not checked for NULL. */
		  int c;
		  char *yytext_copy = strdup(yytext);
		  c = input(); unput(c);	/* look for '(' or '[' */
		  if (c == '(' || c == '[' ||
		      infunc && isarg(yytext_copy+1) >= 0) {
			  /* call, subscript, or function argument: push the
			     name back (without the '$') and return only '$' */
			  unputstr(yytext_copy+1);
			  free(yytext_copy);
			return(INDIRECT);
		  } else {
			  /* otherwise enter the name in symtab and return it as IVAR */
			  yylval.cp =
				setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
			  free(yytext_copy);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }	/* "$" followed by anything else, e.g. a parenthesized expression */
<A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  /* numeric constant: digits with optional fraction, or a
		     leading-dot fraction, then an optional exponent; entered
		     in symtab under its source text with its atof() value */
		  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
		/* should this also have STR set? */
		  RET(NUMBER); }
14765389Sbostic 
<A>while	{ RET(WHILE); }	/* reserved words; all are listed ahead of the general {A}{B}* name rule */
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }	/* these four also store their own token code in yylval.i */
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }	/* from here on: one shared BLTIN token, with the F* code in yylval.i */
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
<A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }
18465389Sbostic 
<A>{A}{B}*	{ int n, c;
		  /* Any other name.  One character of lookahead separates a
		     call "name(" from a plain use.  The match is copied
		     before input()/unput() are called; presumably those
		     calls may disturb yytext -- TODO confirm.
		     NOTE(review): the strdup() result is not checked for NULL. */
		  char *yytext_copy = strdup(yytext);
		  c = input(); unput(c);	/* look for '(' */
		  if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
			/* isarg() recognised the name inside a function: pass its result on as ARG */
			yylval.i = n;
			free(yytext_copy);
			RET(ARG);
		  } else {
			/* otherwise enter the name in symtab; CALL if '(' follows, else VAR */
			yylval.cp =
				setsymtab(yytext_copy,"",0.0,STR|NUM,symtab);
			free(yytext_copy);
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		  }
		}

<A>\"		{ BEGIN str; clen = 0; }	/* opening quote: start collecting a string literal in cbuf */

<A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }	/* return ';' now; state sc makes the next call return '}' */
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ /* any other single character is its own token; openers are counted */
		  if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval.i = yytext[0]); /* everything else */ }
21465389Sbostic 
<reg>\\.	{ /* Escaped character inside /.../: keep the backslash and the
		     character that follows it.  Both bytes go through CADD so
		     they are subject to the same cbuf overflow check as
		     ordinary characters; storing them unchecked let a regular
		     expression made of escapes run past the end of cbuf. */
		  CADD;				/* yytext[0] is the backslash */
		  if (clen < CBUFLEN-1) {	/* no overflow was reported above */
			CADD;			/* reserve one more checked slot ... */
			cbuf[clen-1] = yytext[1];	/* ... and put the escaped character in it */
		  }
		}
<reg>\n		{ cbuf[clen] = 0;	/* terminate, so the message shows only this expression */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ /* closing slash: hand the collected text to the parser and
		     push the '/' back so it is scanned again in state A */
		  BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');
		  RET(REGEXPR); }
<reg>.		{ CADD; }
22365389Sbostic 
<str>\"		{ /* Closing quote.  s receives a copy of the string's value;
		     the symbol-table name is that text plus one trailing
		     blank (presumably so it cannot collide with the name of a
		     numeric constant -- TODO confirm). */
		  BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* terminate, so the message shows only this string */
		  ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ /* Escape sequences: CADD stores the leading backslash, which
		     reserves one slot in cbuf and applies the overflow check;
		     the slot is then overwritten with the character the
		     escape stands for.  Storing escapes without that check
		     let a string made of escapes run past the end of cbuf. */
		  CADD; cbuf[clen-1] = '"'; }
<str>"\\"n	{ CADD; cbuf[clen-1] = '\n'; }
<str>"\\"t	{ CADD; cbuf[clen-1] = '\t'; }
<str>"\\"f	{ CADD; cbuf[clen-1] = '\f'; }
<str>"\\"r	{ CADD; cbuf[clen-1] = '\r'; }
<str>"\\"b	{ CADD; cbuf[clen-1] = '\b'; }
<str>"\\"v	{ CADD; cbuf[clen-1] = '\v'; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ CADD; cbuf[clen-1] = '\007'; }	/* your compiler. hence 007 for bell */
<str>"\\\\"	{ CADD; }	/* the stored backslash is already the wanted character */
<str>"\\"({O}{O}{O}|{O}{O}|{O}) { unsigned int n = 0;	/* %o requires an unsigned int */
		  sscanf(yytext+1, "%o", &n); CADD; cbuf[clen-1] = n; }
<str>"\\"x({H}+) { unsigned int n = 0;	/* ANSI permits any number! */
		  sscanf(yytext+2, "%x", &n); CADD; cbuf[clen-1] = n; }
<str>"\\".	{ CADD; cbuf[clen-1] = yytext[1]; }	/* unknown escape: keep the character itself */
<str>.		{ CADD; }
24565389Sbostic 
24665389Sbostic %%
24765389Sbostic 
24865389Sbostic void startreg(void)	/* start parsing a regular expression */
24965389Sbostic {
25065389Sbostic 	BEGIN reg;
25165389Sbostic 	clen = 0;
25265389Sbostic }
25365389Sbostic 
254*65396Sbostic static int my_input( YY_CHAR *buf, int max_size )
25565389Sbostic {
25665389Sbostic 	extern uchar *lexprog;
25765389Sbostic 
258*65396Sbostic 	if ( lexprog ) {		/* awk '...' */
259*65396Sbostic 		int num_chars = strlen( lexprog );
260*65396Sbostic 		if ( num_chars > max_size )
261*65396Sbostic 			{
262*65396Sbostic 			num_chars = max_size;
263*65396Sbostic 			strncpy( buf, lexprog, num_chars );
26465389Sbostic }
265*65396Sbostic 		else
266*65396Sbostic 			strcpy( buf, lexprog );
267*65396Sbostic 		lexprog += num_chars;
268*65396Sbostic 		return num_chars;
26965389Sbostic 
270*65396Sbostic 	} else {			/* awk -f ... */
271*65396Sbostic 		int c = pgetc();
272*65396Sbostic 		if (c == EOF)
273*65396Sbostic 			return 0;
274*65396Sbostic 		buf[0] = c;
275*65396Sbostic 		return 1;
27665389Sbostic }
277*65396Sbostic }
27865389Sbostic 
/*
 * unputstr - push the string s back onto the scanner's input.
 * The characters are pushed last-to-first, so the scanner reads them
 * again in their original order.
 */
void unputstr(char *s)	/* put a string back on input */
{
	char *p;

	for (p = s + strlen(s); p > s; ) {
		--p;
		unput(*p);
	}
}
286*65396Sbostic 
/*
 * lex_input - return the next character from the scanner's input().
 * Nothing in this file calls it; presumably it exists so that code
 * outside the scanner can read raw input characters -- confirm against
 * the callers.
 * Declared with (void) so the definition is a real prototype, matching
 * startreg(void).
 */
int lex_input(void)
{
	return input();
}
291