xref: /csrg-svn/old/awk/awk.lx.l (revision 17490)
1*17490Ssam /*	awk.lx.l	4.3	84/12/08	*/
26668Smckusick 
%Start A str chc sc reg comment

%{
/*
 * Scanner for awk.
 *
 * Start conditions (declared on the %Start line above):
 *	A	ordinary program text
 *	str	inside a "..." string literal
 *	chc	inside a [...] character class of a regular expression
 *	sc	a '}' has just been reported as ';'; the next call to
 *		yylex() reports the '}' itself
 *	reg	inside a regular expression (entered through startreg())
 *	comment	inside a # comment that follows other text on the line
 */
#include	"awk.h"
#include	"awk.def"
#undef	input	/* defeat lex */
extern int	yylval;
extern int	mustfld;
extern int	ldbg;

int	lineno	= 1;	/* current line of the awk program */

/* Return token x from yylex(), tracing it through ptoken() when ldbg is set. */
#define	RETURN(x)	{if (ldbg) ptoken(x); return(x); }

/* Capacity of cbuf, including the terminating NUL. */
#define	CBUFLEN	150

/*
 * Append the current input character (yytext[0]) to cbuf.  Once the
 * buffer is full the text is reported as too long and the scanner goes
 * back to state A.  The store is guarded so nothing is ever written
 * past the end of cbuf, and the buffer is NUL-terminated before it is
 * handed to yyerror().
 */
#define	CADD	do { \
		if (clen < CBUFLEN-1) \
			cbuf[clen++] = yytext[0]; \
		if (clen >= CBUFLEN-1) { \
			cbuf[CBUFLEN-1] = 0; \
			yyerror("string too long", cbuf); \
			BEGIN A; \
		} \
	} while (0)

char	cbuf[CBUFLEN];	/* text of the string or character class being collected */
int	clen, cflag;	/* clen: characters in cbuf; cflag: 1 for a negated class [^...] */
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
WS	[ \t]
256668Smckusick 
266668Smckusick %%
276668Smckusick 	switch (yybgin-yysvec-1) {	/* witchcraft */
286668Smckusick 	case 0:
296668Smckusick 		BEGIN A;
306668Smckusick 		break;
316668Smckusick 	case sc:
326668Smckusick 		BEGIN A;
336668Smckusick 		RETURN('}');
346668Smckusick 	}
356668Smckusick 
<A>^\n		{ lineno++; }	/* empty line: count it, no token */
<A>^{WS}*#.*\n	{ lineno++; }	/* strip comment lines */
<A>{WS}		;	/* blanks and tabs produce no token */
<A,reg>"\\"\n	{ lineno++; }	/* backslash-newline: count the line, no token */
<A>"||"		{ RETURN(BOR); }
<A>BEGIN	{ RETURN(XBEGIN); }
<A>END		{ RETURN(XEND); }
<A>PROGEND	{ RETURN(EOF); }
<A>"&&"		{ RETURN(AND); }
<A>"!"		{ RETURN(NOT); }
<A>"!="		{ yylval = NE;		RETURN(RELOP); }
<A>"~"		{ yylval = MATCH;	RETURN(MATCHOP); }
<A>"!~"		{ yylval = NOTMATCH;	RETURN(MATCHOP); }
<A>"<"		{ yylval = LT;		RETURN(RELOP); }
<A>"<="		{ yylval = LE;		RETURN(RELOP); }
<A>"=="		{ yylval = EQ;		RETURN(RELOP); }
<A>">="		{ yylval = GE;		RETURN(RELOP); }
<A>">"		{ yylval = GT;		RETURN(RELOP); }
<A>">>"		{ yylval = APPEND;	RETURN(RELOP); }
<A>"++"		{ yylval = INCR;	RETURN(INCR); }
<A>"--"		{ yylval = DECR;	RETURN(DECR); }
<A>"+="		{ yylval = ADDEQ;	RETURN(ASGNOP); }
<A>"-="		{ yylval = SUBEQ;	RETURN(ASGNOP); }
<A>"*="		{ yylval = MULTEQ;	RETURN(ASGNOP); }
<A>"/="		{ yylval = DIVEQ;	RETURN(ASGNOP); }
<A>"%="		{ yylval = MODEQ;	RETURN(ASGNOP); }
<A>"="		{ yylval = ASSIGN;	RETURN(ASGNOP); }

<A>"$"{D}+	{	int fldno;
			fldno = atoi(yytext+1);
			if (fldno != 0) {
				/* $n with n != 0: a numbered field */
				yylval = fieldadr(fldno);
				RETURN(FIELD);
			}
			/* $0: looked up in symtab under the name "$record" */
			yylval = (hack)lookup("$record", symtab, 0);
			RETURN(STRING);
		}
<A>"$"{WS}*	{ RETURN(INDIRECT); }	/* '$' not followed by digits */
<A>NF		{	/* NF: note that fields are needed (mustfld) */
			mustfld = 1;
			yylval = (hack)setsymtab(yytext, EMPTY, 0.0, NUM, symtab);
			RETURN(VAR);
		}
<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
			/* numeric constant: entered in symtab with its atof() value */
			yylval = (hack)setsymtab(yytext, EMPTY, atof(yytext), CON|NUM, symtab);
			RETURN(NUMBER);
		}
<A>"}"{WS}*\n	{	/* '}' ending a line: report ';' now, '}' on the next call */
			lineno++;
			BEGIN sc;
			RETURN(';');
		}
<A>"}"		{	/* '}': report ';' now, '}' on the next call */
			BEGIN sc;
			RETURN(';');
		}
<A>;\n		{ lineno++; RETURN(';'); }	/* ';' at end of line: newline is absorbed */
<A>\n		{ lineno++; RETURN(NL); }
<A>while	{ RETURN(WHILE); }
<A>for		{ RETURN(FOR); }
<A>if		{ RETURN(IF); }
<A>else		{ RETURN(ELSE); }
<A>next		{ RETURN(NEXT); }
<A>exit		{ RETURN(EXIT); }
<A>break	{ RETURN(BREAK); }
<A>continue	{ RETURN(CONTINUE); }
<A>print	{ yylval = PRINT;	RETURN(PRINT); }
<A>printf	{ yylval = PRINTF;	RETURN(PRINTF); }
<A>sprintf	{ yylval = SPRINTF;	RETURN(SPRINTF); }
<A>split	{ yylval = SPLIT;	RETURN(SPLIT); }
<A>substr	{ RETURN(SUBSTR); }
<A>index	{ RETURN(INDEX); }
<A>in		{ RETURN(IN); }
<A>getline	{ RETURN(GETLINE); }
<A>length	{ yylval = FLENGTH;	RETURN(FNCN); }
<A>log		{ yylval = FLOG;	RETURN(FNCN); }
<A>int		{ yylval = FINT;	RETURN(FNCN); }
<A>exp		{ yylval = FEXP;	RETURN(FNCN); }
<A>sqrt		{ yylval = FSQRT;	RETURN(FNCN); }
<A>{A}{B}*	{	/* any other identifier is a variable */
			yylval = (hack)setsymtab(yytext, tostring(""), 0.0, STR|NUM, symtab);
			RETURN(VAR);
		}
<A>\"		{ clen = 0; BEGIN str; }	/* opening quote: start collecting into cbuf */

<A>#		{ BEGIN comment; }	/* comment after other text on the line */
<comment>\n	{ lineno++; BEGIN A; RETURN(NL); }
<comment>.	;	/* comment text is discarded */

<A>.		{ yylval = yytext[0]; RETURN(yytext[0]); }	/* any other character is its own token */
1096668Smckusick 
<reg>"["	{ BEGIN chc; clen=0; cflag=0; }	/* start of a character class */
<reg>"[^"	{ BEGIN chc; clen=0; cflag=1; }	/* start of a negated character class */

<reg>"?"	{ RETURN(QUEST); }
<reg>"+"	{ RETURN(PLUS); }
<reg>"*"	{ RETURN(STAR); }
<reg>"|"	{ RETURN(OR); }
<reg>"."	{ RETURN(DOT); }
<reg>"("	{ RETURN('('); }
<reg>")"	{ RETURN(')'); }
<reg>"^"	{ RETURN('^'); }
<reg>"$"	{ RETURN('$'); }
<reg>\\{D}{D}{D}	{	/*
			 * \ddd octal escape.  %o stores through an unsigned
			 * pointer, so scan into an unsigned temporary rather
			 * than into the int yylval; the temporary starts at 0
			 * so yylval is well defined even when no octal digit
			 * can be converted (e.g. \899).
			 */
			unsigned int oct;
			oct = 0;
			sscanf(yytext+1, "%o", &oct);
			yylval = oct;
			RETURN(CHAR);
		}
<reg>\\.	{	/* \n and \t are translated; any other \c stands for c itself */
			if (yytext[1]=='n') yylval = '\n';
			else if (yytext[1] == 't') yylval = '\t';
			else yylval = yytext[1];
			RETURN(CHAR);
		}
<reg>"/"	{ BEGIN A; unput('/'); }	/* closing slash: push it back to be rescanned in state A */
<reg>\n		{ yyerror("newline in regular expression"); lineno++; BEGIN A; }
<reg>.		{ yylval = yytext[0]; RETURN(CHAR); }
1316668Smckusick 
<str>\"		{	/*
			 * Closing quote.  s is a copy of the string's value;
			 * the symbol-table name is the same text with one
			 * blank appended.
			 */
			char *s; BEGIN A; cbuf[clen]=0; s = tostring(cbuf);
			cbuf[clen] = ' '; cbuf[++clen] = 0;
			yylval = (hack)setsymtab(cbuf, s, 0.0, CON|STR, symtab); RETURN(STRING); }
<str>\n		{ yyerror("newline in string"); lineno++; BEGIN A; }
<str>"\\\""	{	/* escapes obey the same length limit as CADD so cbuf cannot overflow */
			cbuf[clen++]='"';
			if(clen>=CBUFLEN-1) {yyerror("string too long", cbuf); BEGIN A;} }
<str,chc>"\\"n	{	cbuf[clen++]='\n';
			if(clen>=CBUFLEN-1) {yyerror("string too long", cbuf); BEGIN A;} }
<str,chc>"\\"t	{	cbuf[clen++]='\t';
			if(clen>=CBUFLEN-1) {yyerror("string too long", cbuf); BEGIN A;} }
<str,chc>"\\\\"	{	cbuf[clen++]='\\';
			if(clen>=CBUFLEN-1) {yyerror("string too long", cbuf); BEGIN A;} }
<str>.		{ CADD; }
1416668Smckusick 
<chc>"\\""]"	{	/* escaped ']' is an ordinary member; same length limit as CADD */
			cbuf[clen++]=']';
			if(clen>=CBUFLEN-1) {yyerror("string too long", cbuf); BEGIN A;} }
<chc>"]"	{	/* end of class: yylval is a copy of the collected members */
			BEGIN reg; cbuf[clen]=0; yylval = (hack)tostring(cbuf);
			if (cflag==0) { RETURN(CCL); }
			else { RETURN(NCCL); } }
<chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN A; }
<chc>.		{ CADD; }
1486668Smckusick 
1496668Smckusick %%
1506668Smckusick 
1516668Smckusick input()
1526668Smckusick {
1536668Smckusick 	register c;
1546668Smckusick 	extern char *lexprog;
1556668Smckusick 
1566668Smckusick 	if (yysptr > yysbuf)
1576668Smckusick 		c = U(*--yysptr);
1586668Smckusick 	else if (yyin == NULL)
1596668Smckusick 		c = *lexprog++;
1606668Smckusick 	else
1616668Smckusick 		c = getc(yyin);
1626668Smckusick 	if (c == '\n')
1636668Smckusick 		yylineno++;
1646668Smckusick 	else if (c == EOF)
1656668Smckusick 		c = 0;
1666668Smckusick 	return(c);
1676668Smckusick }
1686668Smckusick 
1696668Smckusick startreg()
1706668Smckusick {
1716668Smckusick 	BEGIN reg;
1726668Smckusick }
173*17490Ssam 
174*17490Ssam ptoken(n)
175*17490Ssam {
176*17490Ssam 	extern struct tok {
177*17490Ssam 		char *tnm;
178*17490Ssam 		int yval;
179*17490Ssam 	} tok[];
180*17490Ssam 	extern char yytext[];
181*17490Ssam 	extern int yylval;
182*17490Ssam 
183*17490Ssam 	printf("lex:");
184*17490Ssam 	if (n < 128) {
185*17490Ssam 		printf(" %c\n",n);
186*17490Ssam 		return;
187*17490Ssam 	}
188*17490Ssam 	if (n <= 256 || n >= LASTTOKEN) {
189*17490Ssam 		printf("? %o\n",n);
190*17490Ssam 		return;
191*17490Ssam 	}
192*17490Ssam 	printf(" %s",tok[n-257].tnm);
193*17490Ssam 	switch (n) {
194*17490Ssam 
195*17490Ssam 	case RELOP:
196*17490Ssam 	case MATCHOP:
197*17490Ssam 	case ASGNOP:
198*17490Ssam 	case STRING:
199*17490Ssam 	case FIELD:
200*17490Ssam 	case VAR:
201*17490Ssam 	case NUMBER:
202*17490Ssam 	case FNCN:
203*17490Ssam 		printf(" (%s)", yytext);
204*17490Ssam 		break;
205*17490Ssam 
206*17490Ssam 	case CHAR:
207*17490Ssam 		printf(" (%o)", yylval);
208*17490Ssam 		break;
209*17490Ssam 	}
210*17490Ssam 	putchar('\n');
211*17490Ssam }
212