xref: /csrg-svn/old/awk/awk.lx.l (revision 44021)
1*44021Sbostic /*	awk.lx.l	4.4	90/06/25	*/
26668Smckusick 
3*44021Sbostic %X str chc reg comment
46668Smckusick 
56668Smckusick %{
6*44021Sbostic #include	<string.h>
76668Smckusick #include	"awk.h"
86668Smckusick #include	"awk.def"
96668Smckusick extern int	yylval;
106668Smckusick extern int	mustfld;
1117490Ssam extern int	ldbg;
12*44021Sbostic extern char	*lexprog;
136668Smckusick 
/*
 * YY_INPUT - replacement for the scanner's default input macro.
 *
 * buf       destination buffer supplied by the scanner
 * result    lvalue that receives the number of bytes placed in buf
 * max_size  largest number of bytes that may be stored in buf
 *
 * When lexprog is non-null the program text is taken from that
 * in-memory string: at most max_size bytes are copied into buf, lexprog
 * is advanced past them, and result is the number of bytes copied
 * (0 once the string is used up).  Otherwise result is whatever read()
 * returns for yyin's file descriptor.
 *
 * The copy is done with memcpy() and an explicit length so that exactly
 * `result' bytes are stored.  Copying the terminating NUL as well would
 * store max_size + 1 bytes whenever strlen(lexprog) == max_size.
 */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size)				\
{								\
	if ( lexprog )						\
		{						\
		result = strlen( lexprog );			\
		if ( result > max_size )			\
			result = max_size;			\
		memcpy( buf, lexprog, result );			\
		lexprog += result;				\
		}						\
	else							\
		result = read( fileno(yyin), buf, max_size );	\
}

32*44021Sbostic 
/* Current input line number; rules increment it when they consume a newline. */
336668Smckusick int	lineno	= 1;
/* Return token x from the scanner, printing it with ptoken() first when ldbg is nonzero. */
3417490Ssam #define	RETURN(x)	{if (ldbg) ptoken(x); return(x); }
/*
 * Append the first character of the current match to cbuf.  Once clen
 * reaches CBUFLEN-1 the text is reported as too long and the scanner is
 * switched back to the INITIAL start condition.
 */
35*44021Sbostic #define	CADD	cbuf[clen++]=yytext[0]; if(clen>=CBUFLEN-1) {yyerror("string too long", cbuf); BEGIN INITIAL;}
/* Size of cbuf in bytes. */
366668Smckusick #define	CBUFLEN	150
/* Collects the text of the string constant or character class being scanned. */
376668Smckusick char	cbuf[CBUFLEN];
/* clen: number of characters stored in cbuf; cflag: 1 when a class opened with "[^", 0 when it opened with "[". */
386668Smckusick int	clen, cflag;
396668Smckusick %}
406668Smckusick 
	/* Named patterns: A = first character of an identifier, B = later identifier character, D = decimal digit, WS = blank or tab. */
416668Smckusick A	[a-zA-Z_]
426668Smckusick B	[a-zA-Z0-9_]
436668Smckusick D	[0-9]
446668Smckusick WS	[ \t]
456668Smckusick 
466668Smckusick %%
	/*
	 * Code placed before the first rule; sc_flag is static so it survives
	 * between calls.  The "}" rules return ';' and set sc_flag, and the
	 * following pass through this code then delivers the postponed '}'
	 * token, clears the flag and selects the INITIAL start condition.
	 */
47*44021Sbostic 	static int sc_flag = 0;
48*44021Sbostic 
49*44021Sbostic 	if ( sc_flag ) {
50*44021Sbostic 		BEGIN INITIAL;
51*44021Sbostic 		sc_flag = 0;
526668Smckusick 		RETURN('}');
536668Smckusick 	}
546668Smckusick 
	/* Empty lines and whole-line comments: count the line, return no token. */
55*44021Sbostic ^\n		lineno++;
56*44021Sbostic ^{WS}*#.*\n	lineno++;	/* strip comment lines */
	/* Blanks and tabs between tokens are discarded. */
57*44021Sbostic {WS}		;
	/* Backslash followed by newline is swallowed; only the line count changes. */
58*44021Sbostic <INITIAL,reg>"\\"\n	lineno++;
59*44021Sbostic "||"		RETURN(BOR);
	/* BEGIN and END become XBEGIN and XEND; the word PROGEND makes the scanner return EOF. */
60*44021Sbostic BEGIN	RETURN(XBEGIN);
61*44021Sbostic END		RETURN(XEND);
62*44021Sbostic PROGEND	RETURN(EOF);
63*44021Sbostic "&&"		RETURN(AND);
64*44021Sbostic "!"		RETURN(NOT);
	/*
	 * Most operators below share a token (RELOP, MATCHOP or ASGNOP) and
	 * record the particular operator in yylval; "++" and "--" return
	 * their own tokens.  Note that ">>" is returned as a RELOP whose
	 * yylval is APPEND.
	 */
65*44021Sbostic "!="		{ yylval = NE; RETURN(RELOP); }
66*44021Sbostic "~"		{ yylval = MATCH; RETURN(MATCHOP); }
67*44021Sbostic "!~"		{ yylval = NOTMATCH; RETURN(MATCHOP); }
68*44021Sbostic "<"		{ yylval = LT; RETURN(RELOP); }
69*44021Sbostic "<="		{ yylval = LE; RETURN(RELOP); }
70*44021Sbostic "=="		{ yylval = EQ; RETURN(RELOP); }
71*44021Sbostic ">="		{ yylval = GE; RETURN(RELOP); }
72*44021Sbostic ">"		{ yylval = GT; RETURN(RELOP); }
73*44021Sbostic ">>"		{ yylval = APPEND; RETURN(RELOP); }
74*44021Sbostic "++"		{ yylval = INCR; RETURN(INCR); }
75*44021Sbostic "--"		{ yylval = DECR; RETURN(DECR); }
76*44021Sbostic "+="		{ yylval = ADDEQ; RETURN(ASGNOP); }
77*44021Sbostic "-="		{ yylval = SUBEQ; RETURN(ASGNOP); }
78*44021Sbostic "*="		{ yylval = MULTEQ; RETURN(ASGNOP); }
79*44021Sbostic "/="		{ yylval = DIVEQ; RETURN(ASGNOP); }
80*44021Sbostic "%="		{ yylval = MODEQ; RETURN(ASGNOP); }
81*44021Sbostic "="		{ yylval = ASSIGN; RETURN(ASGNOP); }
826668Smckusick 
	/*
	 * "$" followed by digits.  A value of zero yields the "$record" symbol
	 * as a STRING token; any other value yields a FIELD token whose
	 * yylval comes from fieldadr().
	 */
83*44021Sbostic "$"{D}+	{	if (atoi(yytext+1)==0) {
846668Smckusick 				yylval = (hack)lookup("$record", symtab, 0);
856668Smckusick 				RETURN(STRING);
866668Smckusick 			} else {
876668Smckusick 				yylval = fieldadr(atoi(yytext+1));
886668Smckusick 				RETURN(FIELD);
896668Smckusick 			}
906668Smckusick 		}
	/* A "$" not followed by digits (trailing blanks allowed) is the INDIRECT token. */
91*44021Sbostic "$"{WS}*	{ RETURN(INDIRECT); }
	/* NF sets mustfld and is returned as a VAR entered in the symbol table. */
92*44021Sbostic NF		{ mustfld=1; yylval = (hack)setsymtab(yytext, EMPTY, 0.0, NUM, symtab); RETURN(VAR); }
	/* Numeric constant with optional fraction and exponent; its atof() value is stored with the symbol. */
93*44021Sbostic ({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
9410794Ssam 		yylval = (hack)setsymtab(yytext, EMPTY, atof(yytext), CON|NUM, symtab); RETURN(NUMBER); }
	/* "}" first returns ';' and sets sc_flag; the '}' itself is returned by the code at the top of the rules. */
95*44021Sbostic "}"{WS}*\n	{ sc_flag = 1; lineno++; RETURN(';'); }
96*44021Sbostic "}"		{ sc_flag = 1; RETURN(';'); }
	/* ";" directly followed by a newline yields a single ';'; a lone newline yields NL. */
97*44021Sbostic ;\n		{ lineno++; RETURN(';'); }
98*44021Sbostic \n		{ lineno++; RETURN(NL); }
98*44021Sbostic \n		{ lineno++; RETURN(NL); }
99*44021Sbostic while	RETURN(WHILE);
100*44021Sbostic for		RETURN(FOR);
101*44021Sbostic if		RETURN(IF);
102*44021Sbostic else		RETURN(ELSE);
103*44021Sbostic next		RETURN(NEXT);
104*44021Sbostic exit		RETURN(EXIT);
105*44021Sbostic break	RETURN(BREAK);
106*44021Sbostic continue	RETURN(CONTINUE);
107*44021Sbostic print	{ yylval = PRINT; RETURN(PRINT); }
108*44021Sbostic printf	{ yylval = PRINTF; RETURN(PRINTF); }
109*44021Sbostic sprintf	{ yylval = SPRINTF; RETURN(SPRINTF); }
110*44021Sbostic split	{ yylval = SPLIT; RETURN(SPLIT); }
111*44021Sbostic substr	RETURN(SUBSTR);
112*44021Sbostic index	RETURN(INDEX);
113*44021Sbostic in		RETURN(IN);
114*44021Sbostic getline	RETURN(GETLINE);
115*44021Sbostic length	{ yylval = FLENGTH; RETURN(FNCN); }
116*44021Sbostic log		{ yylval = FLOG; RETURN(FNCN); }
117*44021Sbostic int		{ yylval = FINT; RETURN(FNCN); }
118*44021Sbostic exp		{ yylval = FEXP; RETURN(FNCN); }
119*44021Sbostic sqrt		{ yylval = FSQRT; RETURN(FNCN); }
120*44021Sbostic {A}{B}*	{ yylval = (hack)setsymtab(yytext, tostring(""), 0.0, STR|NUM, symtab); RETURN(VAR); }
121*44021Sbostic \"		{ BEGIN str; clen=0; }
1226668Smckusick 
123*44021Sbostic #		{ BEGIN comment; }
124*44021Sbostic <comment>\n	{ BEGIN INITIAL; lineno++; RETURN(NL); }
1256668Smckusick <comment>.	;
1266668Smckusick 
127*44021Sbostic .		{ yylval = yytext[0]; RETURN(yytext[0]); }
1286668Smckusick 
	/*
	 * Rules for the `reg' start condition, which startreg() selects.
	 * "[" or "[^" opens a character class: its text is collected in cbuf
	 * under the chc condition and cflag records whether it was negated.
	 */
1296668Smckusick <reg>"["	{ BEGIN chc; clen=0; cflag=0; }
1306668Smckusick <reg>"[^"	{ BEGIN chc; clen=0; cflag=1; }
1316668Smckusick 
	/* Regular-expression operators, each returned as its own token. */
1326668Smckusick <reg>"?"	RETURN(QUEST);
1336668Smckusick <reg>"+"	RETURN(PLUS);
1346668Smckusick <reg>"*"	RETURN(STAR);
1356668Smckusick <reg>"|"	RETURN(OR);
1366668Smckusick <reg>"."	RETURN(DOT);
1376668Smckusick <reg>"("	RETURN('(');
1386668Smckusick <reg>")"	RETURN(')');
1396668Smckusick <reg>"^"	RETURN('^');
1406668Smckusick <reg>"$"	RETURN('$');
	/* Backslash plus three digits: the digits are read as an octal character code. */
	/* NOTE(review): {D} also admits 8 and 9, which %o will not consume -- confirm this is intended. */
1416668Smckusick <reg>\\{D}{D}{D}	{ sscanf(yytext+1, "%o", &yylval); RETURN(CHAR); }
	/* Other backslash escapes: \n and \t give newline and tab, any other character stands for itself. */
1426668Smckusick <reg>\\.	{	if (yytext[1]=='n') yylval = '\n';
1436668Smckusick 			else if (yytext[1] == 't') yylval = '\t';
1446668Smckusick 			else yylval = yytext[1];
1456668Smckusick 			RETURN(CHAR);
1466668Smckusick 		}
	/* "/" ends the regular expression: return to INITIAL and push the "/" back so it is scanned again there. */
147*44021Sbostic <reg>"/"	{ BEGIN INITIAL; unput('/'); }
148*44021Sbostic <reg>\n		{ yyerror("newline in regular expression"); lineno++; BEGIN INITIAL; }
	/* Every remaining character is an ordinary CHAR. */
1496668Smckusick <reg>.		{ yylval = yytext[0]; RETURN(CHAR); }
1506668Smckusick 
151*44021Sbostic <str>\"		{ char *s; BEGIN INITIAL; cbuf[clen]=0; s = tostring(cbuf);
1526668Smckusick 		cbuf[clen] = ' '; cbuf[++clen] = 0;
1536668Smckusick 		yylval = (hack)setsymtab(cbuf, s, 0.0, CON|STR, symtab); RETURN(STRING); }
154*44021Sbostic <str>\n		{ yyerror("newline in string"); lineno++; BEGIN INITIAL; }
1556668Smckusick <str>"\\\""	{ cbuf[clen++]='"'; }
1566668Smckusick <str,chc>"\\"n	{ cbuf[clen++]='\n'; }
1576668Smckusick <str,chc>"\\"t	{ cbuf[clen++]='\t'; }
1586668Smckusick <str,chc>"\\\\"	{ cbuf[clen++]='\\'; }
1596668Smckusick <str>.		{ CADD; }
1606668Smckusick 
1616668Smckusick <chc>"\\""]"	{ cbuf[clen++]=']'; }
1626668Smckusick <chc>"]"	{ BEGIN reg; cbuf[clen]=0; yylval = (hack)tostring(cbuf);
1636668Smckusick 		if (cflag==0) { RETURN(CCL); }
1646668Smckusick 		else { RETURN(NCCL); } }
165*44021Sbostic <chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN INITIAL; }
1666668Smckusick <chc>.		{ CADD; }
1676668Smckusick 
1686668Smckusick %%
1696668Smckusick 
/*
 * startreg - select the `reg' start condition, so that the input which
 * follows is scanned with the regular-expression rules.
 */
startreg()
{
	BEGIN(reg);
}
17417490Ssam 
/*
 * ptoken - debugging aid used by RETURN when ldbg is set: print token
 * code n on standard output, one line per token, prefixed with "lex:".
 *
 * Codes below 128 are printed as a character.  Codes up to 256, or at or
 * above LASTTOKEN, are printed in octal after a "?".  Any other code is
 * printed by name from the tok[] table (indexed from 257), followed by
 * the matched text for value-carrying tokens or the octal value of
 * yylval for CHAR.
 */
ptoken(n)
{
	extern struct tok {
		char *tnm;
		int yval;
	} tok[];
	extern int yylval;

	printf("lex:");

	if (n < 128) {
		/* printed as a plain character */
		printf(" %c\n",n);
	} else if (n <= 256 || n >= LASTTOKEN) {
		/* outside the range covered by tok[] */
		printf("? %o\n",n);
	} else {
		printf(" %s",tok[n-257].tnm);
		if (n == CHAR) {
			printf(" (%o)", yylval);
		} else if (n == RELOP || n == MATCHOP || n == ASGNOP ||
		    n == STRING || n == FIELD || n == VAR ||
		    n == NUMBER || n == FNCN) {
			printf(" (%s)", yytext);
		}
		putchar('\n');
	}
}
212