xref: /csrg-svn/old/awk/awk.lx.l (revision 65931)
/*-
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.proprietary.c%
 */
76668Smckusick 
%X str chc reg comment

%{
#ifndef lint
static char sccsid[] = "@(#)awk.lx.l	4.6 (Berkeley) 01/28/94";
#endif /* not lint */

#include	<string.h>
#include	"awk.h"
#include	"awk.def"
extern int	yylval;		/* value handed to the parser with each token */
extern int	mustfld;	/* set to 1 when the NF rule fires */
extern int	ldbg;		/* nonzero: RETURN() traces tokens through ptoken() */
extern char	*lexprog;	/* if non-null, YY_INPUT reads the program from this string */
226668Smckusick 
/*
 * YY_INPUT - refill the scanner's buffer.
 *
 * When lexprog is non-null the program text is taken from that string:
 * at most max_size bytes are copied into buf, lexprog is advanced past
 * them, and result is the number of bytes delivered (0 once the string
 * is exhausted).  Otherwise the bytes come from read() on yyin.
 *
 * Exactly `result' bytes are stored.  The copy uses memcpy() because
 * strcpy() also stored the terminating NUL, which landed one byte past
 * max_size whenever the remaining text was exactly max_size long.
 */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size)				\
{								\
	if ( lexprog )						\
		{						\
		result = strlen( lexprog );			\
		if ( result > max_size )			\
			result = max_size;			\
		memcpy( buf, lexprog, result );			\
		lexprog += result;				\
		}						\
	else							\
		result = read( fileno(yyin), buf, max_size );	\
}
4144021Sbostic 
int	lineno	= 1;		/* current input line; bumped by the newline rules */

/*
 * RETURN(x) - return token x from yylex(), first tracing it through
 * ptoken() when ldbg is nonzero.  Wrapped in do/while(0) so that it
 * behaves as a single statement wherever it is used.
 */
#define	RETURN(x)	do { if (ldbg) ptoken(x); return(x); } while (0)

#define	CBUFLEN	150		/* size of cbuf in bytes */

/*
 * CADD - append the current input character (yytext[0]) to cbuf.
 * Once clen reaches CBUFLEN-1 an error is reported and the scanner
 * goes back to the INITIAL start condition.
 */
#define	CADD	do { \
		cbuf[clen++] = yytext[0]; \
		if (clen >= CBUFLEN-1) { \
			yyerror("string too long", cbuf); \
			BEGIN INITIAL; \
		} \
	} while (0)

char	cbuf[CBUFLEN];		/* text of the string or character class being collected */
int	clen, cflag;		/* bytes used in cbuf; 1 if the class began with "[^" */
%}

	/* Named character classes used by the rules below. */
A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
WS	[ \t]

%%
5644021Sbostic 	static int sc_flag = 0;
5744021Sbostic 
5844021Sbostic 	if ( sc_flag ) {
5944021Sbostic 		BEGIN INITIAL;
6044021Sbostic 		sc_flag = 0;
616668Smckusick 		RETURN('}');
626668Smckusick 	}
636668Smckusick 
^\n		lineno++;	/* blank line: count it, return nothing */
^{WS}*#.*\n	lineno++;	/* strip comment lines */
{WS}		;
<INITIAL,reg>"\\"\n	lineno++;
	/* Pattern keywords and logical operators. */
"||"		RETURN(BOR);
BEGIN		RETURN(XBEGIN);
END		RETURN(XEND);
PROGEND		RETURN(EOF);
"&&"		RETURN(AND);
"!"		RETURN(NOT);
	/* Comparison and match operators: yylval says which one. */
"!="		{ yylval = NE; RETURN(RELOP); }
"~"		{ yylval = MATCH; RETURN(MATCHOP); }
"!~"		{ yylval = NOTMATCH; RETURN(MATCHOP); }
"<"		{ yylval = LT; RETURN(RELOP); }
"<="		{ yylval = LE; RETURN(RELOP); }
"=="		{ yylval = EQ; RETURN(RELOP); }
">="		{ yylval = GE; RETURN(RELOP); }
">"		{ yylval = GT; RETURN(RELOP); }
">>"		{ yylval = APPEND; RETURN(RELOP); }
	/* Increment/decrement and assignment operators. */
"++"		{ yylval = INCR; RETURN(INCR); }
"--"		{ yylval = DECR; RETURN(DECR); }
"+="		{ yylval = ADDEQ; RETURN(ASGNOP); }
"-="		{ yylval = SUBEQ; RETURN(ASGNOP); }
"*="		{ yylval = MULTEQ; RETURN(ASGNOP); }
"/="		{ yylval = DIVEQ; RETURN(ASGNOP); }
"%="		{ yylval = MODEQ; RETURN(ASGNOP); }
"="		{ yylval = ASSIGN; RETURN(ASGNOP); }
916668Smckusick 
"$"{D}+	{	/* $0 is looked up as "$record"; any other $n is a FIELD */
			int fld = atoi(yytext+1);	/* parse the number once */
			if (fld == 0) {
				yylval = (hack)lookup("$record", symtab, 0);
				RETURN(STRING);
			} else {
				yylval = fieldadr(fld);
				RETURN(FIELD);
			}
		}
"$"{WS}*	{ RETURN(INDIRECT); }
NF		{ mustfld=1; yylval = (hack)setsymtab(yytext, EMPTY, 0.0, NUM, symtab); RETURN(VAR); }
({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		/* numeric constant: entered in symtab under its own text */
		yylval = (hack)setsymtab(yytext, EMPTY, atof(yytext), CON|NUM, symtab); RETURN(NUMBER); }
	/*
	 * A "}" first yields ';' and sets sc_flag; the '}' token itself
	 * is returned by the code at the top of yylex() on the next call.
	 */
"}"{WS}*\n	{ sc_flag = 1; lineno++; RETURN(';'); }
"}"		{ sc_flag = 1; RETURN(';'); }
;\n		{ lineno++; RETURN(';'); }
\n		{ lineno++; RETURN(NL); }
while		RETURN(WHILE);
for		RETURN(FOR);
if		RETURN(IF);
else		RETURN(ELSE);
next		RETURN(NEXT);
exit		RETURN(EXIT);
break		RETURN(BREAK);
continue	RETURN(CONTINUE);
	/* These keywords also store their own token code in yylval. */
print		{ yylval = PRINT; RETURN(PRINT); }
printf		{ yylval = PRINTF; RETURN(PRINTF); }
sprintf		{ yylval = SPRINTF; RETURN(SPRINTF); }
split		{ yylval = SPLIT; RETURN(SPLIT); }
substr		RETURN(SUBSTR);
index		RETURN(INDEX);
in		RETURN(IN);
getline		RETURN(GETLINE);
	/* Built-in functions share the FNCN token; yylval selects the function. */
length		{ yylval = FLENGTH; RETURN(FNCN); }
log		{ yylval = FLOG; RETURN(FNCN); }
int		{ yylval = FINT; RETURN(FNCN); }
exp		{ yylval = FEXP; RETURN(FNCN); }
sqrt		{ yylval = FSQRT; RETURN(FNCN); }
	/* Any other identifier is a variable, entered in symtab by name. */
{A}{B}*		{ yylval = (hack)setsymtab(yytext, tostring(""), 0.0, STR|NUM, symtab); RETURN(VAR); }
	/* Opening quote: start collecting a string constant in cbuf. */
\"		{ BEGIN str; clen=0; }
1316668Smckusick 
#		{ BEGIN comment; }
	/* Inside a comment everything is dropped; the newline ends it and is returned as NL. */
<comment>\n	{ BEGIN INITIAL; lineno++; RETURN(NL); }
<comment>.	;

	/* Any other single character is its own token. */
.		{ yylval = yytext[0]; RETURN(yytext[0]); }
1376668Smckusick 
<reg>"["	{ /* plain character class */ BEGIN chc; clen=0; cflag=0; }
<reg>"[^"	{ /* negated character class */ BEGIN chc; clen=0; cflag=1; }

<reg>"?"	RETURN(QUEST);
<reg>"+"	RETURN(PLUS);
<reg>"*"	RETURN(STAR);
<reg>"|"	RETURN(OR);
<reg>"."	RETURN(DOT);
<reg>"("	RETURN('(');
<reg>")"	RETURN(')');
<reg>"^"	RETURN('^');
<reg>"$"	RETURN('$');
<reg>\\{D}{D}{D}	{	/*
			 * Three-digit escape, read as octal.  %o stores through
			 * an unsigned int *, so scan into a matching local (0 if
			 * nothing converts) instead of directly into yylval.
			 */
			unsigned int oct = 0;
			sscanf(yytext+1, "%o", &oct);
			yylval = oct;
			RETURN(CHAR);
		}
<reg>\\.	{	if (yytext[1]=='n') yylval = '\n';
			else if (yytext[1] == 't') yylval = '\t';
			else yylval = yytext[1];
			RETURN(CHAR);
		}
<reg>"/"	{ /* leave reg; the '/' is pushed back and rescanned */ BEGIN INITIAL; unput('/'); }
<reg>\n		{ yyerror("newline in regular expression"); lineno++; BEGIN INITIAL; }
<reg>.		{ yylval = yytext[0]; RETURN(CHAR); }
1596668Smckusick 
<str>\"		{ /*
		   * Closing quote.  s is a copy of the string text; the
		   * name passed to setsymtab is that text plus a trailing
		   * space (presumably so that it cannot collide with a
		   * variable name -- confirm against setsymtab's callers).
		   */
		char *s; BEGIN INITIAL; cbuf[clen]=0; s = tostring(cbuf);
		cbuf[clen] = ' '; cbuf[++clen] = 0;
		yylval = (hack)setsymtab(cbuf, s, 0.0, CON|STR, symtab); RETURN(STRING); }
<str>\n		{ yyerror("newline in string"); lineno++; BEGIN INITIAL; }
	/*
	 * Escape sequences.  Each one stores a byte in cbuf, so each applies
	 * the same CBUFLEN limit that CADD applies to ordinary characters;
	 * without the check a run of escapes could write past the buffer.
	 */
<str>"\\\""	{ cbuf[clen++]='"';
		if (clen>=CBUFLEN-1) { yyerror("string too long", cbuf); BEGIN INITIAL; } }
<str,chc>"\\"n	{ cbuf[clen++]='\n';
		if (clen>=CBUFLEN-1) { yyerror("string too long", cbuf); BEGIN INITIAL; } }
<str,chc>"\\"t	{ cbuf[clen++]='\t';
		if (clen>=CBUFLEN-1) { yyerror("string too long", cbuf); BEGIN INITIAL; } }
<str,chc>"\\\\"	{ cbuf[clen++]='\\';
		if (clen>=CBUFLEN-1) { yyerror("string too long", cbuf); BEGIN INITIAL; } }
<str>.		{ CADD; }
1696668Smckusick 
<chc>"\\""]"	{ /* escaped ']' is stored literally, under the same limit CADD enforces */
		cbuf[clen++]=']';
		if (clen>=CBUFLEN-1) { yyerror("string too long", cbuf); BEGIN INITIAL; } }
<chc>"]"	{ /* end of class: hand a copy of the members to the parser */
		BEGIN reg; cbuf[clen]=0; yylval = (hack)tostring(cbuf);
		if (cflag==0) { RETURN(CCL); }
		else { RETURN(NCCL); } }
<chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN INITIAL; }
<chc>.		{ CADD; }

%%
1786668Smckusick 
1796668Smckusick startreg()
1806668Smckusick {
1816668Smckusick 	BEGIN reg;
1826668Smckusick }
18317490Ssam 
18417490Ssam ptoken(n)
18517490Ssam {
18617490Ssam 	extern struct tok {
18717490Ssam 		char *tnm;
18817490Ssam 		int yval;
18917490Ssam 	} tok[];
19017490Ssam 	extern int yylval;
19117490Ssam 
19217490Ssam 	printf("lex:");
19317490Ssam 	if (n < 128) {
19417490Ssam 		printf(" %c\n",n);
19517490Ssam 		return;
19617490Ssam 	}
19717490Ssam 	if (n <= 256 || n >= LASTTOKEN) {
19817490Ssam 		printf("? %o\n",n);
19917490Ssam 		return;
20017490Ssam 	}
20117490Ssam 	printf(" %s",tok[n-257].tnm);
20217490Ssam 	switch (n) {
20317490Ssam 
20417490Ssam 	case RELOP:
20517490Ssam 	case MATCHOP:
20617490Ssam 	case ASGNOP:
20717490Ssam 	case STRING:
20817490Ssam 	case FIELD:
20917490Ssam 	case VAR:
21017490Ssam 	case NUMBER:
21117490Ssam 	case FNCN:
21217490Ssam 		printf(" (%s)", yytext);
21317490Ssam 		break;
21417490Ssam 
21517490Ssam 	case CHAR:
21617490Ssam 		printf(" (%o)", yylval);
21717490Ssam 		break;
21817490Ssam 	}
21917490Ssam 	putchar('\n');
22017490Ssam }
221