xref: /csrg-svn/old/as.vax/asscan2.c (revision 13516)
15798Srrh /*
25798Srrh  *	Copyright (c) 1982 Regents of the University of California
35798Srrh  */
45798Srrh #ifndef lint
5*13516Srrh static char sccsid[] = "@(#)asscan2.c 4.10 06/30/83";
65798Srrh #endif not lint
75798Srrh 
85798Srrh #include "asscanl.h"
913467Srrh 
/*
 *	oval is set by scan_dot_s at its ret: label to the type of the
 *	token just stored, and to SP when blanks are skipped.  It starts
 *	out as NL; scan_dot_s only tries to read a '#' as a line-number
 *	marker while oval == NL.
 */
105798Srrh static	inttoktype	oval = NL;
115798Srrh 
/*
 *	Input buffer state shared with fillinbuffer().
 *	NOTE(review): the MEMTOREGBUF and REGTOMEMBUF macros used by
 *	scan_dot_s presumably copy Ginbufptr/Ginbufcnt to and from its
 *	local inbufptr/inbufcnt; the macros are defined outside this
 *	file -- confirm in asscanl.h.
 */
1213467Srrh char	inbufunget[8];	/* NOTE(review): not referenced in the code visible here; presumably push-back room ahead of inbuffer -- confirm against the ungetc macro */
1313467Srrh char	inbuffer[ASINBUFSIZ];	/* filled by fillinbuffer() */
1413467Srrh char	*Ginbufptr = inbuffer;	/* fillinbuffer() resets this to inbuffer + 1 */
1513467Srrh int	Ginbufcnt = 0;	/* fillinbuffer() resets this to (bytes obtained) - 1 */
1613467Srrh 
/*
 *	fillinbuffer: refill inbuffer with read() on stdin's descriptor
 *	and reset the global cursor Ginbufptr/Ginbufcnt.
 *
 *	read() is repeated until inbuffer is full or a call returns <= 0.
 *	A return of 0 is remembered in hadeof, and later calls then skip
 *	read() altogether.  When no bytes were obtained, inbuffer[0] is
 *	set to EOFCHAR, so every call after the input is exhausted yields
 *	exactly one EOFCHAR.
 *
 *	No value is returned explicitly.
 *
 *	NOTE(review): a negative read() result ends the loop just like
 *	end of file but is not recorded in hadeof, so the next call will
 *	try read() again.
 */
1713467Srrh fillinbuffer()
1813467Srrh {
1913467Srrh 		int	nread;	/* bytes placed in inbuffer by this call */
2013467Srrh 	static	int	hadeof;	/* nonzero once read() has returned 0 */
2113467Srrh 		int	goal;	/* bytes of inbuffer still unfilled */
2213467Srrh 		int	got;	/* result of the latest read() */
2313467Srrh 
2413467Srrh 	nread = 0;
2513467Srrh 	if (hadeof == 0){
2613467Srrh 		goal = sizeof(inbuffer);
2713467Srrh 		do {
2813467Srrh 			got = read(stdin->_file, inbuffer + nread, goal);	/* append after what is already there */
2913467Srrh 			if (got == 0)
3013467Srrh 				hadeof = 1;
3113467Srrh 			if (got <= 0)	/* end of file or error: stop with what we have */
3213467Srrh 				break;
3313467Srrh 			nread += got;
3413467Srrh 			goal -= got;
3513467Srrh 		} while (goal);
3613467Srrh 	}
375798Srrh 	/*
3813467Srrh 	 *	getchar assumes that Ginbufcnt and Ginbufptr
3913467Srrh 	 *	are adjusted as if one character has been removed
4013467Srrh 	 *	from the input.
415798Srrh 	 */
4213467Srrh 	if (nread == 0){	/* nothing obtained: supply a single EOFCHAR */
4313467Srrh 		inbuffer[0] = EOFCHAR;
4413467Srrh 		nread = 1;
4513467Srrh 	}
4613467Srrh 	Ginbufcnt = nread - 1;	/* inbuffer[0] counts as already taken */
4713467Srrh 	Ginbufptr = inbuffer + 1;
4813467Srrh }
495798Srrh 
/*
 *	strtext is the name scan_dot_s hands to savestr() when it finishes
 *	a double-quoted string.  It is a separate array of NCPString + 1
 *	chars, except when FLEXNAMES is defined and NCPName >= NCPString;
 *	in that case it is just another name for yytext.
 *	NOTE(review): yytext is declared outside this file; the aliasing
 *	presumably relies on yytext being at least NCPString + 1 chars
 *	in that configuration -- confirm at its definition.
 */
5013462Srrh #ifndef FLEXNAMES
5113462Srrh char	strtext[NCPString + 1];
5213462Srrh #else FLEXNAMES
5313462Srrh # if NCPName < NCPString
5413462Srrh char	strtext[NCPString + 1];
5513462Srrh # else
5613462Srrh #define	strtext yytext
5713462Srrh # endif
5813462Srrh #endif FLEXNAMES
5913462Srrh 
605798Srrh scan_dot_s(bufferbox)
615798Srrh 	struct tokbufdesc *bufferbox;
625798Srrh {
6313467Srrh 	reg	char	*inbufptr;
6413467Srrh 	reg	int	inbufcnt;
655798Srrh 	reg	int	ryylval;	/* local copy of lexical value */
665798Srrh 	extern	int	yylval;		/* global copy of lexical value */
675798Srrh 	reg	int	val;		/* the value returned */
685798Srrh 		int	i;		/* simple counter */
695798Srrh 	reg	char	*rcp;
7013467Srrh 		int	ch;		/* treated as a character */
715798Srrh 		int	ch1;		/* shadow value */
725798Srrh 		struct 	symtab	*op;
7313467Srrh 		ptrall	lgbackpatch;	/* where to stuff a string length */
745798Srrh 	reg	ptrall	bufptr;		/* where to stuff tokens */
755798Srrh 		ptrall	bufub;		/* where not to stuff tokens */
76*13516Srrh 	reg	int	strlg;		/* the length of a string */
775798Srrh 		long	intval;		/* value of int */
785798Srrh 		int	linescrossed;	/* when doing strings and comments */
795798Srrh 		struct	Opcode		opstruct;
80*13516Srrh 		struct	strdesc	strd;	/* for building DQ strings */
815798Srrh 
825798Srrh 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
835798Srrh 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
845798Srrh 
8513467Srrh 	MEMTOREGBUF;
865798Srrh 	if (newfflag){
8713448Srrh 		newfflag = 0;
88*13516Srrh 		strd.sd_stroff = strfilepos;
89*13516Srrh 		strd.sd_place = STR_BOTH;
90*13516Srrh 		strd.sd_strlen = strlen(newfname) + 1;
91*13516Srrh 		fputs(newfname, strfile);
92*13516Srrh 		putc(0, strfile);
93*13516Srrh 		strfilepos += strd.sd_strlen;
94*13516Srrh 		ryylval = (int)savestr(newfname, &strd);
9513448Srrh 
965798Srrh 		ptoken(bufptr, IFILE);
975798Srrh 		ptoken(bufptr, STRING);
9813448Srrh 		pptr(bufptr, ryylval);
995798Srrh 
1005798Srrh 		ptoken(bufptr, ILINENO);
1015798Srrh 		ptoken(bufptr, INT);
1025798Srrh 		pint(bufptr,  1);
1035798Srrh 	}
1045798Srrh 
1055798Srrh 	while (bufptr < bufub){
1065798Srrh    loop:
10713467Srrh         switch(ryylval = (type+1)[ch = getchar()]) {
1085798Srrh 	case SCANEOF:
10913467Srrh 	endoffile: ;
1105798Srrh 		inbufptr = 0;
11113467Srrh 		ptoken(bufptr, PARSEEOF);
11213467Srrh 		goto done;
1135798Srrh 
1145798Srrh 	case DIV:		/*process C style comments*/
1155798Srrh 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
1165798Srrh 			int	incomment;
1175798Srrh 			linescrossed = 0;
1185798Srrh 			incomment = 1;
1195798Srrh 			ch = getchar();	/*skip over the * */
1205798Srrh 			while(incomment){
1215798Srrh 				switch(ch){
1225798Srrh 				case '*':
1235798Srrh 					ch = getchar();
1245798Srrh 					incomment = (ch != '/');
1255798Srrh 					break;
1265798Srrh 				case '\n':
1275798Srrh 					scanlineno++;
1285798Srrh 					linescrossed++;
1295798Srrh 					ch = getchar();
1305798Srrh 					break;
1315798Srrh 				case EOFCHAR:
1325798Srrh 					goto endoffile;
1335798Srrh 				default:
1345798Srrh 					ch = getchar();
1355798Srrh 					break;
1365798Srrh 				}
1375798Srrh 			}
1385798Srrh 			val = ILINESKIP;
1395798Srrh 			ryylval = linescrossed;
1405798Srrh 			goto ret;
1415798Srrh 		} else {	/*just an ordinary DIV*/
1425798Srrh 			ungetc(ch);
1435798Srrh 			val = ryylval = DIV;
1445798Srrh 			goto ret;
1455798Srrh 		}
1465798Srrh 	case SH:
1475798Srrh 		if (oval == NL){
1485798Srrh 			/*
1495798Srrh 			 *	Attempt to recognize a C preprocessor
1505798Srrh 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
1515798Srrh 			 */
1525798Srrh 			ch = getchar();	/*bump the #*/
1535798Srrh 			while (INCHARSET(ch, SPACE))
1545798Srrh 				ch = getchar();/*bump white */
1555798Srrh 			if (INCHARSET(ch, DIGIT)){
1565798Srrh 				intval = 0;
1575798Srrh 				while(INCHARSET(ch, DIGIT)){
1585798Srrh 					intval = intval*10 + ch - '0';
1595798Srrh 					ch = getchar();
1605798Srrh 				}
1615798Srrh 				while (INCHARSET(ch, SPACE))
1625798Srrh 					ch = getchar();
1635798Srrh 				if (ch == '"'){
1645798Srrh 					ptoken(bufptr, ILINENO);
1655798Srrh 					ptoken(bufptr, INT);
1665798Srrh 					pint(bufptr, intval - 1);
1675798Srrh 					ptoken(bufptr, IFILE);
1685798Srrh 					/*
1695798Srrh 					 *	The '"' has already been
1705798Srrh 					 *	munched
1715798Srrh 					 *
1725798Srrh 					 *	eatstr will not eat
1735798Srrh 					 *	the trailing \n, so
1745798Srrh 					 *	it is given to the parser
1755798Srrh 					 *	and counted.
1765798Srrh 					 */
1775798Srrh 					goto eatstr;
1785798Srrh 				}
1795798Srrh 			}
1805798Srrh 		}
1815798Srrh 		/*
1825798Srrh 		 *	Well, its just an ordinary decadent comment
1835798Srrh 		 */
1845798Srrh 		while ((ch != '\n') && (ch != EOFCHAR))
1855798Srrh 			ch = getchar();
1865798Srrh 		if (ch == EOFCHAR)
1875798Srrh 			goto endoffile;
1885798Srrh 		val = ryylval = oval = NL;
1895798Srrh 		scanlineno++;
1905798Srrh 		goto ret;
1915798Srrh 
1925798Srrh 	case NL:
1935798Srrh 		scanlineno++;
1945798Srrh 		val = ryylval;
1955798Srrh 		goto ret;
1965798Srrh 
1975798Srrh 	case SP:
1985798Srrh 		oval = SP;	/*invalidate ^# meta comments*/
1995798Srrh 		goto loop;
2005798Srrh 
2015798Srrh 	case REGOP:		/* % , could be used as modulo, or register*/
2025798Srrh 		ch = getchar();
2035798Srrh 		if (INCHARSET(ch, DIGIT)){
2045798Srrh 			ryylval = ch-'0';
2055798Srrh 			if (ch=='1') {
2065798Srrh 				if (INCHARSET( (ch = getchar()), REGDIGIT))
2075798Srrh 					ryylval = 10+ch-'0';
2085798Srrh 				else
2095798Srrh 					ungetc(ch);
2105798Srrh 			}
2115798Srrh 			/*
2125798Srrh 			 *	God only knows what the original author
2135798Srrh 			 *	wanted this undocumented feature to
2145798Srrh 			 *	do.
2155798Srrh 			 *		%5++ is really  r7
2165798Srrh 			 */
2175798Srrh 			while(INCHARSET( (ch = getchar()), SIGN)) {
2185798Srrh 				if (ch=='+')
2195798Srrh 					ryylval++;
2205798Srrh 				else
2215798Srrh 					ryylval--;
2225798Srrh 			}
2235798Srrh 			ungetc(ch);
2245798Srrh 			val = REG;
2255798Srrh 		} else {
2265798Srrh 			ungetc(ch);
2275798Srrh 			val = REGOP;
2285798Srrh 		}
2295798Srrh 		goto ret;
2305798Srrh 
2315798Srrh 	case ALPH:
2325798Srrh 		ch1 = ch;
2335798Srrh 		if (INCHARSET(ch, SZSPECBEGIN)){
2345798Srrh 			if( (ch = getchar()) == '`' || ch == '^'){
2355798Srrh 				ch1 |= 0100;	/*convert to lower*/
2365798Srrh 				switch(ch1){
2375798Srrh 				case 'b':	ryylval = 1;	break;
2385798Srrh 				case 'w':	ryylval = 2;	break;
2395798Srrh 				case 'l':	ryylval = 4;	break;
2405798Srrh 				default:	ryylval = d124;	break;
2415798Srrh 				}
2425798Srrh 				val = SIZESPEC;
2435798Srrh 				goto ret;
2445798Srrh 			} else {
2455798Srrh 				ungetc(ch);
2465798Srrh 				ch = ch1;	/*restore first character*/
2475798Srrh 			}
2485798Srrh 		}
2495798Srrh 		rcp = yytext;
2505798Srrh 		do {
25113462Srrh 			if (rcp < &yytext[NCPName])
2525798Srrh 				*rcp++ = ch;
2535798Srrh 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
2545798Srrh 		*rcp = '\0';
2555798Srrh 		while (INCHARSET(ch, SPACE))
2565798Srrh 			ch = getchar();
2575798Srrh 		ungetc(ch);
2585798Srrh 
2595798Srrh 		switch((op = *lookup(1))->s_tag){
2605798Srrh 		case 0:
2615798Srrh 		case LABELID:
2625798Srrh 			/*
263*13516Srrh 			 *	Its a name... (Labels are subsets of name)
2645798Srrh 			 */
2655798Srrh 			ryylval = (int)op;
2665798Srrh 			val = NAME;
2675798Srrh 			break;
2685798Srrh 		case INST0:
2695798Srrh 		case INSTn:
2705798Srrh 		case IJXXX:
2715798Srrh 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
2725798Srrh 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
2735798Srrh 			val = op->s_tag;
2745798Srrh 			break;
2755798Srrh 		default:
2765798Srrh 			ryylval = ( (struct instab *)op)->i_popcode;
2775798Srrh 			val = op->s_tag;
2785798Srrh 			break;
2795798Srrh 		}
2805798Srrh 		goto ret;
2815798Srrh 
2825798Srrh 	case DIG:
2835798Srrh 		/*
28413467Srrh 		 *	restore local inbufptr and inbufcnt
2855798Srrh 		 */
28613467Srrh 		REGTOMEMBUF;
28713467Srrh 		val = number(ch);
28813467Srrh 		MEMTOREGBUF;
2895798Srrh 		/*
2905798Srrh 		 *	yylval or yybignum has been stuffed as a side
2915798Srrh 		 *	effect to number(); get the global yylval
2925798Srrh 		 *	into our fast local copy in case it was an INT.
2935798Srrh 		 */
2945798Srrh 		ryylval = yylval;
2955798Srrh 		goto ret;
2965798Srrh 
2975798Srrh 	case LSH:
2985798Srrh 	case RSH:
2995798Srrh 		/*
3005798Srrh 		 *	We allow the C style operators
3015798Srrh 		 *	<< and >>, as well as < and >
3025798Srrh 		 */
3035798Srrh 		if ( (ch1 = getchar()) != ch)
3045798Srrh 			ungetc(ch1);
3055798Srrh 		val = ryylval;
3065798Srrh 		goto ret;
3075798Srrh 
3085798Srrh 	case MINUS:
3095798Srrh 		if ( (ch = getchar()) =='(')
3105798Srrh 			ryylval=val=MP;
3115798Srrh 		else {
3125798Srrh 			ungetc(ch);
3135798Srrh 			val=MINUS;
3145798Srrh 		}
3155798Srrh 		goto ret;
3165798Srrh 
3175798Srrh 	case SQ:
3185798Srrh 		if ((ryylval = getchar()) == '\n')
3195798Srrh 			scanlineno++;		/*not entirely correct*/
3205798Srrh 		val = INT;
3215798Srrh 		goto ret;
3225798Srrh 
3235798Srrh 	case DQ:
3245798Srrh 	   eatstr:
3255798Srrh 		linescrossed = 0;
326*13516Srrh 		strd.sd_stroff = strfilepos;
327*13516Srrh 		strd.sd_place = STR_FILE;
328*13516Srrh 		for (strd.sd_strlen = 0; /*VOID*/; strd.sd_strlen++){
32913448Srrh 		    switch(ch = getchar()){
33013448Srrh 		    case '"':
33113448Srrh 			goto tailDQ;
33213448Srrh 		    default:
33313448Srrh 		    stuff:
334*13516Srrh 			putc(ch, strfile);
33513448Srrh 			break;
33613448Srrh 		    case '\n':
33713448Srrh 			yywarning("New line in a string constant");
3385798Srrh 			scanlineno++;
3395798Srrh 			linescrossed++;
3405798Srrh 			ch = getchar();
34113448Srrh 			switch(ch){
34213448Srrh 			case EOFCHAR:
343*13516Srrh 				putc('\n', strfile);
3445798Srrh 				ungetc(EOFCHAR);
34513448Srrh 				goto tailDQ;
34613448Srrh 			default:
3475798Srrh 				ungetc(ch);
3485798Srrh 				ch = '\n';
3495798Srrh 				goto stuff;
3505798Srrh 			}
35113448Srrh 			break;
35213448Srrh 
35313448Srrh 		    case '\\':
3545798Srrh 			ch = getchar();		/*skip the '\\'*/
3555798Srrh 			if ( INCHARSET(ch, BSESCAPE)){
3565798Srrh 				switch (ch){
3575798Srrh 				  case 'b':  ch = '\b'; goto stuff;
3585798Srrh 				  case 'f':  ch = '\f'; goto stuff;
3595798Srrh 				  case 'n':  ch = '\n'; goto stuff;
3605798Srrh 				  case 'r':  ch = '\r'; goto stuff;
3615798Srrh 				  case 't':  ch = '\t'; goto stuff;
3625798Srrh 				}
3635798Srrh 			}
36413448Srrh 			if ( !(INCHARSET(ch, OCTDIGIT)) )
36513448Srrh 				goto stuff;
3665798Srrh 			i = 0;
3675798Srrh 			intval = 0;
3685798Srrh 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
36913448Srrh 				i++;
37013448Srrh 				intval <<= 3;
37113448Srrh 				intval += ch - '0';
3725798Srrh 				ch = getchar();
3735798Srrh 			}
3745798Srrh 			ungetc(ch);
3756558Srrh 			ch = (char)intval;
3765798Srrh 			goto stuff;
37713448Srrh 		    }
3785798Srrh 		}
37913448Srrh 	tailDQ: ;
3805798Srrh 		/*
38113448Srrh 		 *	account for any lines that were crossed
3825798Srrh 		 */
3835798Srrh 		if (linescrossed){
38413448Srrh 			ptoken(bufptr, ILINESKIP);
38513448Srrh 			pint(bufptr, linescrossed);
38613448Srrh 		}
38713448Srrh 		/*
38813462Srrh 		 *	put the string in strtext into the string pool
38913448Srrh 		 *
39013448Srrh 		 *	Cheat: append a trailing null to the string
39113448Srrh 		 *	and then adjust the string length to ignore
39213448Srrh 		 *	the trailing null.  If any STRING client requires
39313448Srrh 		 *	the trailing null, the client can just change STRLEN
39413448Srrh 		 */
39513448Srrh 		val = STRING;
396*13516Srrh 		putc(0, strfile);
397*13516Srrh 		strd.sd_strlen += 1;
398*13516Srrh 		strfilepos += strd.sd_strlen;
399*13516Srrh 		ryylval = (int)savestr(strtext, &strd);
400*13516Srrh 		((struct strdesc *)ryylval)->sd_strlen -= 1;
40113448Srrh 		goto ret;
4025798Srrh 
4035798Srrh 	case BADCHAR:
4045798Srrh 		linescrossed = lineno;
4055798Srrh 		lineno = scanlineno;
4065798Srrh 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
4075798Srrh 			ryylval, ch);
4085798Srrh 		lineno = linescrossed;
4095798Srrh 		val = BADCHAR;
4105798Srrh 		goto ret;
4115798Srrh 
4125798Srrh 	default:
4135798Srrh 		val = ryylval;
4145798Srrh 		goto ret;
4155798Srrh 	}	/*end of the switch*/
4165798Srrh 	/*
4175798Srrh 	 *	here with one token, so stuff it
4185798Srrh 	 */
4195798Srrh    ret:
4205798Srrh 	oval = val;
4215798Srrh 	ptoken(bufptr, val);
4225798Srrh 	switch(val){
4235798Srrh 		case	ILINESKIP:
4245798Srrh 				pint(bufptr, ryylval);
4255798Srrh 				break;
4265798Srrh 		case	SIZESPEC:
4275798Srrh 				pchar(bufptr, ryylval);
4285798Srrh 				break;
4295798Srrh 		case	BFINT:	plong(bufptr, ryylval);
4305798Srrh 				break;
4315798Srrh 		case	INT:	plong(bufptr, ryylval);
4325798Srrh 				break;
4335798Srrh 		case 	BIGNUM:	pnumber(bufptr, yybignum);
4345798Srrh 				break;
43513448Srrh 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
43613448Srrh 				break;
4375798Srrh 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
4385798Srrh 				break;
4395798Srrh 		case	REG:	pchar(bufptr, ryylval);
4405798Srrh 				break;
4415798Srrh 		case	INST0:
4425798Srrh 		case	INSTn:
4435798Srrh 				popcode(bufptr, opstruct);
4445798Srrh 				break;
4455798Srrh 		case 	IJXXX:
4465798Srrh 				popcode(bufptr, opstruct);
4475798Srrh 				pptr(bufptr, (int)(struct symtab *)symalloc());
4485798Srrh 				break;
4495798Srrh 		case	ISTAB:
4505798Srrh 		case	ISTABSTR:
4515798Srrh 		case	ISTABNONE:
4525798Srrh 		case	ISTABDOT:
4535798Srrh 		case	IALIGN:
4545798Srrh 				pptr(bufptr, (int)(struct symtab *)symalloc());
4555798Srrh 				break;
4565798Srrh 	/*
4575798Srrh 	 *	default:
4585798Srrh 	 */
4595798Srrh 	 }
4605798Srrh 	 builtval: ;
4615798Srrh    }			/*end of the while to stuff the buffer*/
4625798Srrh    done:
4635798Srrh 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
4645798Srrh 	/*
4655798Srrh 	 *	This is a real kludge:
4665798Srrh 	 *
4675798Srrh 	 *	We put the last token in the buffer to be  a MINUS
4685798Srrh 	 *	symbol.  This last token will never be picked up
4695798Srrh 	 *	in the normal way, but can be looked at during
4705798Srrh 	 *	a peekahead look that the short circuit expression
4715798Srrh 	 *	evaluator uses to see if an expression is complicated.
4725798Srrh 	 *
4735798Srrh 	 *	Consider the following situation:
4745798Srrh 	 *
4755798Srrh 	 *	.word	45		+	47
4765798Srrh 	 *        buffer 1      |  buffer 0
4775798Srrh 	 *	the peekahead would want to look across the buffer,
4785798Srrh 	 *	but will look in the buffer end zone, see the minus, and
4795798Srrh 	 *	fail.
4805798Srrh 	 */
4815798Srrh 	ptoken(bufptr, MINUS);
48213467Srrh 	REGTOMEMBUF;
4835798Srrh }
484