xref: /csrg-svn/old/as.tahoe/asscan2.c (revision 40591)
1*40591Sbostic /*
2*40591Sbostic  *	Copyright (c) 1982 Regents of the University of California
3*40591Sbostic  */
4*40591Sbostic #ifndef lint
5*40591Sbostic static char sccsid[] = "@(#)asscan2.c 4.14 7/6/83";
6*40591Sbostic #endif not lint
7*40591Sbostic 
8*40591Sbostic #include "asscanl.h"
9*40591Sbostic 
10*40591Sbostic static	inttoktype	oval = NL;
11*40591Sbostic #define	ASINBUFSIZ	4096
12*40591Sbostic char	inbufunget[8];
13*40591Sbostic char	inbuffer[ASINBUFSIZ];
14*40591Sbostic char	*Ginbufptr = inbuffer;
15*40591Sbostic int	Ginbufcnt = 0;
16*40591Sbostic int	scannerhadeof;
17*40591Sbostic 
fillinbuffer()18*40591Sbostic fillinbuffer()
19*40591Sbostic {
20*40591Sbostic 		int	nread;
21*40591Sbostic 		int	goal;
22*40591Sbostic 		int	got;
23*40591Sbostic 
24*40591Sbostic 	nread = 0;
25*40591Sbostic 	if (scannerhadeof == 0){
26*40591Sbostic 		goal = sizeof(inbuffer);
27*40591Sbostic 		do {
28*40591Sbostic 			got = read(stdin->_file, inbuffer + nread, goal);
29*40591Sbostic 			if (got == 0)
30*40591Sbostic 				scannerhadeof = 1;
31*40591Sbostic 			if (got <= 0)
32*40591Sbostic 				break;
33*40591Sbostic 			nread += got;
34*40591Sbostic 			goal -= got;
35*40591Sbostic 		} while (goal);
36*40591Sbostic 	} else {
37*40591Sbostic 		scannerhadeof = 0;
38*40591Sbostic 	}
39*40591Sbostic 	/*
40*40591Sbostic 	 *	getchar assumes that Ginbufcnt and Ginbufptr
41*40591Sbostic 	 *	are adjusted as if one character has been removed
42*40591Sbostic 	 *	from the input.
43*40591Sbostic 	 */
44*40591Sbostic 	if (nread == 0){
45*40591Sbostic 		inbuffer[0] = EOFCHAR;
46*40591Sbostic 		nread = 1;
47*40591Sbostic 	}
48*40591Sbostic 	Ginbufcnt = nread - 1;
49*40591Sbostic 	Ginbufptr = inbuffer + 1;
50*40591Sbostic }
51*40591Sbostic 
52*40591Sbostic scan_dot_s(bufferbox)
53*40591Sbostic 	struct tokbufdesc *bufferbox;
54*40591Sbostic {
55*40591Sbostic 	reg	char	*inbufptr;
56*40591Sbostic 	reg	int	inbufcnt;
57*40591Sbostic 	reg	int	ryylval;	/* local copy of lexical value */
58*40591Sbostic 	extern	int	yylval;		/* global copy of lexical value */
59*40591Sbostic 	reg	int	val;		/* the value returned */
60*40591Sbostic 		int	i;		/* simple counter */
61*40591Sbostic 	reg	char	*rcp;
62*40591Sbostic 		int	ch;		/* treated as a character */
63*40591Sbostic 		int	ch1;		/* shadow value */
64*40591Sbostic 		struct 	symtab	*op;
65*40591Sbostic 	reg	ptrall	bufptr;		/* where to stuff tokens */
66*40591Sbostic 		ptrall	bufub;		/* where not to stuff tokens */
67*40591Sbostic 		long	intval;		/* value of int */
68*40591Sbostic 		int	linescrossed;	/* when doing strings and comments */
69*40591Sbostic 		u_char	opstruct;
70*40591Sbostic 	reg	int	strlg;		/* the length of a string */
71*40591Sbostic 
72*40591Sbostic 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
73*40591Sbostic 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
74*40591Sbostic 
75*40591Sbostic 	MEMTOREGBUF;
76*40591Sbostic 	if (newfflag){
77*40591Sbostic 		newfflag = 0;
78*40591Sbostic 		ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH);
79*40591Sbostic 
80*40591Sbostic 		ptoken(bufptr, IFILE);
81*40591Sbostic 		ptoken(bufptr, STRING);
82*40591Sbostic 		pptr(bufptr, ryylval);
83*40591Sbostic 
84*40591Sbostic 		ptoken(bufptr, ILINENO);
85*40591Sbostic 		ptoken(bufptr, INT);
86*40591Sbostic 		pint(bufptr,  1);
87*40591Sbostic 	}
88*40591Sbostic 
89*40591Sbostic 	while (bufptr < bufub){
90*40591Sbostic    loop:
91*40591Sbostic         switch(ryylval = (type+1)[ch = getchar()]) {
92*40591Sbostic 	case SCANEOF:
93*40591Sbostic 	endoffile: ;
94*40591Sbostic 		inbufptr = 0;
95*40591Sbostic 		ptoken(bufptr, PARSEEOF);
96*40591Sbostic 		goto done;
97*40591Sbostic 
98*40591Sbostic 	case DIV:		/*process C style comments*/
99*40591Sbostic 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
100*40591Sbostic 			int	incomment;
101*40591Sbostic 			linescrossed = 0;
102*40591Sbostic 			incomment = 1;
103*40591Sbostic 			ch = getchar();	/*skip over the * */
104*40591Sbostic 			while(incomment){
105*40591Sbostic 				switch(ch){
106*40591Sbostic 				case '*':
107*40591Sbostic 					ch = getchar();
108*40591Sbostic 					incomment = (ch != '/');
109*40591Sbostic 					break;
110*40591Sbostic 				case '\n':
111*40591Sbostic 					scanlineno++;
112*40591Sbostic 					linescrossed++;
113*40591Sbostic 					ch = getchar();
114*40591Sbostic 					break;
115*40591Sbostic 				case EOFCHAR:
116*40591Sbostic 					goto endoffile;
117*40591Sbostic 				default:
118*40591Sbostic 					ch = getchar();
119*40591Sbostic 					break;
120*40591Sbostic 				}
121*40591Sbostic 			}
122*40591Sbostic 			val = ILINESKIP;
123*40591Sbostic 			ryylval = linescrossed;
124*40591Sbostic 			goto ret;
125*40591Sbostic 		} else {	/*just an ordinary DIV*/
126*40591Sbostic 			ungetc(ch);
127*40591Sbostic 			val = ryylval = DIV;
128*40591Sbostic 			goto ret;
129*40591Sbostic 		}
130*40591Sbostic 	case SH:
131*40591Sbostic 		if (oval == NL){
132*40591Sbostic 			/*
133*40591Sbostic 			 *	Attempt to recognize a C preprocessor
134*40591Sbostic 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
135*40591Sbostic 			 */
136*40591Sbostic 			ch = getchar();	/*bump the #*/
137*40591Sbostic 			while (INCHARSET(ch, SPACE))
138*40591Sbostic 				ch = getchar();/*bump white */
139*40591Sbostic 			if (INCHARSET(ch, DIGIT)){
140*40591Sbostic 				intval = 0;
141*40591Sbostic 				while(INCHARSET(ch, DIGIT)){
142*40591Sbostic 					intval = intval*10 + ch - '0';
143*40591Sbostic 					ch = getchar();
144*40591Sbostic 				}
145*40591Sbostic 				while (INCHARSET(ch, SPACE))
146*40591Sbostic 					ch = getchar();
147*40591Sbostic 				if (ch == '"' || ch == '\n'){
148*40591Sbostic 					ptoken(bufptr, ILINENO);
149*40591Sbostic 					ptoken(bufptr, INT);
150*40591Sbostic 					pint(bufptr, intval - 1);
151*40591Sbostic 					if (ch == '"')
152*40591Sbostic 					{
153*40591Sbostic 						ptoken(bufptr, IFILE);
154*40591Sbostic 					/*
155*40591Sbostic 					 *	The '"' has already been
156*40591Sbostic 					 *	munched
157*40591Sbostic 					 *
158*40591Sbostic 					 *	eatstr will not eat
159*40591Sbostic 					 *	the trailing \n, so
160*40591Sbostic 					 *	it is given to the parser
161*40591Sbostic 					 *	and counted.
162*40591Sbostic 					 */
163*40591Sbostic 					goto eatstr;
164*40591Sbostic 					}
165*40591Sbostic 				}
166*40591Sbostic 			}
167*40591Sbostic 		}
168*40591Sbostic 		/*
169*40591Sbostic 		 *	Well, its just an ordinary decadent comment
170*40591Sbostic 		 */
171*40591Sbostic 		while ((ch != '\n') && (ch != EOFCHAR))
172*40591Sbostic 			ch = getchar();
173*40591Sbostic 		if (ch == EOFCHAR)
174*40591Sbostic 			goto endoffile;
175*40591Sbostic 		val = ryylval = oval = NL;
176*40591Sbostic 		scanlineno++;
177*40591Sbostic 		goto ret;
178*40591Sbostic 
179*40591Sbostic 	case NL:
180*40591Sbostic 		scanlineno++;
181*40591Sbostic 		val = ryylval;
182*40591Sbostic 		goto ret;
183*40591Sbostic 
184*40591Sbostic 	case SP:
185*40591Sbostic 		oval = SP;	/*invalidate ^# meta comments*/
186*40591Sbostic 		goto loop;
187*40591Sbostic 
188*40591Sbostic 	case REGOP:		/* % , could be used as modulo, or register*/
189*40591Sbostic 		ch = getchar();
190*40591Sbostic 		if (INCHARSET(ch, DIGIT)){
191*40591Sbostic 			ryylval = ch-'0';
192*40591Sbostic 			if (ch=='1') {
193*40591Sbostic 				if (INCHARSET( (ch = getchar()), REGDIGIT))
194*40591Sbostic 					ryylval = 10+ch-'0';
195*40591Sbostic 				else
196*40591Sbostic 					ungetc(ch);
197*40591Sbostic 			}
198*40591Sbostic 			/*
199*40591Sbostic 			 *	God only knows what the original author
200*40591Sbostic 			 *	wanted this undocumented feature to
201*40591Sbostic 			 *	do.
202*40591Sbostic 			 *		%5++ is really  r7
203*40591Sbostic 			 */
204*40591Sbostic 			while(INCHARSET( (ch = getchar()), SIGN)) {
205*40591Sbostic 				if (ch=='+')
206*40591Sbostic 					ryylval++;
207*40591Sbostic 				else
208*40591Sbostic 					ryylval--;
209*40591Sbostic 			}
210*40591Sbostic 			ungetc(ch);
211*40591Sbostic 			val = REG;
212*40591Sbostic 		} else {
213*40591Sbostic 			ungetc(ch);
214*40591Sbostic 			val = REGOP;
215*40591Sbostic 		}
216*40591Sbostic 		goto ret;
217*40591Sbostic 
218*40591Sbostic 	case ALPH:
219*40591Sbostic 		ch1 = ch;
220*40591Sbostic 		if (INCHARSET(ch, SZSPECBEGIN)){
221*40591Sbostic 			if( (ch = getchar()) == '`' || ch == '^'){
222*40591Sbostic 				ch1 |= 0100;	/*convert to lower*/
223*40591Sbostic 				switch(ch1){
224*40591Sbostic 				case 'b':	ryylval = 1;	break;
225*40591Sbostic 				case 'w':	ryylval = 2;	break;
226*40591Sbostic 				case 'l':	ryylval = 4;	break;
227*40591Sbostic 				default:	ryylval = d124;	break;
228*40591Sbostic 				}
229*40591Sbostic 				val = SIZESPEC;
230*40591Sbostic 				goto ret;
231*40591Sbostic 			} else {
232*40591Sbostic 				ungetc(ch);
233*40591Sbostic 				ch = ch1;	/*restore first character*/
234*40591Sbostic 			}
235*40591Sbostic 		}
236*40591Sbostic 		rcp = yytext;
237*40591Sbostic 		do {
238*40591Sbostic 			if (rcp < &yytext[NCPName])
239*40591Sbostic 				*rcp++ = ch;
240*40591Sbostic 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
241*40591Sbostic 		*rcp = '\0';
242*40591Sbostic 		while (INCHARSET(ch, SPACE))
243*40591Sbostic 			ch = getchar();
244*40591Sbostic 		ungetc(ch);
245*40591Sbostic 
246*40591Sbostic 		switch((op = *lookup(1))->s_tag){
247*40591Sbostic 		case 0:
248*40591Sbostic 		case LABELID:
249*40591Sbostic 			/*
250*40591Sbostic 			 *	Its a name... (Labels are subsets of name)
251*40591Sbostic 			 */
252*40591Sbostic 			ryylval = (int)op;
253*40591Sbostic 			val = NAME;
254*40591Sbostic 			break;
255*40591Sbostic 		case INST0:
256*40591Sbostic 		case INSTn:
257*40591Sbostic 		case IJXXX:
258*40591Sbostic 			opstruct = ( (struct instab *)op)->i_opcode;
259*40591Sbostic 			val = op->s_tag;
260*40591Sbostic 			break;
261*40591Sbostic 		default:
262*40591Sbostic 			ryylval = ( (struct instab *)op)->i_opcode;
263*40591Sbostic 			val = op->s_tag;
264*40591Sbostic 			break;
265*40591Sbostic 		}
266*40591Sbostic 		goto ret;
267*40591Sbostic 
268*40591Sbostic 	case DIG:
269*40591Sbostic 		/*
270*40591Sbostic 		 *	restore local inbufptr and inbufcnt
271*40591Sbostic 		 */
272*40591Sbostic 		REGTOMEMBUF;
273*40591Sbostic 		val = number(ch);
274*40591Sbostic 		MEMTOREGBUF;
275*40591Sbostic 		/*
276*40591Sbostic 		 *	yylval or yybignum has been stuffed as a side
277*40591Sbostic 		 *	effect to number(); get the global yylval
278*40591Sbostic 		 *	into our fast local copy in case it was an INT.
279*40591Sbostic 		 */
280*40591Sbostic 		ryylval = yylval;
281*40591Sbostic 		goto ret;
282*40591Sbostic 
283*40591Sbostic 	case LSH:
284*40591Sbostic 	case RSH:
285*40591Sbostic 		/*
286*40591Sbostic 		 *	We allow the C style operators
287*40591Sbostic 		 *	<< and >>, as well as < and >
288*40591Sbostic 		 */
289*40591Sbostic 		if ( (ch1 = getchar()) != ch)
290*40591Sbostic 			ungetc(ch1);
291*40591Sbostic 		val = ryylval;
292*40591Sbostic 		goto ret;
293*40591Sbostic 
294*40591Sbostic 	case MINUS:
295*40591Sbostic 		if ( (ch = getchar()) =='(')
296*40591Sbostic 			ryylval=val=MP;
297*40591Sbostic 		else {
298*40591Sbostic 			ungetc(ch);
299*40591Sbostic 			val=MINUS;
300*40591Sbostic 		}
301*40591Sbostic 		goto ret;
302*40591Sbostic 
303*40591Sbostic 	case SQ:
304*40591Sbostic 		if ((ryylval = getchar()) == '\n')
305*40591Sbostic 			scanlineno++;		/*not entirely correct*/
306*40591Sbostic 		val = INT;
307*40591Sbostic 		goto ret;
308*40591Sbostic 
309*40591Sbostic 	case DQ:
310*40591Sbostic 	   eatstr:
311*40591Sbostic 		linescrossed = 0;
312*40591Sbostic 		for (strlg = 0; /*VOID*/; strlg++){
313*40591Sbostic 		    switch(ch = getchar()){
314*40591Sbostic 		    case '"':
315*40591Sbostic 			goto tailDQ;
316*40591Sbostic 		    default:
317*40591Sbostic 		    stuff:
318*40591Sbostic 			putc(ch, strfile);
319*40591Sbostic 			break;
320*40591Sbostic 		    case '\n':
321*40591Sbostic 			yywarning("New line in a string constant");
322*40591Sbostic 			scanlineno++;
323*40591Sbostic 			linescrossed++;
324*40591Sbostic 			ch = getchar();
325*40591Sbostic 			switch(ch){
326*40591Sbostic 			case EOFCHAR:
327*40591Sbostic 				putc('\n', strfile);
328*40591Sbostic 				ungetc(EOFCHAR);
329*40591Sbostic 				goto tailDQ;
330*40591Sbostic 			default:
331*40591Sbostic 				ungetc(ch);
332*40591Sbostic 				ch = '\n';
333*40591Sbostic 				goto stuff;
334*40591Sbostic 			}
335*40591Sbostic 			break;
336*40591Sbostic 
337*40591Sbostic 		    case '\\':
338*40591Sbostic 			ch = getchar();		/*skip the '\\'*/
339*40591Sbostic 			if ( INCHARSET(ch, BSESCAPE)){
340*40591Sbostic 				switch (ch){
341*40591Sbostic 				  case 'b':  ch = '\b'; goto stuff;
342*40591Sbostic 				  case 'f':  ch = '\f'; goto stuff;
343*40591Sbostic 				  case 'n':  ch = '\n'; goto stuff;
344*40591Sbostic 				  case 'r':  ch = '\r'; goto stuff;
345*40591Sbostic 				  case 't':  ch = '\t'; goto stuff;
346*40591Sbostic 				}
347*40591Sbostic 			}
348*40591Sbostic 			if ( !(INCHARSET(ch, OCTDIGIT)) )
349*40591Sbostic 				goto stuff;
350*40591Sbostic 			i = 0;
351*40591Sbostic 			intval = 0;
352*40591Sbostic 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
353*40591Sbostic 				i++;
354*40591Sbostic 				intval <<= 3;
355*40591Sbostic 				intval += ch - '0';
356*40591Sbostic 				ch = getchar();
357*40591Sbostic 			}
358*40591Sbostic 			ungetc(ch);
359*40591Sbostic 			ch = (char)intval;
360*40591Sbostic 			goto stuff;
361*40591Sbostic 		    }
362*40591Sbostic 		}
363*40591Sbostic 	tailDQ: ;
364*40591Sbostic 		/*
365*40591Sbostic 		 *	account for any lines that were crossed
366*40591Sbostic 		 */
367*40591Sbostic 		if (linescrossed){
368*40591Sbostic 			ptoken(bufptr, ILINESKIP);
369*40591Sbostic 			pint(bufptr, linescrossed);
370*40591Sbostic 		}
371*40591Sbostic 		/*
372*40591Sbostic 		 *	Cheat: append a trailing null to the string
373*40591Sbostic 		 *	and then adjust the string length to ignore
374*40591Sbostic 		 *	the trailing null.  If any STRING client requires
375*40591Sbostic 		 *	the trailing null, the client can just change STRLEN
376*40591Sbostic 		 */
377*40591Sbostic 		putc(0, strfile);
378*40591Sbostic 		ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE);
379*40591Sbostic 		val = STRING;
380*40591Sbostic 		((struct strdesc *)ryylval)->sd_strlen -= 1;
381*40591Sbostic 		goto ret;
382*40591Sbostic 
383*40591Sbostic 	case BADCHAR:
384*40591Sbostic 		linescrossed = lineno;
385*40591Sbostic 		lineno = scanlineno;
386*40591Sbostic 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
387*40591Sbostic 			ryylval, ch);
388*40591Sbostic 		lineno = linescrossed;
389*40591Sbostic 		val = BADCHAR;
390*40591Sbostic 		goto ret;
391*40591Sbostic 
392*40591Sbostic 	default:
393*40591Sbostic 		val = ryylval;
394*40591Sbostic 		goto ret;
395*40591Sbostic 	}	/*end of the switch*/
396*40591Sbostic 	/*
397*40591Sbostic 	 *	here with one token, so stuff it
398*40591Sbostic 	 */
399*40591Sbostic    ret:
400*40591Sbostic 	oval = val;
401*40591Sbostic 	ptoken(bufptr, val);
402*40591Sbostic 	switch(val){
403*40591Sbostic 		case	ILINESKIP:
404*40591Sbostic 				pint(bufptr, ryylval);
405*40591Sbostic 				break;
406*40591Sbostic 		case	SIZESPEC:
407*40591Sbostic 				pchar(bufptr, ryylval);
408*40591Sbostic 				break;
409*40591Sbostic 		case	BFINT:	plong(bufptr, ryylval);
410*40591Sbostic 				break;
411*40591Sbostic 		case	INT:	plong(bufptr, ryylval);
412*40591Sbostic 				break;
413*40591Sbostic 		case 	BIGNUM:	pnumber(bufptr, yybignum);
414*40591Sbostic 				break;
415*40591Sbostic 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
416*40591Sbostic 				break;
417*40591Sbostic 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
418*40591Sbostic 				break;
419*40591Sbostic 		case	REG:	pchar(bufptr, ryylval);
420*40591Sbostic 				break;
421*40591Sbostic 		case	INST0:
422*40591Sbostic 		case	INSTn:
423*40591Sbostic 				popcode(bufptr, opstruct);
424*40591Sbostic 				break;
425*40591Sbostic 		case 	IJXXX:
426*40591Sbostic 				popcode(bufptr, opstruct);
427*40591Sbostic 				pptr(bufptr, (int)(struct symtab *)symalloc());
428*40591Sbostic 				break;
429*40591Sbostic 		case	ISTAB:
430*40591Sbostic 		case	ISTABSTR:
431*40591Sbostic 		case	ISTABNONE:
432*40591Sbostic 		case	ISTABDOT:
433*40591Sbostic 		case	IALIGN:
434*40591Sbostic 				pptr(bufptr, (int)(struct symtab *)symalloc());
435*40591Sbostic 				break;
436*40591Sbostic 	/*
437*40591Sbostic 	 *	default:
438*40591Sbostic 	 */
439*40591Sbostic 	 }
440*40591Sbostic 	 builtval: ;
441*40591Sbostic    }			/*end of the while to stuff the buffer*/
442*40591Sbostic    done:
443*40591Sbostic 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
444*40591Sbostic 	/*
445*40591Sbostic 	 *	This is a real kludge:
446*40591Sbostic 	 *
447*40591Sbostic 	 *	We put the last token in the buffer to be  a MINUS
448*40591Sbostic 	 *	symbol.  This last token will never be picked up
449*40591Sbostic 	 *	in the normal way, but can be looked at during
450*40591Sbostic 	 *	a peekahead look that the short circuit expression
451*40591Sbostic 	 *	evaluator uses to see if an expression is complicated.
452*40591Sbostic 	 *
453*40591Sbostic 	 *	Consider the following situation:
454*40591Sbostic 	 *
455*40591Sbostic 	 *	.word	45		+	47
456*40591Sbostic 	 *        buffer 1      |  buffer 0
457*40591Sbostic 	 *	the peekahead would want to look across the buffer,
458*40591Sbostic 	 *	but will look in the buffer end zone, see the minus, and
459*40591Sbostic 	 *	fail.
460*40591Sbostic 	 */
461*40591Sbostic 	ptoken(bufptr, MINUS);
462*40591Sbostic 	REGTOMEMBUF;
463*40591Sbostic }
464