xref: /csrg-svn/old/as.vax/asscan2.c (revision 13575)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
/*
 *	SCCS identification string; left out when running lint.
 */
#ifndef lint
static char sccsid[] = "@(#)asscan2.c 4.13 07/01/83";
#endif /* not lint */
7 
8 #include "asscanl.h"
9 
10 static	inttoktype	oval = NL;
11 #define	ASINBUFSIZ	4096
12 char	inbufunget[8];
13 char	inbuffer[ASINBUFSIZ];
14 char	*Ginbufptr = inbuffer;
15 int	Ginbufcnt = 0;
16 
17 fillinbuffer()
18 {
19 		int	nread;
20 	static	int	hadeof;
21 		int	goal;
22 		int	got;
23 
24 	nread = 0;
25 	if (hadeof == 0){
26 		goal = sizeof(inbuffer);
27 		do {
28 			got = read(stdin->_file, inbuffer + nread, goal);
29 			if (got == 0)
30 				hadeof = 1;
31 			if (got <= 0)
32 				break;
33 			nread += got;
34 			goal -= got;
35 		} while (goal);
36 	}
37 	/*
38 	 *	getchar assumes that Ginbufcnt and Ginbufptr
39 	 *	are adjusted as if one character has been removed
40 	 *	from the input.
41 	 */
42 	if (nread == 0){
43 		inbuffer[0] = EOFCHAR;
44 		nread = 1;
45 	}
46 	Ginbufcnt = nread - 1;
47 	Ginbufptr = inbuffer + 1;
48 }
49 
50 scan_dot_s(bufferbox)
51 	struct tokbufdesc *bufferbox;
52 {
53 	reg	char	*inbufptr;
54 	reg	int	inbufcnt;
55 	reg	int	ryylval;	/* local copy of lexical value */
56 	extern	int	yylval;		/* global copy of lexical value */
57 	reg	int	val;		/* the value returned */
58 		int	i;		/* simple counter */
59 	reg	char	*rcp;
60 		int	ch;		/* treated as a character */
61 		int	ch1;		/* shadow value */
62 		struct 	symtab	*op;
63 		ptrall	lgbackpatch;	/* where to stuff a string length */
64 	reg	ptrall	bufptr;		/* where to stuff tokens */
65 		ptrall	bufub;		/* where not to stuff tokens */
66 		long	intval;		/* value of int */
67 		int	linescrossed;	/* when doing strings and comments */
68 		struct	Opcode		opstruct;
69 	reg	int	strlg;		/* the length of a string */
70 
71 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
72 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
73 
74 	MEMTOREGBUF;
75 	if (newfflag){
76 		newfflag = 0;
77 		ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH);
78 
79 		ptoken(bufptr, IFILE);
80 		ptoken(bufptr, STRING);
81 		pptr(bufptr, ryylval);
82 
83 		ptoken(bufptr, ILINENO);
84 		ptoken(bufptr, INT);
85 		pint(bufptr,  1);
86 	}
87 
88 	while (bufptr < bufub){
89    loop:
90         switch(ryylval = (type+1)[ch = getchar()]) {
91 	case SCANEOF:
92 	endoffile: ;
93 		inbufptr = 0;
94 		ptoken(bufptr, PARSEEOF);
95 		goto done;
96 
97 	case DIV:		/*process C style comments*/
98 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
99 			int	incomment;
100 			linescrossed = 0;
101 			incomment = 1;
102 			ch = getchar();	/*skip over the * */
103 			while(incomment){
104 				switch(ch){
105 				case '*':
106 					ch = getchar();
107 					incomment = (ch != '/');
108 					break;
109 				case '\n':
110 					scanlineno++;
111 					linescrossed++;
112 					ch = getchar();
113 					break;
114 				case EOFCHAR:
115 					goto endoffile;
116 				default:
117 					ch = getchar();
118 					break;
119 				}
120 			}
121 			val = ILINESKIP;
122 			ryylval = linescrossed;
123 			goto ret;
124 		} else {	/*just an ordinary DIV*/
125 			ungetc(ch);
126 			val = ryylval = DIV;
127 			goto ret;
128 		}
129 	case SH:
130 		if (oval == NL){
131 			/*
132 			 *	Attempt to recognize a C preprocessor
133 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
134 			 */
135 			ch = getchar();	/*bump the #*/
136 			while (INCHARSET(ch, SPACE))
137 				ch = getchar();/*bump white */
138 			if (INCHARSET(ch, DIGIT)){
139 				intval = 0;
140 				while(INCHARSET(ch, DIGIT)){
141 					intval = intval*10 + ch - '0';
142 					ch = getchar();
143 				}
144 				while (INCHARSET(ch, SPACE))
145 					ch = getchar();
146 				if (ch == '"'){
147 					ptoken(bufptr, ILINENO);
148 					ptoken(bufptr, INT);
149 					pint(bufptr, intval - 1);
150 					ptoken(bufptr, IFILE);
151 					/*
152 					 *	The '"' has already been
153 					 *	munched
154 					 *
155 					 *	eatstr will not eat
156 					 *	the trailing \n, so
157 					 *	it is given to the parser
158 					 *	and counted.
159 					 */
160 					goto eatstr;
161 				}
162 			}
163 		}
164 		/*
165 		 *	Well, its just an ordinary decadent comment
166 		 */
167 		while ((ch != '\n') && (ch != EOFCHAR))
168 			ch = getchar();
169 		if (ch == EOFCHAR)
170 			goto endoffile;
171 		val = ryylval = oval = NL;
172 		scanlineno++;
173 		goto ret;
174 
175 	case NL:
176 		scanlineno++;
177 		val = ryylval;
178 		goto ret;
179 
180 	case SP:
181 		oval = SP;	/*invalidate ^# meta comments*/
182 		goto loop;
183 
184 	case REGOP:		/* % , could be used as modulo, or register*/
185 		ch = getchar();
186 		if (INCHARSET(ch, DIGIT)){
187 			ryylval = ch-'0';
188 			if (ch=='1') {
189 				if (INCHARSET( (ch = getchar()), REGDIGIT))
190 					ryylval = 10+ch-'0';
191 				else
192 					ungetc(ch);
193 			}
194 			/*
195 			 *	God only knows what the original author
196 			 *	wanted this undocumented feature to
197 			 *	do.
198 			 *		%5++ is really  r7
199 			 */
200 			while(INCHARSET( (ch = getchar()), SIGN)) {
201 				if (ch=='+')
202 					ryylval++;
203 				else
204 					ryylval--;
205 			}
206 			ungetc(ch);
207 			val = REG;
208 		} else {
209 			ungetc(ch);
210 			val = REGOP;
211 		}
212 		goto ret;
213 
214 	case ALPH:
215 		ch1 = ch;
216 		if (INCHARSET(ch, SZSPECBEGIN)){
217 			if( (ch = getchar()) == '`' || ch == '^'){
218 				ch1 |= 0100;	/*convert to lower*/
219 				switch(ch1){
220 				case 'b':	ryylval = 1;	break;
221 				case 'w':	ryylval = 2;	break;
222 				case 'l':	ryylval = 4;	break;
223 				default:	ryylval = d124;	break;
224 				}
225 				val = SIZESPEC;
226 				goto ret;
227 			} else {
228 				ungetc(ch);
229 				ch = ch1;	/*restore first character*/
230 			}
231 		}
232 		rcp = yytext;
233 		do {
234 			if (rcp < &yytext[NCPName])
235 				*rcp++ = ch;
236 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
237 		*rcp = '\0';
238 		while (INCHARSET(ch, SPACE))
239 			ch = getchar();
240 		ungetc(ch);
241 
242 		switch((op = *lookup(1))->s_tag){
243 		case 0:
244 		case LABELID:
245 			/*
246 			 *	Its a name... (Labels are subsets of name)
247 			 */
248 			ryylval = (int)op;
249 			val = NAME;
250 			break;
251 		case INST0:
252 		case INSTn:
253 		case IJXXX:
254 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
255 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
256 			val = op->s_tag;
257 			break;
258 		default:
259 			ryylval = ( (struct instab *)op)->i_popcode;
260 			val = op->s_tag;
261 			break;
262 		}
263 		goto ret;
264 
265 	case DIG:
266 		/*
267 		 *	restore local inbufptr and inbufcnt
268 		 */
269 		REGTOMEMBUF;
270 		val = number(ch);
271 		MEMTOREGBUF;
272 		/*
273 		 *	yylval or yybignum has been stuffed as a side
274 		 *	effect to number(); get the global yylval
275 		 *	into our fast local copy in case it was an INT.
276 		 */
277 		ryylval = yylval;
278 		goto ret;
279 
280 	case LSH:
281 	case RSH:
282 		/*
283 		 *	We allow the C style operators
284 		 *	<< and >>, as well as < and >
285 		 */
286 		if ( (ch1 = getchar()) != ch)
287 			ungetc(ch1);
288 		val = ryylval;
289 		goto ret;
290 
291 	case MINUS:
292 		if ( (ch = getchar()) =='(')
293 			ryylval=val=MP;
294 		else {
295 			ungetc(ch);
296 			val=MINUS;
297 		}
298 		goto ret;
299 
300 	case SQ:
301 		if ((ryylval = getchar()) == '\n')
302 			scanlineno++;		/*not entirely correct*/
303 		val = INT;
304 		goto ret;
305 
306 	case DQ:
307 	   eatstr:
308 		linescrossed = 0;
309 		for (strlg = 0; /*VOID*/; strlg++){
310 		    switch(ch = getchar()){
311 		    case '"':
312 			goto tailDQ;
313 		    default:
314 		    stuff:
315 			putc(ch, strfile);
316 			break;
317 		    case '\n':
318 			yywarning("New line in a string constant");
319 			scanlineno++;
320 			linescrossed++;
321 			ch = getchar();
322 			switch(ch){
323 			case EOFCHAR:
324 				putc('\n', strfile);
325 				ungetc(EOFCHAR);
326 				goto tailDQ;
327 			default:
328 				ungetc(ch);
329 				ch = '\n';
330 				goto stuff;
331 			}
332 			break;
333 
334 		    case '\\':
335 			ch = getchar();		/*skip the '\\'*/
336 			if ( INCHARSET(ch, BSESCAPE)){
337 				switch (ch){
338 				  case 'b':  ch = '\b'; goto stuff;
339 				  case 'f':  ch = '\f'; goto stuff;
340 				  case 'n':  ch = '\n'; goto stuff;
341 				  case 'r':  ch = '\r'; goto stuff;
342 				  case 't':  ch = '\t'; goto stuff;
343 				}
344 			}
345 			if ( !(INCHARSET(ch, OCTDIGIT)) )
346 				goto stuff;
347 			i = 0;
348 			intval = 0;
349 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
350 				i++;
351 				intval <<= 3;
352 				intval += ch - '0';
353 				ch = getchar();
354 			}
355 			ungetc(ch);
356 			ch = (char)intval;
357 			goto stuff;
358 		    }
359 		}
360 	tailDQ: ;
361 		/*
362 		 *	account for any lines that were crossed
363 		 */
364 		if (linescrossed){
365 			ptoken(bufptr, ILINESKIP);
366 			pint(bufptr, linescrossed);
367 		}
368 		/*
369 		 *	Cheat: append a trailing null to the string
370 		 *	and then adjust the string length to ignore
371 		 *	the trailing null.  If any STRING client requires
372 		 *	the trailing null, the client can just change STRLEN
373 		 */
374 		putc(0, strfile);
375 		ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE);
376 		val = STRING;
377 		((struct strdesc *)ryylval)->sd_strlen -= 1;
378 		goto ret;
379 
380 	case BADCHAR:
381 		linescrossed = lineno;
382 		lineno = scanlineno;
383 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
384 			ryylval, ch);
385 		lineno = linescrossed;
386 		val = BADCHAR;
387 		goto ret;
388 
389 	default:
390 		val = ryylval;
391 		goto ret;
392 	}	/*end of the switch*/
393 	/*
394 	 *	here with one token, so stuff it
395 	 */
396    ret:
397 	oval = val;
398 	ptoken(bufptr, val);
399 	switch(val){
400 		case	ILINESKIP:
401 				pint(bufptr, ryylval);
402 				break;
403 		case	SIZESPEC:
404 				pchar(bufptr, ryylval);
405 				break;
406 		case	BFINT:	plong(bufptr, ryylval);
407 				break;
408 		case	INT:	plong(bufptr, ryylval);
409 				break;
410 		case 	BIGNUM:	pnumber(bufptr, yybignum);
411 				break;
412 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
413 				break;
414 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
415 				break;
416 		case	REG:	pchar(bufptr, ryylval);
417 				break;
418 		case	INST0:
419 		case	INSTn:
420 				popcode(bufptr, opstruct);
421 				break;
422 		case 	IJXXX:
423 				popcode(bufptr, opstruct);
424 				pptr(bufptr, (int)(struct symtab *)symalloc());
425 				break;
426 		case	ISTAB:
427 		case	ISTABSTR:
428 		case	ISTABNONE:
429 		case	ISTABDOT:
430 		case	IALIGN:
431 				pptr(bufptr, (int)(struct symtab *)symalloc());
432 				break;
433 	/*
434 	 *	default:
435 	 */
436 	 }
437 	 builtval: ;
438    }			/*end of the while to stuff the buffer*/
439    done:
440 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
441 	/*
442 	 *	This is a real kludge:
443 	 *
444 	 *	We put the last token in the buffer to be  a MINUS
445 	 *	symbol.  This last token will never be picked up
446 	 *	in the normal way, but can be looked at during
447 	 *	a peekahead look that the short circuit expression
448 	 *	evaluator uses to see if an expression is complicated.
449 	 *
450 	 *	Consider the following situation:
451 	 *
452 	 *	.word	45		+	47
453 	 *        buffer 1      |  buffer 0
454 	 *	the peekahead would want to look across the buffer,
455 	 *	but will look in the buffer end zone, see the minus, and
456 	 *	fail.
457 	 */
458 	ptoken(bufptr, MINUS);
459 	REGTOMEMBUF;
460 }
461