xref: /csrg-svn/old/as.vax/asscan2.c (revision 13808)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
4 #ifndef lint
5 static char sccsid[] = "@(#)asscan2.c 4.14 07/06/83";
6 #endif not lint
7 
8 #include "asscanl.h"
9 
10 static	inttoktype	oval = NL;
11 #define	ASINBUFSIZ	4096
12 char	inbufunget[8];
13 char	inbuffer[ASINBUFSIZ];
14 char	*Ginbufptr = inbuffer;
15 int	Ginbufcnt = 0;
16 int	scannerhadeof;
17 
18 fillinbuffer()
19 {
20 		int	nread;
21 		int	goal;
22 		int	got;
23 
24 	nread = 0;
25 	if (scannerhadeof == 0){
26 		goal = sizeof(inbuffer);
27 		do {
28 			got = read(stdin->_file, inbuffer + nread, goal);
29 			if (got == 0)
30 				scannerhadeof = 1;
31 			if (got <= 0)
32 				break;
33 			nread += got;
34 			goal -= got;
35 		} while (goal);
36 	} else {
37 		scannerhadeof = 0;
38 	}
39 	/*
40 	 *	getchar assumes that Ginbufcnt and Ginbufptr
41 	 *	are adjusted as if one character has been removed
42 	 *	from the input.
43 	 */
44 	if (nread == 0){
45 		inbuffer[0] = EOFCHAR;
46 		nread = 1;
47 	}
48 	Ginbufcnt = nread - 1;
49 	Ginbufptr = inbuffer + 1;
50 }
51 
52 scan_dot_s(bufferbox)
53 	struct tokbufdesc *bufferbox;
54 {
55 	reg	char	*inbufptr;
56 	reg	int	inbufcnt;
57 	reg	int	ryylval;	/* local copy of lexical value */
58 	extern	int	yylval;		/* global copy of lexical value */
59 	reg	int	val;		/* the value returned */
60 		int	i;		/* simple counter */
61 	reg	char	*rcp;
62 		int	ch;		/* treated as a character */
63 		int	ch1;		/* shadow value */
64 		struct 	symtab	*op;
65 		ptrall	lgbackpatch;	/* where to stuff a string length */
66 	reg	ptrall	bufptr;		/* where to stuff tokens */
67 		ptrall	bufub;		/* where not to stuff tokens */
68 		long	intval;		/* value of int */
69 		int	linescrossed;	/* when doing strings and comments */
70 		struct	Opcode		opstruct;
71 	reg	int	strlg;		/* the length of a string */
72 
73 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
74 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
75 
76 	MEMTOREGBUF;
77 	if (newfflag){
78 		newfflag = 0;
79 		ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH);
80 
81 		ptoken(bufptr, IFILE);
82 		ptoken(bufptr, STRING);
83 		pptr(bufptr, ryylval);
84 
85 		ptoken(bufptr, ILINENO);
86 		ptoken(bufptr, INT);
87 		pint(bufptr,  1);
88 	}
89 
90 	while (bufptr < bufub){
91    loop:
92         switch(ryylval = (type+1)[ch = getchar()]) {
93 	case SCANEOF:
94 	endoffile: ;
95 		inbufptr = 0;
96 		ptoken(bufptr, PARSEEOF);
97 		goto done;
98 
99 	case DIV:		/*process C style comments*/
100 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
101 			int	incomment;
102 			linescrossed = 0;
103 			incomment = 1;
104 			ch = getchar();	/*skip over the * */
105 			while(incomment){
106 				switch(ch){
107 				case '*':
108 					ch = getchar();
109 					incomment = (ch != '/');
110 					break;
111 				case '\n':
112 					scanlineno++;
113 					linescrossed++;
114 					ch = getchar();
115 					break;
116 				case EOFCHAR:
117 					goto endoffile;
118 				default:
119 					ch = getchar();
120 					break;
121 				}
122 			}
123 			val = ILINESKIP;
124 			ryylval = linescrossed;
125 			goto ret;
126 		} else {	/*just an ordinary DIV*/
127 			ungetc(ch);
128 			val = ryylval = DIV;
129 			goto ret;
130 		}
131 	case SH:
132 		if (oval == NL){
133 			/*
134 			 *	Attempt to recognize a C preprocessor
135 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
136 			 */
137 			ch = getchar();	/*bump the #*/
138 			while (INCHARSET(ch, SPACE))
139 				ch = getchar();/*bump white */
140 			if (INCHARSET(ch, DIGIT)){
141 				intval = 0;
142 				while(INCHARSET(ch, DIGIT)){
143 					intval = intval*10 + ch - '0';
144 					ch = getchar();
145 				}
146 				while (INCHARSET(ch, SPACE))
147 					ch = getchar();
148 				if (ch == '"'){
149 					ptoken(bufptr, ILINENO);
150 					ptoken(bufptr, INT);
151 					pint(bufptr, intval - 1);
152 					ptoken(bufptr, IFILE);
153 					/*
154 					 *	The '"' has already been
155 					 *	munched
156 					 *
157 					 *	eatstr will not eat
158 					 *	the trailing \n, so
159 					 *	it is given to the parser
160 					 *	and counted.
161 					 */
162 					goto eatstr;
163 				}
164 			}
165 		}
166 		/*
167 		 *	Well, its just an ordinary decadent comment
168 		 */
169 		while ((ch != '\n') && (ch != EOFCHAR))
170 			ch = getchar();
171 		if (ch == EOFCHAR)
172 			goto endoffile;
173 		val = ryylval = oval = NL;
174 		scanlineno++;
175 		goto ret;
176 
177 	case NL:
178 		scanlineno++;
179 		val = ryylval;
180 		goto ret;
181 
182 	case SP:
183 		oval = SP;	/*invalidate ^# meta comments*/
184 		goto loop;
185 
186 	case REGOP:		/* % , could be used as modulo, or register*/
187 		ch = getchar();
188 		if (INCHARSET(ch, DIGIT)){
189 			ryylval = ch-'0';
190 			if (ch=='1') {
191 				if (INCHARSET( (ch = getchar()), REGDIGIT))
192 					ryylval = 10+ch-'0';
193 				else
194 					ungetc(ch);
195 			}
196 			/*
197 			 *	God only knows what the original author
198 			 *	wanted this undocumented feature to
199 			 *	do.
200 			 *		%5++ is really  r7
201 			 */
202 			while(INCHARSET( (ch = getchar()), SIGN)) {
203 				if (ch=='+')
204 					ryylval++;
205 				else
206 					ryylval--;
207 			}
208 			ungetc(ch);
209 			val = REG;
210 		} else {
211 			ungetc(ch);
212 			val = REGOP;
213 		}
214 		goto ret;
215 
216 	case ALPH:
217 		ch1 = ch;
218 		if (INCHARSET(ch, SZSPECBEGIN)){
219 			if( (ch = getchar()) == '`' || ch == '^'){
220 				ch1 |= 0100;	/*convert to lower*/
221 				switch(ch1){
222 				case 'b':	ryylval = 1;	break;
223 				case 'w':	ryylval = 2;	break;
224 				case 'l':	ryylval = 4;	break;
225 				default:	ryylval = d124;	break;
226 				}
227 				val = SIZESPEC;
228 				goto ret;
229 			} else {
230 				ungetc(ch);
231 				ch = ch1;	/*restore first character*/
232 			}
233 		}
234 		rcp = yytext;
235 		do {
236 			if (rcp < &yytext[NCPName])
237 				*rcp++ = ch;
238 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
239 		*rcp = '\0';
240 		while (INCHARSET(ch, SPACE))
241 			ch = getchar();
242 		ungetc(ch);
243 
244 		switch((op = *lookup(1))->s_tag){
245 		case 0:
246 		case LABELID:
247 			/*
248 			 *	Its a name... (Labels are subsets of name)
249 			 */
250 			ryylval = (int)op;
251 			val = NAME;
252 			break;
253 		case INST0:
254 		case INSTn:
255 		case IJXXX:
256 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
257 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
258 			val = op->s_tag;
259 			break;
260 		default:
261 			ryylval = ( (struct instab *)op)->i_popcode;
262 			val = op->s_tag;
263 			break;
264 		}
265 		goto ret;
266 
267 	case DIG:
268 		/*
269 		 *	restore local inbufptr and inbufcnt
270 		 */
271 		REGTOMEMBUF;
272 		val = number(ch);
273 		MEMTOREGBUF;
274 		/*
275 		 *	yylval or yybignum has been stuffed as a side
276 		 *	effect to number(); get the global yylval
277 		 *	into our fast local copy in case it was an INT.
278 		 */
279 		ryylval = yylval;
280 		goto ret;
281 
282 	case LSH:
283 	case RSH:
284 		/*
285 		 *	We allow the C style operators
286 		 *	<< and >>, as well as < and >
287 		 */
288 		if ( (ch1 = getchar()) != ch)
289 			ungetc(ch1);
290 		val = ryylval;
291 		goto ret;
292 
293 	case MINUS:
294 		if ( (ch = getchar()) =='(')
295 			ryylval=val=MP;
296 		else {
297 			ungetc(ch);
298 			val=MINUS;
299 		}
300 		goto ret;
301 
302 	case SQ:
303 		if ((ryylval = getchar()) == '\n')
304 			scanlineno++;		/*not entirely correct*/
305 		val = INT;
306 		goto ret;
307 
308 	case DQ:
309 	   eatstr:
310 		linescrossed = 0;
311 		for (strlg = 0; /*VOID*/; strlg++){
312 		    switch(ch = getchar()){
313 		    case '"':
314 			goto tailDQ;
315 		    default:
316 		    stuff:
317 			putc(ch, strfile);
318 			break;
319 		    case '\n':
320 			yywarning("New line in a string constant");
321 			scanlineno++;
322 			linescrossed++;
323 			ch = getchar();
324 			switch(ch){
325 			case EOFCHAR:
326 				putc('\n', strfile);
327 				ungetc(EOFCHAR);
328 				goto tailDQ;
329 			default:
330 				ungetc(ch);
331 				ch = '\n';
332 				goto stuff;
333 			}
334 			break;
335 
336 		    case '\\':
337 			ch = getchar();		/*skip the '\\'*/
338 			if ( INCHARSET(ch, BSESCAPE)){
339 				switch (ch){
340 				  case 'b':  ch = '\b'; goto stuff;
341 				  case 'f':  ch = '\f'; goto stuff;
342 				  case 'n':  ch = '\n'; goto stuff;
343 				  case 'r':  ch = '\r'; goto stuff;
344 				  case 't':  ch = '\t'; goto stuff;
345 				}
346 			}
347 			if ( !(INCHARSET(ch, OCTDIGIT)) )
348 				goto stuff;
349 			i = 0;
350 			intval = 0;
351 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
352 				i++;
353 				intval <<= 3;
354 				intval += ch - '0';
355 				ch = getchar();
356 			}
357 			ungetc(ch);
358 			ch = (char)intval;
359 			goto stuff;
360 		    }
361 		}
362 	tailDQ: ;
363 		/*
364 		 *	account for any lines that were crossed
365 		 */
366 		if (linescrossed){
367 			ptoken(bufptr, ILINESKIP);
368 			pint(bufptr, linescrossed);
369 		}
370 		/*
371 		 *	Cheat: append a trailing null to the string
372 		 *	and then adjust the string length to ignore
373 		 *	the trailing null.  If any STRING client requires
374 		 *	the trailing null, the client can just change STRLEN
375 		 */
376 		putc(0, strfile);
377 		ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE);
378 		val = STRING;
379 		((struct strdesc *)ryylval)->sd_strlen -= 1;
380 		goto ret;
381 
382 	case BADCHAR:
383 		linescrossed = lineno;
384 		lineno = scanlineno;
385 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
386 			ryylval, ch);
387 		lineno = linescrossed;
388 		val = BADCHAR;
389 		goto ret;
390 
391 	default:
392 		val = ryylval;
393 		goto ret;
394 	}	/*end of the switch*/
395 	/*
396 	 *	here with one token, so stuff it
397 	 */
398    ret:
399 	oval = val;
400 	ptoken(bufptr, val);
401 	switch(val){
402 		case	ILINESKIP:
403 				pint(bufptr, ryylval);
404 				break;
405 		case	SIZESPEC:
406 				pchar(bufptr, ryylval);
407 				break;
408 		case	BFINT:	plong(bufptr, ryylval);
409 				break;
410 		case	INT:	plong(bufptr, ryylval);
411 				break;
412 		case 	BIGNUM:	pnumber(bufptr, yybignum);
413 				break;
414 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
415 				break;
416 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
417 				break;
418 		case	REG:	pchar(bufptr, ryylval);
419 				break;
420 		case	INST0:
421 		case	INSTn:
422 				popcode(bufptr, opstruct);
423 				break;
424 		case 	IJXXX:
425 				popcode(bufptr, opstruct);
426 				pptr(bufptr, (int)(struct symtab *)symalloc());
427 				break;
428 		case	ISTAB:
429 		case	ISTABSTR:
430 		case	ISTABNONE:
431 		case	ISTABDOT:
432 		case	IALIGN:
433 				pptr(bufptr, (int)(struct symtab *)symalloc());
434 				break;
435 	/*
436 	 *	default:
437 	 */
438 	 }
439 	 builtval: ;
440    }			/*end of the while to stuff the buffer*/
441    done:
442 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
443 	/*
444 	 *	This is a real kludge:
445 	 *
446 	 *	We put the last token in the buffer to be  a MINUS
447 	 *	symbol.  This last token will never be picked up
448 	 *	in the normal way, but can be looked at during
449 	 *	a peekahead look that the short circuit expression
450 	 *	evaluator uses to see if an expression is complicated.
451 	 *
452 	 *	Consider the following situation:
453 	 *
454 	 *	.word	45		+	47
455 	 *        buffer 1      |  buffer 0
456 	 *	the peekahead would want to look across the buffer,
457 	 *	but will look in the buffer end zone, see the minus, and
458 	 *	fail.
459 	 */
460 	ptoken(bufptr, MINUS);
461 	REGTOMEMBUF;
462 }
463