xref: /csrg-svn/old/as.vax/asscan2.c (revision 13467)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
/*
 *	SCCS identification string; left out when running lint.
 */
#ifndef lint
static char sccsid[] = "@(#)asscan2.c 4.9 06/30/83";
#endif /* not lint */
7 
8 #include "asscanl.h"
9 
/*
 *	oval is the type of the most recent token seen by scan_dot_s().
 *	It starts out as NL, so a '#' at the very start of the input is
 *	treated as being at the beginning of a line.
 */
10 static	inttoktype	oval = NL;
11 
/*
 *	Raw input buffering.
 *
 *	inbuffer is refilled by fillinbuffer(), which also resets
 *	Ginbufptr and Ginbufcnt to describe the unread part of it.
 *
 *	NOTE(review): inbufunget is not referenced anywhere in this file;
 *	presumably it is slack in front of inbuffer for pushed back
 *	characters, which would make the declaration order significant.
 *	Confirm against the ungetc macro before moving it.
 */
12 char	inbufunget[8];
13 char	inbuffer[ASINBUFSIZ];
14 char	*Ginbufptr = inbuffer;	/* position in inbuffer */
15 int	Ginbufcnt = 0;		/* characters remaining in inbuffer */
16 
17 fillinbuffer()
18 {
19 		int	nread;
20 	static	int	hadeof;
21 		int	goal;
22 		int	got;
23 
24 	nread = 0;
25 	if (hadeof == 0){
26 		goal = sizeof(inbuffer);
27 		do {
28 			got = read(stdin->_file, inbuffer + nread, goal);
29 			if (got == 0)
30 				hadeof = 1;
31 			if (got <= 0)
32 				break;
33 			nread += got;
34 			goal -= got;
35 		} while (goal);
36 	}
37 	/*
38 	 *	getchar assumes that Ginbufcnt and Ginbufptr
39 	 *	are adjusted as if one character has been removed
40 	 *	from the input.
41 	 */
42 	if (nread == 0){
43 		inbuffer[0] = EOFCHAR;
44 		nread = 1;
45 	}
46 	Ginbufcnt = nread - 1;
47 	Ginbufptr = inbuffer + 1;
48 }
49 
50 #ifndef FLEXNAMES
51 char	strtext[NCPString + 1];
52 #else FLEXNAMES
53 # if NCPName < NCPString
54 char	strtext[NCPString + 1];
55 # else
56 #define	strtext yytext
57 # endif
58 #endif FLEXNAMES
59 
60 scan_dot_s(bufferbox)
61 	struct tokbufdesc *bufferbox;
62 {
63 	reg	char	*inbufptr;
64 	reg	int	inbufcnt;
65 	reg	int	ryylval;	/* local copy of lexical value */
66 	extern	int	yylval;		/* global copy of lexical value */
67 	reg	int	val;		/* the value returned */
68 		int	i;		/* simple counter */
69 	reg	char	*rcp;
70 		int	ch;		/* treated as a character */
71 		int	ch1;		/* shadow value */
72 		struct 	symtab	*op;
73 		ptrall	lgbackpatch;	/* where to stuff a string length */
74 	reg	ptrall	bufptr;		/* where to stuff tokens */
75 		ptrall	bufub;		/* where not to stuff tokens */
76 	reg	int	maxstrlg;	/* how long a string can be */
77 		long	intval;		/* value of int */
78 		int	linescrossed;	/* when doing strings and comments */
79 		struct	Opcode		opstruct;
80 
81 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
82 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
83 
84 	MEMTOREGBUF;
85 	if (newfflag){
86 		newfflag = 0;
87 		ryylval = (int)savestr(newfname, strlen(newfname) + 1);
88 
89 		ptoken(bufptr, IFILE);
90 		ptoken(bufptr, STRING);
91 		pptr(bufptr, ryylval);
92 
93 		ptoken(bufptr, ILINENO);
94 		ptoken(bufptr, INT);
95 		pint(bufptr,  1);
96 	}
97 
98 	while (bufptr < bufub){
99    loop:
100         switch(ryylval = (type+1)[ch = getchar()]) {
101 	case SCANEOF:
102 	endoffile: ;
103 		inbufptr = 0;
104 		ptoken(bufptr, PARSEEOF);
105 		goto done;
106 
107 	case DIV:		/*process C style comments*/
108 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
109 			int	incomment;
110 			linescrossed = 0;
111 			incomment = 1;
112 			ch = getchar();	/*skip over the * */
113 			while(incomment){
114 				switch(ch){
115 				case '*':
116 					ch = getchar();
117 					incomment = (ch != '/');
118 					break;
119 				case '\n':
120 					scanlineno++;
121 					linescrossed++;
122 					ch = getchar();
123 					break;
124 				case EOFCHAR:
125 					goto endoffile;
126 				default:
127 					ch = getchar();
128 					break;
129 				}
130 			}
131 			val = ILINESKIP;
132 			ryylval = linescrossed;
133 			goto ret;
134 		} else {	/*just an ordinary DIV*/
135 			ungetc(ch);
136 			val = ryylval = DIV;
137 			goto ret;
138 		}
139 	case SH:
140 		if (oval == NL){
141 			/*
142 			 *	Attempt to recognize a C preprocessor
143 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
144 			 */
145 			ch = getchar();	/*bump the #*/
146 			while (INCHARSET(ch, SPACE))
147 				ch = getchar();/*bump white */
148 			if (INCHARSET(ch, DIGIT)){
149 				intval = 0;
150 				while(INCHARSET(ch, DIGIT)){
151 					intval = intval*10 + ch - '0';
152 					ch = getchar();
153 				}
154 				while (INCHARSET(ch, SPACE))
155 					ch = getchar();
156 				if (ch == '"'){
157 					ptoken(bufptr, ILINENO);
158 					ptoken(bufptr, INT);
159 					pint(bufptr, intval - 1);
160 					ptoken(bufptr, IFILE);
161 					/*
162 					 *	The '"' has already been
163 					 *	munched
164 					 *
165 					 *	eatstr will not eat
166 					 *	the trailing \n, so
167 					 *	it is given to the parser
168 					 *	and counted.
169 					 */
170 					goto eatstr;
171 				}
172 			}
173 		}
174 		/*
175 		 *	Well, its just an ordinary decadent comment
176 		 */
177 		while ((ch != '\n') && (ch != EOFCHAR))
178 			ch = getchar();
179 		if (ch == EOFCHAR)
180 			goto endoffile;
181 		val = ryylval = oval = NL;
182 		scanlineno++;
183 		goto ret;
184 
185 	case NL:
186 		scanlineno++;
187 		val = ryylval;
188 		goto ret;
189 
190 	case SP:
191 		oval = SP;	/*invalidate ^# meta comments*/
192 		goto loop;
193 
194 	case REGOP:		/* % , could be used as modulo, or register*/
195 		ch = getchar();
196 		if (INCHARSET(ch, DIGIT)){
197 			ryylval = ch-'0';
198 			if (ch=='1') {
199 				if (INCHARSET( (ch = getchar()), REGDIGIT))
200 					ryylval = 10+ch-'0';
201 				else
202 					ungetc(ch);
203 			}
204 			/*
205 			 *	God only knows what the original author
206 			 *	wanted this undocumented feature to
207 			 *	do.
208 			 *		%5++ is really  r7
209 			 */
210 			while(INCHARSET( (ch = getchar()), SIGN)) {
211 				if (ch=='+')
212 					ryylval++;
213 				else
214 					ryylval--;
215 			}
216 			ungetc(ch);
217 			val = REG;
218 		} else {
219 			ungetc(ch);
220 			val = REGOP;
221 		}
222 		goto ret;
223 
224 	case ALPH:
225 		ch1 = ch;
226 		if (INCHARSET(ch, SZSPECBEGIN)){
227 			if( (ch = getchar()) == '`' || ch == '^'){
228 				ch1 |= 0100;	/*convert to lower*/
229 				switch(ch1){
230 				case 'b':	ryylval = 1;	break;
231 				case 'w':	ryylval = 2;	break;
232 				case 'l':	ryylval = 4;	break;
233 				default:	ryylval = d124;	break;
234 				}
235 				val = SIZESPEC;
236 				goto ret;
237 			} else {
238 				ungetc(ch);
239 				ch = ch1;	/*restore first character*/
240 			}
241 		}
242 		rcp = yytext;
243 		do {
244 			if (rcp < &yytext[NCPName])
245 				*rcp++ = ch;
246 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
247 		*rcp = '\0';
248 		while (INCHARSET(ch, SPACE))
249 			ch = getchar();
250 		ungetc(ch);
251 
252 		switch((op = *lookup(1))->s_tag){
253 		case 0:
254 		case LABELID:
255 			/*
256 			 *	Its a name... (Labels are subsets ofname)
257 			 */
258 			ryylval = (int)op;
259 			val = NAME;
260 			break;
261 		case INST0:
262 		case INSTn:
263 		case IJXXX:
264 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
265 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
266 			val = op->s_tag;
267 			break;
268 		default:
269 			ryylval = ( (struct instab *)op)->i_popcode;
270 			val = op->s_tag;
271 			break;
272 		}
273 		goto ret;
274 
275 	case DIG:
276 		/*
277 		 *	restore local inbufptr and inbufcnt
278 		 */
279 		REGTOMEMBUF;
280 		val = number(ch);
281 		MEMTOREGBUF;
282 		/*
283 		 *	yylval or yybignum has been stuffed as a side
284 		 *	effect to number(); get the global yylval
285 		 *	into our fast local copy in case it was an INT.
286 		 */
287 		ryylval = yylval;
288 		goto ret;
289 
290 	case LSH:
291 	case RSH:
292 		/*
293 		 *	We allow the C style operators
294 		 *	<< and >>, as well as < and >
295 		 */
296 		if ( (ch1 = getchar()) != ch)
297 			ungetc(ch1);
298 		val = ryylval;
299 		goto ret;
300 
301 	case MINUS:
302 		if ( (ch = getchar()) =='(')
303 			ryylval=val=MP;
304 		else {
305 			ungetc(ch);
306 			val=MINUS;
307 		}
308 		goto ret;
309 
310 	case SQ:
311 		if ((ryylval = getchar()) == '\n')
312 			scanlineno++;		/*not entirely correct*/
313 		val = INT;
314 		goto ret;
315 
316 	case DQ:
317 	   eatstr:
318 		linescrossed = 0;
319 		for(rcp = strtext, maxstrlg = NCPString; maxstrlg > 0; --maxstrlg){
320 		    switch(ch = getchar()){
321 		    case '"':
322 			goto tailDQ;
323 		    default:
324 		    stuff:
325 			pchar(rcp, ch);
326 			break;
327 		    case '\n':
328 			yywarning("New line in a string constant");
329 			scanlineno++;
330 			linescrossed++;
331 			ch = getchar();
332 			switch(ch){
333 			case EOFCHAR:
334 				pchar(rcp, '\n');
335 				ungetc(EOFCHAR);
336 				goto tailDQ;
337 			default:
338 				ungetc(ch);
339 				ch = '\n';
340 				goto stuff;
341 			}
342 			break;
343 
344 		    case '\\':
345 			ch = getchar();		/*skip the '\\'*/
346 			if ( INCHARSET(ch, BSESCAPE)){
347 				switch (ch){
348 				  case 'b':  ch = '\b'; goto stuff;
349 				  case 'f':  ch = '\f'; goto stuff;
350 				  case 'n':  ch = '\n'; goto stuff;
351 				  case 'r':  ch = '\r'; goto stuff;
352 				  case 't':  ch = '\t'; goto stuff;
353 				}
354 			}
355 			if ( !(INCHARSET(ch, OCTDIGIT)) )
356 				goto stuff;
357 			i = 0;
358 			intval = 0;
359 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
360 				i++;
361 				intval <<= 3;
362 				intval += ch - '0';
363 				ch = getchar();
364 			}
365 			ungetc(ch);
366 			ch = (char)intval;
367 			goto stuff;
368 		    }
369 		}
370 	tailDQ: ;
371 		/*
372 		 *	account for any lines that were crossed
373 		 */
374 		if (linescrossed){
375 			ptoken(bufptr, ILINESKIP);
376 			pint(bufptr, linescrossed);
377 		}
378 		/*
379 		 *	put the string in strtext into the string pool
380 		 *
381 		 *	The value in ryylval points to the string;
382 		 *	the previous 2 bytes is the length of the string
383 		 *
384 		 *	Cheat: append a trailing null to the string
385 		 *	and then adjust the string length to ignore
386 		 *	the trailing null.  If any STRING client requires
387 		 *	the trailing null, the client can just change STRLEN
388 		 */
389 		val = STRING;
390 		*rcp++ = 0;
391 		ryylval = (int)savestr(strtext, rcp - strtext);
392 		STRLEN(((char *)ryylval)) -= 1;
393 		goto ret;
394 
395 	case BADCHAR:
396 		linescrossed = lineno;
397 		lineno = scanlineno;
398 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
399 			ryylval, ch);
400 		lineno = linescrossed;
401 		val = BADCHAR;
402 		goto ret;
403 
404 	default:
405 		val = ryylval;
406 		goto ret;
407 	}	/*end of the switch*/
408 	/*
409 	 *	here with one token, so stuff it
410 	 */
411    ret:
412 	oval = val;
413 	ptoken(bufptr, val);
414 	switch(val){
415 		case	ILINESKIP:
416 				pint(bufptr, ryylval);
417 				break;
418 		case	SIZESPEC:
419 				pchar(bufptr, ryylval);
420 				break;
421 		case	BFINT:	plong(bufptr, ryylval);
422 				break;
423 		case	INT:	plong(bufptr, ryylval);
424 				break;
425 		case 	BIGNUM:	pnumber(bufptr, yybignum);
426 				break;
427 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
428 				break;
429 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
430 				break;
431 		case	REG:	pchar(bufptr, ryylval);
432 				break;
433 		case	INST0:
434 		case	INSTn:
435 				popcode(bufptr, opstruct);
436 				break;
437 		case 	IJXXX:
438 				popcode(bufptr, opstruct);
439 				pptr(bufptr, (int)(struct symtab *)symalloc());
440 				break;
441 		case	ISTAB:
442 		case	ISTABSTR:
443 		case	ISTABNONE:
444 		case	ISTABDOT:
445 		case	IALIGN:
446 				pptr(bufptr, (int)(struct symtab *)symalloc());
447 				break;
448 	/*
449 	 *	default:
450 	 */
451 	 }
452 	 builtval: ;
453    }			/*end of the while to stuff the buffer*/
454    done:
455 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
456 	/*
457 	 *	This is a real kludge:
458 	 *
459 	 *	We put the last token in the buffer to be  a MINUS
460 	 *	symbol.  This last token will never be picked up
461 	 *	in the normal way, but can be looked at during
462 	 *	a peekahead look that the short circuit expression
463 	 *	evaluator uses to see if an expression is complicated.
464 	 *
465 	 *	Consider the following situation:
466 	 *
467 	 *	.word	45		+	47
468 	 *        buffer 1      |  buffer 0
469 	 *	the peekahead would want to look across the buffer,
470 	 *	but will look in the buffer end zone, see the minus, and
471 	 *	fail.
472 	 */
473 	ptoken(bufptr, MINUS);
474 	REGTOMEMBUF;
475 }
476