xref: /csrg-svn/old/as.vax/asscan2.c (revision 13524)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
/*
 *	SCCS identification string; compiled out when lint is defined.
 */
#ifndef lint
static char sccsid[] = "@(#)asscan2.c 4.11 06/30/83";
#endif /* not lint */
7 
8 #include "asscanl.h"
9 
/*
 *	Type of the token most recently stuffed by scan_dot_s.
 *	It starts out as NL, is forced to SP when white space is
 *	skipped, and is tested against NL to decide whether a '#'
 *	may open a C preprocessor style line directive.
 */
10 static	inttoktype	oval = NL;
11 
/*
 *	The raw input buffer and the global copies of the read
 *	pointer and of the count of characters left; fillinbuffer
 *	resets both after every refill.
 *
 *	NOTE(review): inbufunget is not referenced in this file.
 *	Presumably it must stay directly in front of inbuffer so that
 *	an ungetc done at the very start of inbuffer has somewhere to
 *	land -- confirm against the ungetc macro in asscanl.h before
 *	reordering these definitions.
 */
12 char	inbufunget[8];
13 char	inbuffer[ASINBUFSIZ];
14 char	*Ginbufptr = inbuffer;
15 int	Ginbufcnt = 0;
16 
17 fillinbuffer()
18 {
19 		int	nread;
20 	static	int	hadeof;
21 		int	goal;
22 		int	got;
23 
24 	nread = 0;
25 	if (hadeof == 0){
26 		goal = sizeof(inbuffer);
27 		do {
28 			got = read(stdin->_file, inbuffer + nread, goal);
29 			if (got == 0)
30 				hadeof = 1;
31 			if (got <= 0)
32 				break;
33 			nread += got;
34 			goal -= got;
35 		} while (goal);
36 	}
37 	/*
38 	 *	getchar assumes that Ginbufcnt and Ginbufptr
39 	 *	are adjusted as if one character has been removed
40 	 *	from the input.
41 	 */
42 	if (nread == 0){
43 		inbuffer[0] = EOFCHAR;
44 		nread = 1;
45 	}
46 	Ginbufcnt = nread - 1;
47 	Ginbufptr = inbuffer + 1;
48 }
49 
/*
 *	strtext is the buffer name handed to savestr when a string
 *	constant is completed.  When yytext is at least as large as
 *	the largest string (NCPName >= NCPString) strtext is simply
 *	another name for yytext; otherwise it gets storage of its own,
 *	NCPString characters plus one more.
 */
#if NCPName >= NCPString
#	define	strtext yytext
#else
char	strtext[NCPString + 1];
#endif
55 
56 scan_dot_s(bufferbox)
57 	struct tokbufdesc *bufferbox;
58 {
59 	reg	char	*inbufptr;
60 	reg	int	inbufcnt;
61 	reg	int	ryylval;	/* local copy of lexical value */
62 	extern	int	yylval;		/* global copy of lexical value */
63 	reg	int	val;		/* the value returned */
64 		int	i;		/* simple counter */
65 	reg	char	*rcp;
66 		int	ch;		/* treated as a character */
67 		int	ch1;		/* shadow value */
68 		struct 	symtab	*op;
69 		ptrall	lgbackpatch;	/* where to stuff a string length */
70 	reg	ptrall	bufptr;		/* where to stuff tokens */
71 		ptrall	bufub;		/* where not to stuff tokens */
72 	reg	int	strlg;		/* the length of a string */
73 		long	intval;		/* value of int */
74 		int	linescrossed;	/* when doing strings and comments */
75 		struct	Opcode		opstruct;
76 		struct	strdesc	strd;	/* for building DQ strings */
77 
78 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
79 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
80 
81 	MEMTOREGBUF;
82 	if (newfflag){
83 		newfflag = 0;
84 		strd.sd_stroff = strfilepos;
85 		strd.sd_place = STR_BOTH;
86 		strd.sd_strlen = strlen(newfname) + 1;
87 		fputs(newfname, strfile);
88 		putc(0, strfile);
89 		strfilepos += strd.sd_strlen;
90 		ryylval = (int)savestr(newfname, &strd);
91 
92 		ptoken(bufptr, IFILE);
93 		ptoken(bufptr, STRING);
94 		pptr(bufptr, ryylval);
95 
96 		ptoken(bufptr, ILINENO);
97 		ptoken(bufptr, INT);
98 		pint(bufptr,  1);
99 	}
100 
101 	while (bufptr < bufub){
102    loop:
103         switch(ryylval = (type+1)[ch = getchar()]) {
104 	case SCANEOF:
105 	endoffile: ;
106 		inbufptr = 0;
107 		ptoken(bufptr, PARSEEOF);
108 		goto done;
109 
110 	case DIV:		/*process C style comments*/
111 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
112 			int	incomment;
113 			linescrossed = 0;
114 			incomment = 1;
115 			ch = getchar();	/*skip over the * */
116 			while(incomment){
117 				switch(ch){
118 				case '*':
119 					ch = getchar();
120 					incomment = (ch != '/');
121 					break;
122 				case '\n':
123 					scanlineno++;
124 					linescrossed++;
125 					ch = getchar();
126 					break;
127 				case EOFCHAR:
128 					goto endoffile;
129 				default:
130 					ch = getchar();
131 					break;
132 				}
133 			}
134 			val = ILINESKIP;
135 			ryylval = linescrossed;
136 			goto ret;
137 		} else {	/*just an ordinary DIV*/
138 			ungetc(ch);
139 			val = ryylval = DIV;
140 			goto ret;
141 		}
142 	case SH:
143 		if (oval == NL){
144 			/*
145 			 *	Attempt to recognize a C preprocessor
146 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
147 			 */
148 			ch = getchar();	/*bump the #*/
149 			while (INCHARSET(ch, SPACE))
150 				ch = getchar();/*bump white */
151 			if (INCHARSET(ch, DIGIT)){
152 				intval = 0;
153 				while(INCHARSET(ch, DIGIT)){
154 					intval = intval*10 + ch - '0';
155 					ch = getchar();
156 				}
157 				while (INCHARSET(ch, SPACE))
158 					ch = getchar();
159 				if (ch == '"'){
160 					ptoken(bufptr, ILINENO);
161 					ptoken(bufptr, INT);
162 					pint(bufptr, intval - 1);
163 					ptoken(bufptr, IFILE);
164 					/*
165 					 *	The '"' has already been
166 					 *	munched
167 					 *
168 					 *	eatstr will not eat
169 					 *	the trailing \n, so
170 					 *	it is given to the parser
171 					 *	and counted.
172 					 */
173 					goto eatstr;
174 				}
175 			}
176 		}
177 		/*
178 		 *	Well, its just an ordinary decadent comment
179 		 */
180 		while ((ch != '\n') && (ch != EOFCHAR))
181 			ch = getchar();
182 		if (ch == EOFCHAR)
183 			goto endoffile;
184 		val = ryylval = oval = NL;
185 		scanlineno++;
186 		goto ret;
187 
188 	case NL:
189 		scanlineno++;
190 		val = ryylval;
191 		goto ret;
192 
193 	case SP:
194 		oval = SP;	/*invalidate ^# meta comments*/
195 		goto loop;
196 
197 	case REGOP:		/* % , could be used as modulo, or register*/
198 		ch = getchar();
199 		if (INCHARSET(ch, DIGIT)){
200 			ryylval = ch-'0';
201 			if (ch=='1') {
202 				if (INCHARSET( (ch = getchar()), REGDIGIT))
203 					ryylval = 10+ch-'0';
204 				else
205 					ungetc(ch);
206 			}
207 			/*
208 			 *	God only knows what the original author
209 			 *	wanted this undocumented feature to
210 			 *	do.
211 			 *		%5++ is really  r7
212 			 */
213 			while(INCHARSET( (ch = getchar()), SIGN)) {
214 				if (ch=='+')
215 					ryylval++;
216 				else
217 					ryylval--;
218 			}
219 			ungetc(ch);
220 			val = REG;
221 		} else {
222 			ungetc(ch);
223 			val = REGOP;
224 		}
225 		goto ret;
226 
227 	case ALPH:
228 		ch1 = ch;
229 		if (INCHARSET(ch, SZSPECBEGIN)){
230 			if( (ch = getchar()) == '`' || ch == '^'){
231 				ch1 |= 0100;	/*convert to lower*/
232 				switch(ch1){
233 				case 'b':	ryylval = 1;	break;
234 				case 'w':	ryylval = 2;	break;
235 				case 'l':	ryylval = 4;	break;
236 				default:	ryylval = d124;	break;
237 				}
238 				val = SIZESPEC;
239 				goto ret;
240 			} else {
241 				ungetc(ch);
242 				ch = ch1;	/*restore first character*/
243 			}
244 		}
245 		rcp = yytext;
246 		do {
247 			if (rcp < &yytext[NCPName])
248 				*rcp++ = ch;
249 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
250 		*rcp = '\0';
251 		while (INCHARSET(ch, SPACE))
252 			ch = getchar();
253 		ungetc(ch);
254 
255 		switch((op = *lookup(1))->s_tag){
256 		case 0:
257 		case LABELID:
258 			/*
259 			 *	Its a name... (Labels are subsets of name)
260 			 */
261 			ryylval = (int)op;
262 			val = NAME;
263 			break;
264 		case INST0:
265 		case INSTn:
266 		case IJXXX:
267 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
268 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
269 			val = op->s_tag;
270 			break;
271 		default:
272 			ryylval = ( (struct instab *)op)->i_popcode;
273 			val = op->s_tag;
274 			break;
275 		}
276 		goto ret;
277 
278 	case DIG:
279 		/*
280 		 *	restore local inbufptr and inbufcnt
281 		 */
282 		REGTOMEMBUF;
283 		val = number(ch);
284 		MEMTOREGBUF;
285 		/*
286 		 *	yylval or yybignum has been stuffed as a side
287 		 *	effect to number(); get the global yylval
288 		 *	into our fast local copy in case it was an INT.
289 		 */
290 		ryylval = yylval;
291 		goto ret;
292 
293 	case LSH:
294 	case RSH:
295 		/*
296 		 *	We allow the C style operators
297 		 *	<< and >>, as well as < and >
298 		 */
299 		if ( (ch1 = getchar()) != ch)
300 			ungetc(ch1);
301 		val = ryylval;
302 		goto ret;
303 
304 	case MINUS:
305 		if ( (ch = getchar()) =='(')
306 			ryylval=val=MP;
307 		else {
308 			ungetc(ch);
309 			val=MINUS;
310 		}
311 		goto ret;
312 
313 	case SQ:
314 		if ((ryylval = getchar()) == '\n')
315 			scanlineno++;		/*not entirely correct*/
316 		val = INT;
317 		goto ret;
318 
319 	case DQ:
320 	   eatstr:
321 		linescrossed = 0;
322 		strd.sd_stroff = strfilepos;
323 		strd.sd_place = STR_FILE;
324 		for (strd.sd_strlen = 0; /*VOID*/; strd.sd_strlen++){
325 		    switch(ch = getchar()){
326 		    case '"':
327 			goto tailDQ;
328 		    default:
329 		    stuff:
330 			putc(ch, strfile);
331 			break;
332 		    case '\n':
333 			yywarning("New line in a string constant");
334 			scanlineno++;
335 			linescrossed++;
336 			ch = getchar();
337 			switch(ch){
338 			case EOFCHAR:
339 				putc('\n', strfile);
340 				ungetc(EOFCHAR);
341 				goto tailDQ;
342 			default:
343 				ungetc(ch);
344 				ch = '\n';
345 				goto stuff;
346 			}
347 			break;
348 
349 		    case '\\':
350 			ch = getchar();		/*skip the '\\'*/
351 			if ( INCHARSET(ch, BSESCAPE)){
352 				switch (ch){
353 				  case 'b':  ch = '\b'; goto stuff;
354 				  case 'f':  ch = '\f'; goto stuff;
355 				  case 'n':  ch = '\n'; goto stuff;
356 				  case 'r':  ch = '\r'; goto stuff;
357 				  case 't':  ch = '\t'; goto stuff;
358 				}
359 			}
360 			if ( !(INCHARSET(ch, OCTDIGIT)) )
361 				goto stuff;
362 			i = 0;
363 			intval = 0;
364 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
365 				i++;
366 				intval <<= 3;
367 				intval += ch - '0';
368 				ch = getchar();
369 			}
370 			ungetc(ch);
371 			ch = (char)intval;
372 			goto stuff;
373 		    }
374 		}
375 	tailDQ: ;
376 		/*
377 		 *	account for any lines that were crossed
378 		 */
379 		if (linescrossed){
380 			ptoken(bufptr, ILINESKIP);
381 			pint(bufptr, linescrossed);
382 		}
383 		/*
384 		 *	put the string in strtext into the string pool
385 		 *
386 		 *	Cheat: append a trailing null to the string
387 		 *	and then adjust the string length to ignore
388 		 *	the trailing null.  If any STRING client requires
389 		 *	the trailing null, the client can just change STRLEN
390 		 */
391 		val = STRING;
392 		putc(0, strfile);
393 		strd.sd_strlen += 1;
394 		strfilepos += strd.sd_strlen;
395 		ryylval = (int)savestr(strtext, &strd);
396 		((struct strdesc *)ryylval)->sd_strlen -= 1;
397 		goto ret;
398 
399 	case BADCHAR:
400 		linescrossed = lineno;
401 		lineno = scanlineno;
402 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
403 			ryylval, ch);
404 		lineno = linescrossed;
405 		val = BADCHAR;
406 		goto ret;
407 
408 	default:
409 		val = ryylval;
410 		goto ret;
411 	}	/*end of the switch*/
412 	/*
413 	 *	here with one token, so stuff it
414 	 */
415    ret:
416 	oval = val;
417 	ptoken(bufptr, val);
418 	switch(val){
419 		case	ILINESKIP:
420 				pint(bufptr, ryylval);
421 				break;
422 		case	SIZESPEC:
423 				pchar(bufptr, ryylval);
424 				break;
425 		case	BFINT:	plong(bufptr, ryylval);
426 				break;
427 		case	INT:	plong(bufptr, ryylval);
428 				break;
429 		case 	BIGNUM:	pnumber(bufptr, yybignum);
430 				break;
431 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
432 				break;
433 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
434 				break;
435 		case	REG:	pchar(bufptr, ryylval);
436 				break;
437 		case	INST0:
438 		case	INSTn:
439 				popcode(bufptr, opstruct);
440 				break;
441 		case 	IJXXX:
442 				popcode(bufptr, opstruct);
443 				pptr(bufptr, (int)(struct symtab *)symalloc());
444 				break;
445 		case	ISTAB:
446 		case	ISTABSTR:
447 		case	ISTABNONE:
448 		case	ISTABDOT:
449 		case	IALIGN:
450 				pptr(bufptr, (int)(struct symtab *)symalloc());
451 				break;
452 	/*
453 	 *	default:
454 	 */
455 	 }
456 	 builtval: ;
457    }			/*end of the while to stuff the buffer*/
458    done:
459 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
460 	/*
461 	 *	This is a real kludge:
462 	 *
463 	 *	We put the last token in the buffer to be  a MINUS
464 	 *	symbol.  This last token will never be picked up
465 	 *	in the normal way, but can be looked at during
466 	 *	a peekahead look that the short circuit expression
467 	 *	evaluator uses to see if an expression is complicated.
468 	 *
469 	 *	Consider the following situation:
470 	 *
471 	 *	.word	45		+	47
472 	 *        buffer 1      |  buffer 0
473 	 *	the peekahead would want to look across the buffer,
474 	 *	but will look in the buffer end zone, see the minus, and
475 	 *	fail.
476 	 */
477 	ptoken(bufptr, MINUS);
478 	REGTOMEMBUF;
479 }
480