xref: /csrg-svn/old/as.vax/asscan2.c (revision 13462)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
4 #ifndef lint
5 static char sccsid[] = "@(#)asscan2.c 4.8 06/30/83";
6 #endif not lint
7 
8 #include "asscanl.h"
9 static	inttoktype	oval = NL;
10 
11 #define	NINBUFFERS	2
12 #define	INBUFLG		NINBUFFERS*ASINBUFSIZ + 2
13 	/*
14 	 *	We have NINBUFFERS input buffers; the first one is reserved
15 	 *	for catching the tail of a line split across a buffer
16 	 *	boundary; the other ones are used for snarfing a buffer
17 	 *	worth of assembly language source.
18 	 */
19 static	char	inbuffer[INBUFLG];
20 static	char	*InBufPtr = 0;
21 
22 #ifndef FLEXNAMES
23 char	strtext[NCPString + 1];
24 #else FLEXNAMES
25 # if NCPName < NCPString
26 char	strtext[NCPString + 1];
27 # else
28 #define	strtext yytext
29 # endif
30 #endif FLEXNAMES
31 
32 /*
33  *	fill the inbuffer from the standard input.
34  *	Assert: there are always n COMPLETE! lines in the buffer area.
35  *	Assert: there is always a \n terminating the last line
36  *		in the buffer area.
37  *	Assert: after the \n, there is an EOFCHAR (hard end of file)
38  *		or a NEEDCHAR (end of buffer)
39  *	Assert:	fgets always null pads the string it reads.
40  *	Assert:	no ungetc's are done at the end of a line or at the
41  *		beginning of a line.
42  *
43  *	We read a complete buffer of characters in one single read.
44  *	We then back scan within this buffer to find the end of the
45  *	last complete line, and force the assertions, and save a pointer
46  *	to the incomplete line.
47  *	The next call to fillinbuffer will move the unread characters
48  *	to the end of the first buffer, and then read up to NINBUFFERS - 1
49  *	more buffers, completing the cycle.
50  */
51 
52 static	char	p_swapped = '\0';
53 static	char	*p_start = &inbuffer[NINBUFFERS * ASINBUFSIZ];
54 static	char	*p_stop = &inbuffer[NINBUFFERS * ASINBUFSIZ];
55 
56 #define	MIDDLE	&inbuffer[ASINBUFSIZ]
57 
58 char *fillinbuffer()
59 {
60 	register	char	*from;
61 			char	*inbufptr;
62 	int		nread;
63 	static		int	hadeof;
64 	int		goal;
65 	int		got;
66 
67 	*p_start = p_swapped;
68 	inbufptr = MIDDLE - (p_stop - p_start);
69 	movestr(inbufptr, p_start, p_stop - p_start);
70 	/*
71 	 *	Now, go read up to NINBUFFERS - 1 full buffers
72 	 */
73 	if (hadeof){
74 		hadeof = 0;
75 		return (0);
76 	}
77 	goal = (NINBUFFERS - 1)*ASINBUFSIZ;
78 	nread = 0;
79 	do {
80 		got = read(stdin->_file, MIDDLE + nread, goal);
81 		if (got == 0)
82 			hadeof = 1;
83 		if (got <= 0)
84 			break;
85 		nread += got;
86 		goal -= got;
87 	} while (goal);
88 
89 	if (nread == 0)
90 		return(0);
91 	from = MIDDLE + nread;
92 	p_stop = from;
93 	*from = '\0';
94 	while (*--from != '\n'){
95 		/*
96 		 *	back over the partial line
97 		 */
98 		if (from == MIDDLE) {
99 			from = p_stop;
100 			*p_stop++ = '\n';
101 			break;
102 		} else {
103 			continue;
104 		}
105 	}
106 
107 	from++;				/* first char of partial line */
108 	p_start = from;
109 	p_swapped = *p_start;
110 	*p_start = NEEDCHAR;		/* force assertion */
111 	return(inbufptr);
112 }
113 
/*
 *	scan_dot_s: scan input text and fill bufferbox with tokens.
 *
 *	Tokens, each followed by its operand for the token types handled
 *	in the switch at the ret label, are appended at bufptr until
 *	bufptr reaches &bufferbox->toks[AVAILTOKS], or until the input is
 *	exhausted, in which case a PARSEEOF token is appended.
 *	On return bufferbox->tok_count is the distance from toks[0] to
 *	bufptr, one extra MINUS token has been stored after the counted
 *	ones (see the note at the end of the function), and the input
 *	position has been copied back to InBufPtr for the next call.
 *
 *	getchar and ungetc are not defined in this file; ungetc is used
 *	here with a single argument, so they are presumably macros from
 *	asscanl.h that work on the local inbufptr -- TODO confirm.
 */
114 scan_dot_s(bufferbox)
115 	struct tokbufdesc *bufferbox;
116 {
117 	reg	int	ryylval;	/* local copy of lexical value */
118 	extern	int	yylval;		/* global copy of lexical value */
119 	reg	int	val;		/* the value returned */
120 		int	i;		/* simple counter */
121 	reg	char	*rcp;
122 		char	*cp;		/* can have address taken */
123 	reg	int	ch;		/* treated as a character */
124 		int	ch1;		/* shadow value */
125 	reg	char	*inbufptr;
126 		struct 	symtab	*op;
127 
128 	reg	ptrall	bufptr;		/* where to stuff tokens */
129 		ptrall	lgbackpatch;	/* where to stuff a string length */
130 		ptrall	bufub;		/* where not to stuff tokens */
131 	reg	int	maxstrlg;	/* how long a string can be */
132 		long	intval;		/* value of int */
133 		int	linescrossed;	/* when doing strings and comments */
134 		struct	Opcode		opstruct;
135 
	/*
	 *	NOTE(review): the two assignments below have a cast on
	 *	their left hand side.  That is not valid ISO C; it relies
	 *	on the compiler this file was written for.
	 */
136 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
137 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
138 
	/*
	 *	Resume at the input position saved by the previous call;
	 *	0 means that the input buffer must be filled first.
	 */
139 	inbufptr = InBufPtr;
140 	if (inbufptr == 0){
141 		inbufptr = fillinbuffer();
142 		if (inbufptr == 0){	/*end of file*/
			/*
			 *	Every end of input path in this function
			 *	jumps to endoffile.
			 */
143    		  endoffile:
144 			inbufptr = 0;
145 			ptoken(bufptr, PARSEEOF);
146 			goto done;
147 		}
148 	}
149 
	/*
	 *	newfflag set: announce the file name held in newfname
	 *	(IFILE STRING ptr) and restart line numbering
	 *	(ILINENO INT 1) before any scanned token.
	 */
150 	if (newfflag){
151 		newfflag = 0;
152 		ryylval = (int)savestr(newfname, strlen(newfname) + 1);
153 
154 		ptoken(bufptr, IFILE);
155 		ptoken(bufptr, STRING);
156 		pptr(bufptr, ryylval);
157 
158 		ptoken(bufptr, ILINENO);
159 		ptoken(bufptr, INT);
160 		pint(bufptr,  1);
161 	}
162 
	/*
	 *	One token is produced per trip around this loop.  A goto
	 *	loop rescans without producing a token and without
	 *	testing bufptr against bufub again.
	 */
163 	while (bufptr < bufub){
164    loop:
	/*
	 *	Classify the next character through the table at type+2.
	 *	The offset of 2 presumably makes room for sentinel values
	 *	below zero -- TODO confirm against the definition of type.
	 */
165         switch(ryylval = (type+2)[ch = getchar()]) {
166 	case SCANEOF:
167 		inbufptr = 0;
168 		goto endoffile;
169 
170 	case NEEDSBUF:
171 		inbufptr = fillinbuffer();
172 		if (inbufptr == 0)
173 			goto endoffile;
174 		goto loop;
175 
176 	case DIV:		/*process C style comments*/
177 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
178 			int	incomment;
179 			linescrossed = 0;
180 			incomment = 1;
181 			ch = getchar();	/*skip over the * */
			/*
			 *	Skip to the closing star-slash, counting the
			 *	lines crossed so that an ILINESKIP token
			 *	can report them.
			 */
182 			while(incomment){
183 				switch(ch){
184 				case '*':
185 					ch = getchar();
186 					incomment = (ch != '/');
187 					break;
188 				case '\n':
189 					scanlineno++;
190 					linescrossed++;
191 					ch = getchar();
192 					break;
193 				case EOFCHAR:
194 					goto endoffile;
195 				case NEEDCHAR:
196 					inbufptr = fillinbuffer();
197 					if (inbufptr == 0)
198 						goto endoffile;
					/*
					 *	NOTE(review): this is the only
					 *	place in this function where
					 *	lineno, and not scanlineno, is
					 *	incremented; the newline that
					 *	precedes NEEDCHAR has already
					 *	been counted by the case above.
					 *	Confirm that this is intended.
					 */
199 					lineno++;
200 					ch = getchar();
201 					break;
202 				default:
203 					ch = getchar();
204 					break;
205 				}
206 			}
207 			val = ILINESKIP;
208 			ryylval = linescrossed;
209 			goto ret;
210 		} else {	/*just an ordinary DIV*/
211 			ungetc(ch);
212 			val = ryylval = DIV;
213 			goto ret;
214 		}
215 	case SH:
216 		if (oval == NL){
217 			/*
218 			 *	Attempt to recognize a C preprocessor
219 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
220 			 */
221 			ch = getchar();	/*bump the #*/
222 			while (INCHARSET(ch, SPACE))
223 				ch = getchar();/*bump white */
224 			if (INCHARSET(ch, DIGIT)){
225 				intval = 0;
226 				while(INCHARSET(ch, DIGIT)){
227 					intval = intval*10 + ch - '0';
228 					ch = getchar();
229 				}
230 				while (INCHARSET(ch, SPACE))
231 					ch = getchar();
232 				if (ch == '"'){
233 					ptoken(bufptr, ILINENO);
234 					ptoken(bufptr, INT);
235 					pint(bufptr, intval - 1);
236 					ptoken(bufptr, IFILE);
237 					/*
238 					 *	The '"' has already been
239 					 *	munched
240 					 *
241 					 *	eatstr will not eat
242 					 *	the trailing \n, so
243 					 *	it is given to the parser
244 					 *	and counted.
245 					 */
246 					goto eatstr;
247 				}
248 			}
249 		}
250 		/*
251 		 *	Well, its just an ordinary decadent comment
252 		 */
		/*
		 *	Skip the rest of the line.  When oval was not NL,
		 *	ch still holds the '#' itself on entry.  NEEDCHAR
		 *	is not tested for: fillinbuffer stores it only
		 *	after a newline, and the loop stops at the newline.
		 */
253 		while ((ch != '\n') && (ch != EOFCHAR))
254 			ch = getchar();
255 		if (ch == EOFCHAR)
256 			goto endoffile;
257 		val = ryylval = oval = NL;
258 		scanlineno++;
259 		goto ret;
260 
261 	case NL:
262 		scanlineno++;
263 		val = ryylval;
264 		goto ret;
265 
266 	case SP:
267 		oval = SP;	/*invalidate ^# meta comments*/
268 		goto loop;
269 
270 	case REGOP:		/* % , could be used as modulo, or register*/
271 		ch = getchar();
272 		if (INCHARSET(ch, DIGIT)){
273 			ryylval = ch-'0';
			/*
			 *	a second digit is only accepted after a 1
			 */
274 			if (ch=='1') {
275 				if (INCHARSET( (ch = getchar()), REGDIGIT))
276 					ryylval = 10+ch-'0';
277 				else
278 					ungetc(ch);
279 			}
280 			/*
281 			 *	God only knows what the original author
282 			 *	wanted this undocumented feature to
283 			 *	do.
284 			 *		%5++ is really  r7
285 			 */
286 			while(INCHARSET( (ch = getchar()), SIGN)) {
287 				if (ch=='+')
288 					ryylval++;
289 				else
290 					ryylval--;
291 			}
292 			ungetc(ch);
293 			val = REG;
294 		} else {
295 			ungetc(ch);
296 			val = REGOP;
297 		}
298 		goto ret;
299 
300 	case ALPH:
301 		ch1 = ch;
		/*
		 *	A size specification is a letter in SZSPECBEGIN
		 *	immediately followed by ` or ^
		 */
302 		if (INCHARSET(ch, SZSPECBEGIN)){
303 			if( (ch = getchar()) == '`' || ch == '^'){
				/*
				 *	NOTE(review): in ASCII the bit that
				 *	distinguishes the two cases is 040.
				 *	Or-ing in 0100 leaves a letter
				 *	unchanged, so an upper case B, W or
				 *	L would take the default arm below.
				 *	Confirm against the characters that
				 *	are placed in SZSPECBEGIN.
				 */
304 				ch1 |= 0100;	/*convert to lower*/
305 				switch(ch1){
306 				case 'b':	ryylval = 1;	break;
307 				case 'w':	ryylval = 2;	break;
308 				case 'l':	ryylval = 4;	break;
309 				default:	ryylval = d124;	break;
310 				}
311 				val = SIZESPEC;
312 				goto ret;
313 			} else {
314 				ungetc(ch);
315 				ch = ch1;	/*restore first character*/
316 			}
317 		}
		/*
		 *	Collect the identifier into yytext.  Characters
		 *	beyond the first NCPName are read but not stored.
		 */
318 		rcp = yytext;
319 		do {
320 			if (rcp < &yytext[NCPName])
321 				*rcp++ = ch;
322 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
323 		*rcp = '\0';
		/*
		 *	Skip white space after the identifier, and give
		 *	back the first character that is not white space.
		 */
324 		while (INCHARSET(ch, SPACE))
325 			ch = getchar();
326 		ungetc(ch);
327 
		/*
		 *	lookup is defined elsewhere; it presumably finds or
		 *	enters the name left in yytext -- TODO confirm.  The
		 *	s_tag of the entry decides which token is produced.
		 */
328 		switch((op = *lookup(1))->s_tag){
329 		case 0:
330 		case LABELID:
331 			/*
332 			 *	Its a name... (Labels are subsets ofname)
333 			 */
334 			ryylval = (int)op;
335 			val = NAME;
336 			break;
337 		case INST0:
338 		case INSTn:
339 		case IJXXX:
			/*
			 *	For these the operand is the opcode pair in
			 *	opstruct; ryylval is not used at ret.
			 */
340 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
341 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
342 			val = op->s_tag;
343 			break;
344 		default:
345 			ryylval = ( (struct instab *)op)->i_popcode;
346 			val = op->s_tag;
347 			break;
348 		}
349 		goto ret;
350 
351 	case DIG:
352 		/*
353 		 *	Implement call by reference on a reg variable
354 		 */
355 		cp = inbufptr;
356 		val = number(ch, &cp);
357 		/*
358 		 *	yylval or yybignum has been stuffed as a side
359 		 *	effect to number(); get the global yylval
360 		 *	into our fast local copy in case it was an INT.
361 		 */
362 		ryylval = yylval;
363 		inbufptr = cp;
364 		goto ret;
365 
366 	case LSH:
367 	case RSH:
368 		/*
369 		 *	We allow the C style operators
370 		 *	<< and >>, as well as < and >
371 		 */
		/*
		 *	A repeated character is swallowed; either spelling
		 *	produces the same token.
		 */
372 		if ( (ch1 = getchar()) != ch)
373 			ungetc(ch1);
374 		val = ryylval;
375 		goto ret;
376 
377 	case MINUS:
		/*
		 *	a minus directly followed by ( is the single token MP
		 */
378 		if ( (ch = getchar()) =='(')
379 			ryylval=val=MP;
380 		else {
381 			ungetc(ch);
382 			val=MINUS;
383 		}
384 		goto ret;
385 
386 	case SQ:
		/*
		 *	The value of the INT token is the character that
		 *	follows the quote; no closing quote is looked for.
		 */
387 		if ((ryylval = getchar()) == '\n')
388 			scanlineno++;		/*not entirely correct*/
389 		val = INT;
390 		goto ret;
391 
392 	case DQ:
393 	   eatstr:
394 		linescrossed = 0;
		/*
		 *	Accumulate the string in strtext, one stored
		 *	character per trip around the loop.
		 *	NOTE(review): when NCPString characters have been
		 *	stored the loop simply ends; no diagnostic is given
		 *	and the rest of the string, with its closing quote,
		 *	is left in the input to be scanned as ordinary text.
		 */
395 		for(rcp = strtext, maxstrlg = NCPString; maxstrlg > 0; --maxstrlg){
396 		    switch(ch = getchar()){
397 		    case '"':
398 			goto tailDQ;
399 		    default:
400 		    stuff:
401 			pchar(rcp, ch);
402 			break;
403 		    case '\n':
404 			yywarning("New line in a string constant");
405 			scanlineno++;
406 			linescrossed++;
			/*
			 *	Look at the character after the newline to
			 *	find out whether the buffer or the input
			 *	ends here.
			 */
407 			ch = getchar();
408 			switch(ch){
409 			case NEEDCHAR:
410 				if ( (inbufptr = fillinbuffer()) != 0){
411 					ch = '\n';
412 					goto stuff;
413 				}
414 				/*FALLTHROUGH*/
415 			case EOFCHAR:
416 				pchar(rcp, '\n');
417 				ungetc(EOFCHAR);
418 				goto tailDQ;
419 			default:
420 				ungetc(ch);
421 				ch = '\n';
422 				goto stuff;
423 			}
			/*
			 *	not reached: every arm of the switch above
			 *	ends in a goto
			 */
424 			break;
425 
426 		    case '\\':
427 			ch = getchar();		/*skip the '\\'*/
428 			if ( INCHARSET(ch, BSESCAPE)){
429 				switch (ch){
430 				  case 'b':  ch = '\b'; goto stuff;
431 				  case 'f':  ch = '\f'; goto stuff;
432 				  case 'n':  ch = '\n'; goto stuff;
433 				  case 'r':  ch = '\r'; goto stuff;
434 				  case 't':  ch = '\t'; goto stuff;
435 				}
436 			}
			/*
			 *	Any other escaped character stands for
			 *	itself, unless it starts an octal escape of
			 *	at most 3 digits.
			 */
437 			if ( !(INCHARSET(ch, OCTDIGIT)) )
438 				goto stuff;
439 			i = 0;
440 			intval = 0;
441 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
442 				i++;
443 				intval <<= 3;
444 				intval += ch - '0';
445 				ch = getchar();
446 			}
447 			ungetc(ch);
448 			ch = (char)intval;
449 			goto stuff;
450 		    }
451 		}
452 	tailDQ: ;
453 		/*
454 		 *	account for any lines that were crossed
455 		 */
456 		if (linescrossed){
457 			ptoken(bufptr, ILINESKIP);
458 			pint(bufptr, linescrossed);
459 		}
460 		/*
461 		 *	put the string in strtext into the string pool
462 		 *
463 		 *	The value in ryylval points to the string;
464 		 *	the previous 2 bytes is the length of the string
465 		 *
466 		 *	Cheat: append a trailing null to the string
467 		 *	and then adjust the string length to ignore
468 		 *	the trailing null.  If any STRING client requires
469 		 *	the trailing null, the client can just change STRLEN
470 		 */
471 		val = STRING;
472 		*rcp++ = 0;
473 		ryylval = (int)savestr(strtext, rcp - strtext);
474 		STRLEN(((char *)ryylval)) -= 1;
475 		goto ret;
476 
477 	case BADCHAR:
		/*
		 *	lineno is set to scanlineno around the call of
		 *	yyerror and restored afterwards, presumably because
		 *	yyerror reports lineno -- TODO confirm.
		 *	linescrossed is only used as a scratch variable here.
		 */
478 		linescrossed = lineno;
479 		lineno = scanlineno;
480 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
481 			ryylval, ch);
482 		lineno = linescrossed;
483 		val = BADCHAR;
484 		goto ret;
485 
486 	default:
		/*
		 *	every other character class is its own token
		 */
487 		val = ryylval;
488 		goto ret;
489 	}	/*end of the switch*/
490 	/*
491 	 *	here with one token, so stuff it
492 	 */
493    ret:
494 	oval = val;
495 	ptoken(bufptr, val);
	/*
	 *	Append the operand that goes with the token, if any.
	 */
496 	switch(val){
497 		case	ILINESKIP:
498 				pint(bufptr, ryylval);
499 				break;
500 		case	SIZESPEC:
501 				pchar(bufptr, ryylval);
502 				break;
503 		case	BFINT:	plong(bufptr, ryylval);
504 				break;
505 		case	INT:	plong(bufptr, ryylval);
506 				break;
507 		case 	BIGNUM:	pnumber(bufptr, yybignum);
508 				break;
509 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
510 				break;
511 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
512 				break;
513 		case	REG:	pchar(bufptr, ryylval);
514 				break;
515 		case	INST0:
516 		case	INSTn:
517 				popcode(bufptr, opstruct);
518 				break;
519 		case 	IJXXX:
520 				popcode(bufptr, opstruct);
521 				pptr(bufptr, (int)(struct symtab *)symalloc());
522 				break;
523 		case	ISTAB:
524 		case	ISTABSTR:
525 		case	ISTABNONE:
526 		case	ISTABDOT:
527 		case	IALIGN:
528 				pptr(bufptr, (int)(struct symtab *)symalloc());
529 				break;
530 	/*
531 	 *	default:
532 	 */
533 	 }
	/*
	 *	nothing in this function jumps to builtval
	 */
534 	 builtval: ;
535    }			/*end of the while to stuff the buffer*/
536    done:
	/*
	 *	tok_count is taken before the extra MINUS is appended below,
	 *	so it does not include that MINUS.
	 */
537 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
538 
539 	/*
540 	 *	This is a real kludge:
541 	 *
542 	 *	We put the last token in the buffer to be  a MINUS
543 	 *	symbol.  This last token will never be picked up
544 	 *	in the normal way, but can be looked at during
545 	 *	a peekahead look that the short circuit expression
546 	 *	evaluator uses to see if an expression is complicated.
547 	 *
548 	 *	Consider the following situation:
549 	 *
550 	 *	.word	45		+	47
551 	 *        buffer 1      |  buffer 0
552 	 *	the peekahead would want to look across the buffer,
553 	 *	but will look in the buffer end zone, see the minus, and
554 	 *	fail.
555 	 */
556 	ptoken(bufptr, MINUS);
557 	InBufPtr = inbufptr;		/*copy this back*/
558 }
559