/*
 *	Copyright (c) 1982 Regents of the University of California
 */
4*40591Sbostic #ifndef lint
5*40591Sbostic static char sccsid[] = "@(#)asscan2.c 4.14 7/6/83";
6*40591Sbostic #endif not lint
7*40591Sbostic
8*40591Sbostic #include "asscanl.h"
9*40591Sbostic
10*40591Sbostic static inttoktype oval = NL;
11*40591Sbostic #define ASINBUFSIZ 4096
12*40591Sbostic char inbufunget[8];
13*40591Sbostic char inbuffer[ASINBUFSIZ];
14*40591Sbostic char *Ginbufptr = inbuffer;
15*40591Sbostic int Ginbufcnt = 0;
16*40591Sbostic int scannerhadeof;
17*40591Sbostic
fillinbuffer()18*40591Sbostic fillinbuffer()
19*40591Sbostic {
20*40591Sbostic int nread;
21*40591Sbostic int goal;
22*40591Sbostic int got;
23*40591Sbostic
24*40591Sbostic nread = 0;
25*40591Sbostic if (scannerhadeof == 0){
26*40591Sbostic goal = sizeof(inbuffer);
27*40591Sbostic do {
28*40591Sbostic got = read(stdin->_file, inbuffer + nread, goal);
29*40591Sbostic if (got == 0)
30*40591Sbostic scannerhadeof = 1;
31*40591Sbostic if (got <= 0)
32*40591Sbostic break;
33*40591Sbostic nread += got;
34*40591Sbostic goal -= got;
35*40591Sbostic } while (goal);
36*40591Sbostic } else {
37*40591Sbostic scannerhadeof = 0;
38*40591Sbostic }
39*40591Sbostic /*
40*40591Sbostic * getchar assumes that Ginbufcnt and Ginbufptr
41*40591Sbostic * are adjusted as if one character has been removed
42*40591Sbostic * from the input.
43*40591Sbostic */
44*40591Sbostic if (nread == 0){
45*40591Sbostic inbuffer[0] = EOFCHAR;
46*40591Sbostic nread = 1;
47*40591Sbostic }
48*40591Sbostic Ginbufcnt = nread - 1;
49*40591Sbostic Ginbufptr = inbuffer + 1;
50*40591Sbostic }
51*40591Sbostic
52*40591Sbostic scan_dot_s(bufferbox)
53*40591Sbostic struct tokbufdesc *bufferbox;
54*40591Sbostic {
55*40591Sbostic reg char *inbufptr;
56*40591Sbostic reg int inbufcnt;
57*40591Sbostic reg int ryylval; /* local copy of lexical value */
58*40591Sbostic extern int yylval; /* global copy of lexical value */
59*40591Sbostic reg int val; /* the value returned */
60*40591Sbostic int i; /* simple counter */
61*40591Sbostic reg char *rcp;
62*40591Sbostic int ch; /* treated as a character */
63*40591Sbostic int ch1; /* shadow value */
64*40591Sbostic struct symtab *op;
65*40591Sbostic reg ptrall bufptr; /* where to stuff tokens */
66*40591Sbostic ptrall bufub; /* where not to stuff tokens */
67*40591Sbostic long intval; /* value of int */
68*40591Sbostic int linescrossed; /* when doing strings and comments */
69*40591Sbostic u_char opstruct;
70*40591Sbostic reg int strlg; /* the length of a string */
71*40591Sbostic
72*40591Sbostic (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
73*40591Sbostic (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
74*40591Sbostic
75*40591Sbostic MEMTOREGBUF;
76*40591Sbostic if (newfflag){
77*40591Sbostic newfflag = 0;
78*40591Sbostic ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH);
79*40591Sbostic
80*40591Sbostic ptoken(bufptr, IFILE);
81*40591Sbostic ptoken(bufptr, STRING);
82*40591Sbostic pptr(bufptr, ryylval);
83*40591Sbostic
84*40591Sbostic ptoken(bufptr, ILINENO);
85*40591Sbostic ptoken(bufptr, INT);
86*40591Sbostic pint(bufptr, 1);
87*40591Sbostic }
88*40591Sbostic
89*40591Sbostic while (bufptr < bufub){
90*40591Sbostic loop:
91*40591Sbostic switch(ryylval = (type+1)[ch = getchar()]) {
92*40591Sbostic case SCANEOF:
93*40591Sbostic endoffile: ;
94*40591Sbostic inbufptr = 0;
95*40591Sbostic ptoken(bufptr, PARSEEOF);
96*40591Sbostic goto done;
97*40591Sbostic
98*40591Sbostic case DIV: /*process C style comments*/
99*40591Sbostic if ( (ch = getchar()) == '*') { /*comment prelude*/
100*40591Sbostic int incomment;
101*40591Sbostic linescrossed = 0;
102*40591Sbostic incomment = 1;
103*40591Sbostic ch = getchar(); /*skip over the * */
104*40591Sbostic while(incomment){
105*40591Sbostic switch(ch){
106*40591Sbostic case '*':
107*40591Sbostic ch = getchar();
108*40591Sbostic incomment = (ch != '/');
109*40591Sbostic break;
110*40591Sbostic case '\n':
111*40591Sbostic scanlineno++;
112*40591Sbostic linescrossed++;
113*40591Sbostic ch = getchar();
114*40591Sbostic break;
115*40591Sbostic case EOFCHAR:
116*40591Sbostic goto endoffile;
117*40591Sbostic default:
118*40591Sbostic ch = getchar();
119*40591Sbostic break;
120*40591Sbostic }
121*40591Sbostic }
122*40591Sbostic val = ILINESKIP;
123*40591Sbostic ryylval = linescrossed;
124*40591Sbostic goto ret;
125*40591Sbostic } else { /*just an ordinary DIV*/
126*40591Sbostic ungetc(ch);
127*40591Sbostic val = ryylval = DIV;
128*40591Sbostic goto ret;
129*40591Sbostic }
130*40591Sbostic case SH:
131*40591Sbostic if (oval == NL){
132*40591Sbostic /*
133*40591Sbostic * Attempt to recognize a C preprocessor
134*40591Sbostic * style comment '^#[ \t]*[0-9]*[ \t]*".*"
135*40591Sbostic */
136*40591Sbostic ch = getchar(); /*bump the #*/
137*40591Sbostic while (INCHARSET(ch, SPACE))
138*40591Sbostic ch = getchar();/*bump white */
139*40591Sbostic if (INCHARSET(ch, DIGIT)){
140*40591Sbostic intval = 0;
141*40591Sbostic while(INCHARSET(ch, DIGIT)){
142*40591Sbostic intval = intval*10 + ch - '0';
143*40591Sbostic ch = getchar();
144*40591Sbostic }
145*40591Sbostic while (INCHARSET(ch, SPACE))
146*40591Sbostic ch = getchar();
147*40591Sbostic if (ch == '"' || ch == '\n'){
148*40591Sbostic ptoken(bufptr, ILINENO);
149*40591Sbostic ptoken(bufptr, INT);
150*40591Sbostic pint(bufptr, intval - 1);
151*40591Sbostic if (ch == '"')
152*40591Sbostic {
153*40591Sbostic ptoken(bufptr, IFILE);
154*40591Sbostic /*
155*40591Sbostic * The '"' has already been
156*40591Sbostic * munched
157*40591Sbostic *
158*40591Sbostic * eatstr will not eat
159*40591Sbostic * the trailing \n, so
160*40591Sbostic * it is given to the parser
161*40591Sbostic * and counted.
162*40591Sbostic */
163*40591Sbostic goto eatstr;
164*40591Sbostic }
165*40591Sbostic }
166*40591Sbostic }
167*40591Sbostic }
168*40591Sbostic /*
169*40591Sbostic * Well, its just an ordinary decadent comment
170*40591Sbostic */
171*40591Sbostic while ((ch != '\n') && (ch != EOFCHAR))
172*40591Sbostic ch = getchar();
173*40591Sbostic if (ch == EOFCHAR)
174*40591Sbostic goto endoffile;
175*40591Sbostic val = ryylval = oval = NL;
176*40591Sbostic scanlineno++;
177*40591Sbostic goto ret;
178*40591Sbostic
179*40591Sbostic case NL:
180*40591Sbostic scanlineno++;
181*40591Sbostic val = ryylval;
182*40591Sbostic goto ret;
183*40591Sbostic
184*40591Sbostic case SP:
185*40591Sbostic oval = SP; /*invalidate ^# meta comments*/
186*40591Sbostic goto loop;
187*40591Sbostic
188*40591Sbostic case REGOP: /* % , could be used as modulo, or register*/
189*40591Sbostic ch = getchar();
190*40591Sbostic if (INCHARSET(ch, DIGIT)){
191*40591Sbostic ryylval = ch-'0';
192*40591Sbostic if (ch=='1') {
193*40591Sbostic if (INCHARSET( (ch = getchar()), REGDIGIT))
194*40591Sbostic ryylval = 10+ch-'0';
195*40591Sbostic else
196*40591Sbostic ungetc(ch);
197*40591Sbostic }
198*40591Sbostic /*
199*40591Sbostic * God only knows what the original author
200*40591Sbostic * wanted this undocumented feature to
201*40591Sbostic * do.
202*40591Sbostic * %5++ is really r7
203*40591Sbostic */
204*40591Sbostic while(INCHARSET( (ch = getchar()), SIGN)) {
205*40591Sbostic if (ch=='+')
206*40591Sbostic ryylval++;
207*40591Sbostic else
208*40591Sbostic ryylval--;
209*40591Sbostic }
210*40591Sbostic ungetc(ch);
211*40591Sbostic val = REG;
212*40591Sbostic } else {
213*40591Sbostic ungetc(ch);
214*40591Sbostic val = REGOP;
215*40591Sbostic }
216*40591Sbostic goto ret;
217*40591Sbostic
218*40591Sbostic case ALPH:
219*40591Sbostic ch1 = ch;
220*40591Sbostic if (INCHARSET(ch, SZSPECBEGIN)){
221*40591Sbostic if( (ch = getchar()) == '`' || ch == '^'){
222*40591Sbostic ch1 |= 0100; /*convert to lower*/
223*40591Sbostic switch(ch1){
224*40591Sbostic case 'b': ryylval = 1; break;
225*40591Sbostic case 'w': ryylval = 2; break;
226*40591Sbostic case 'l': ryylval = 4; break;
227*40591Sbostic default: ryylval = d124; break;
228*40591Sbostic }
229*40591Sbostic val = SIZESPEC;
230*40591Sbostic goto ret;
231*40591Sbostic } else {
232*40591Sbostic ungetc(ch);
233*40591Sbostic ch = ch1; /*restore first character*/
234*40591Sbostic }
235*40591Sbostic }
236*40591Sbostic rcp = yytext;
237*40591Sbostic do {
238*40591Sbostic if (rcp < &yytext[NCPName])
239*40591Sbostic *rcp++ = ch;
240*40591Sbostic } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
241*40591Sbostic *rcp = '\0';
242*40591Sbostic while (INCHARSET(ch, SPACE))
243*40591Sbostic ch = getchar();
244*40591Sbostic ungetc(ch);
245*40591Sbostic
246*40591Sbostic switch((op = *lookup(1))->s_tag){
247*40591Sbostic case 0:
248*40591Sbostic case LABELID:
249*40591Sbostic /*
250*40591Sbostic * Its a name... (Labels are subsets of name)
251*40591Sbostic */
252*40591Sbostic ryylval = (int)op;
253*40591Sbostic val = NAME;
254*40591Sbostic break;
255*40591Sbostic case INST0:
256*40591Sbostic case INSTn:
257*40591Sbostic case IJXXX:
258*40591Sbostic opstruct = ( (struct instab *)op)->i_opcode;
259*40591Sbostic val = op->s_tag;
260*40591Sbostic break;
261*40591Sbostic default:
262*40591Sbostic ryylval = ( (struct instab *)op)->i_opcode;
263*40591Sbostic val = op->s_tag;
264*40591Sbostic break;
265*40591Sbostic }
266*40591Sbostic goto ret;
267*40591Sbostic
268*40591Sbostic case DIG:
269*40591Sbostic /*
270*40591Sbostic * restore local inbufptr and inbufcnt
271*40591Sbostic */
272*40591Sbostic REGTOMEMBUF;
273*40591Sbostic val = number(ch);
274*40591Sbostic MEMTOREGBUF;
275*40591Sbostic /*
276*40591Sbostic * yylval or yybignum has been stuffed as a side
277*40591Sbostic * effect to number(); get the global yylval
278*40591Sbostic * into our fast local copy in case it was an INT.
279*40591Sbostic */
280*40591Sbostic ryylval = yylval;
281*40591Sbostic goto ret;
282*40591Sbostic
283*40591Sbostic case LSH:
284*40591Sbostic case RSH:
285*40591Sbostic /*
286*40591Sbostic * We allow the C style operators
287*40591Sbostic * << and >>, as well as < and >
288*40591Sbostic */
289*40591Sbostic if ( (ch1 = getchar()) != ch)
290*40591Sbostic ungetc(ch1);
291*40591Sbostic val = ryylval;
292*40591Sbostic goto ret;
293*40591Sbostic
294*40591Sbostic case MINUS:
295*40591Sbostic if ( (ch = getchar()) =='(')
296*40591Sbostic ryylval=val=MP;
297*40591Sbostic else {
298*40591Sbostic ungetc(ch);
299*40591Sbostic val=MINUS;
300*40591Sbostic }
301*40591Sbostic goto ret;
302*40591Sbostic
303*40591Sbostic case SQ:
304*40591Sbostic if ((ryylval = getchar()) == '\n')
305*40591Sbostic scanlineno++; /*not entirely correct*/
306*40591Sbostic val = INT;
307*40591Sbostic goto ret;
308*40591Sbostic
309*40591Sbostic case DQ:
310*40591Sbostic eatstr:
311*40591Sbostic linescrossed = 0;
312*40591Sbostic for (strlg = 0; /*VOID*/; strlg++){
313*40591Sbostic switch(ch = getchar()){
314*40591Sbostic case '"':
315*40591Sbostic goto tailDQ;
316*40591Sbostic default:
317*40591Sbostic stuff:
318*40591Sbostic putc(ch, strfile);
319*40591Sbostic break;
320*40591Sbostic case '\n':
321*40591Sbostic yywarning("New line in a string constant");
322*40591Sbostic scanlineno++;
323*40591Sbostic linescrossed++;
324*40591Sbostic ch = getchar();
325*40591Sbostic switch(ch){
326*40591Sbostic case EOFCHAR:
327*40591Sbostic putc('\n', strfile);
328*40591Sbostic ungetc(EOFCHAR);
329*40591Sbostic goto tailDQ;
330*40591Sbostic default:
331*40591Sbostic ungetc(ch);
332*40591Sbostic ch = '\n';
333*40591Sbostic goto stuff;
334*40591Sbostic }
335*40591Sbostic break;
336*40591Sbostic
337*40591Sbostic case '\\':
338*40591Sbostic ch = getchar(); /*skip the '\\'*/
339*40591Sbostic if ( INCHARSET(ch, BSESCAPE)){
340*40591Sbostic switch (ch){
341*40591Sbostic case 'b': ch = '\b'; goto stuff;
342*40591Sbostic case 'f': ch = '\f'; goto stuff;
343*40591Sbostic case 'n': ch = '\n'; goto stuff;
344*40591Sbostic case 'r': ch = '\r'; goto stuff;
345*40591Sbostic case 't': ch = '\t'; goto stuff;
346*40591Sbostic }
347*40591Sbostic }
348*40591Sbostic if ( !(INCHARSET(ch, OCTDIGIT)) )
349*40591Sbostic goto stuff;
350*40591Sbostic i = 0;
351*40591Sbostic intval = 0;
352*40591Sbostic while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
353*40591Sbostic i++;
354*40591Sbostic intval <<= 3;
355*40591Sbostic intval += ch - '0';
356*40591Sbostic ch = getchar();
357*40591Sbostic }
358*40591Sbostic ungetc(ch);
359*40591Sbostic ch = (char)intval;
360*40591Sbostic goto stuff;
361*40591Sbostic }
362*40591Sbostic }
363*40591Sbostic tailDQ: ;
364*40591Sbostic /*
365*40591Sbostic * account for any lines that were crossed
366*40591Sbostic */
367*40591Sbostic if (linescrossed){
368*40591Sbostic ptoken(bufptr, ILINESKIP);
369*40591Sbostic pint(bufptr, linescrossed);
370*40591Sbostic }
371*40591Sbostic /*
372*40591Sbostic * Cheat: append a trailing null to the string
373*40591Sbostic * and then adjust the string length to ignore
374*40591Sbostic * the trailing null. If any STRING client requires
375*40591Sbostic * the trailing null, the client can just change STRLEN
376*40591Sbostic */
377*40591Sbostic putc(0, strfile);
378*40591Sbostic ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE);
379*40591Sbostic val = STRING;
380*40591Sbostic ((struct strdesc *)ryylval)->sd_strlen -= 1;
381*40591Sbostic goto ret;
382*40591Sbostic
383*40591Sbostic case BADCHAR:
384*40591Sbostic linescrossed = lineno;
385*40591Sbostic lineno = scanlineno;
386*40591Sbostic yyerror("Illegal character mapped: %d, char read:(octal) %o",
387*40591Sbostic ryylval, ch);
388*40591Sbostic lineno = linescrossed;
389*40591Sbostic val = BADCHAR;
390*40591Sbostic goto ret;
391*40591Sbostic
392*40591Sbostic default:
393*40591Sbostic val = ryylval;
394*40591Sbostic goto ret;
395*40591Sbostic } /*end of the switch*/
396*40591Sbostic /*
397*40591Sbostic * here with one token, so stuff it
398*40591Sbostic */
399*40591Sbostic ret:
400*40591Sbostic oval = val;
401*40591Sbostic ptoken(bufptr, val);
402*40591Sbostic switch(val){
403*40591Sbostic case ILINESKIP:
404*40591Sbostic pint(bufptr, ryylval);
405*40591Sbostic break;
406*40591Sbostic case SIZESPEC:
407*40591Sbostic pchar(bufptr, ryylval);
408*40591Sbostic break;
409*40591Sbostic case BFINT: plong(bufptr, ryylval);
410*40591Sbostic break;
411*40591Sbostic case INT: plong(bufptr, ryylval);
412*40591Sbostic break;
413*40591Sbostic case BIGNUM: pnumber(bufptr, yybignum);
414*40591Sbostic break;
415*40591Sbostic case STRING: pptr(bufptr, (int)(char *)ryylval);
416*40591Sbostic break;
417*40591Sbostic case NAME: pptr(bufptr, (int)(struct symtab *)ryylval);
418*40591Sbostic break;
419*40591Sbostic case REG: pchar(bufptr, ryylval);
420*40591Sbostic break;
421*40591Sbostic case INST0:
422*40591Sbostic case INSTn:
423*40591Sbostic popcode(bufptr, opstruct);
424*40591Sbostic break;
425*40591Sbostic case IJXXX:
426*40591Sbostic popcode(bufptr, opstruct);
427*40591Sbostic pptr(bufptr, (int)(struct symtab *)symalloc());
428*40591Sbostic break;
429*40591Sbostic case ISTAB:
430*40591Sbostic case ISTABSTR:
431*40591Sbostic case ISTABNONE:
432*40591Sbostic case ISTABDOT:
433*40591Sbostic case IALIGN:
434*40591Sbostic pptr(bufptr, (int)(struct symtab *)symalloc());
435*40591Sbostic break;
436*40591Sbostic /*
437*40591Sbostic * default:
438*40591Sbostic */
439*40591Sbostic }
440*40591Sbostic builtval: ;
441*40591Sbostic } /*end of the while to stuff the buffer*/
442*40591Sbostic done:
443*40591Sbostic bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
444*40591Sbostic /*
445*40591Sbostic * This is a real kludge:
446*40591Sbostic *
447*40591Sbostic * We put the last token in the buffer to be a MINUS
448*40591Sbostic * symbol. This last token will never be picked up
449*40591Sbostic * in the normal way, but can be looked at during
450*40591Sbostic * a peekahead look that the short circuit expression
451*40591Sbostic * evaluator uses to see if an expression is complicated.
452*40591Sbostic *
453*40591Sbostic * Consider the following situation:
454*40591Sbostic *
455*40591Sbostic * .word 45 + 47
456*40591Sbostic * buffer 1 | buffer 0
457*40591Sbostic * the peekahead would want to look across the buffer,
458*40591Sbostic * but will look in the buffer end zone, see the minus, and
459*40591Sbostic * fail.
460*40591Sbostic */
461*40591Sbostic ptoken(bufptr, MINUS);
462*40591Sbostic REGTOMEMBUF;
463*40591Sbostic }
464