1*5798Srrh /* 2*5798Srrh * Copyright (c) 1982 Regents of the University of California 3*5798Srrh */ 4*5798Srrh #ifndef lint 5*5798Srrh static char sccsid[] = "@(#)asscan2.c 4.1 02/14/82"; 6*5798Srrh #endif not lint 7*5798Srrh 8*5798Srrh #include "asscanl.h" 9*5798Srrh static inttoktype oval = NL; 10*5798Srrh 11*5798Srrh #define NINBUFFERS 2 12*5798Srrh #define INBUFLG NINBUFFERS*BUFSIZ + 2 13*5798Srrh /* 14*5798Srrh * We have two input buffers; the first one is reserved 15*5798Srrh * for catching the tail of a line split across a buffer 16*5798Srrh * boundary; the other one are used for snarfing a buffer 17*5798Srrh * worth of .s source. 18*5798Srrh */ 19*5798Srrh static char inbuffer[INBUFLG]; 20*5798Srrh static char *InBufPtr = 0; 21*5798Srrh 22*5798Srrh /* 23*5798Srrh * fill the inbuffer from the standard input. 24*5798Srrh * Assert: there are always n COMPLETE! lines in the buffer area. 25*5798Srrh * Assert: there is always a \n terminating the last line 26*5798Srrh * in the buffer area. 27*5798Srrh * Assert: after the \n, there is an EOFCHAR (hard end of file) 28*5798Srrh * or a NEEDCHAR (end of buffer) 29*5798Srrh * Assert: fgets always null pads the string it reads. 30*5798Srrh * Assert: no ungetc's are done at the end of a line or at the 31*5798Srrh * beginning of a line. 32*5798Srrh * 33*5798Srrh * We read a complete buffer of characters in one single read. 34*5798Srrh * We then back scan within this buffer to find the end of the 35*5798Srrh * last complete line, and force the assertions, and save a pointer 36*5798Srrh * to the incomplete line. 37*5798Srrh * The next call to fillinbuffer will move the unread characters 38*5798Srrh * to the end of the first buffer, and then read another two buffers, 39*5798Srrh * completing the cycle. 40*5798Srrh */ 41*5798Srrh 42*5798Srrh static char p_swapped = '\0'; 43*5798Srrh static char *p_start = &inbuffer[NINBUFFERS * BUFSIZ]; 44*5798Srrh static char *p_stop = &inbuffer[NINBUFFERS * BUFSIZ]; 45*5798Srrh char *fillinbuffer() 46*5798Srrh { 47*5798Srrh register char *to; 48*5798Srrh register char *from; 49*5798Srrh char *inbufptr; 50*5798Srrh int nread; 51*5798Srrh 52*5798Srrh *p_start = p_swapped; 53*5798Srrh inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start); 54*5798Srrh 55*5798Srrh for (to = inbufptr, from = p_start; from < p_stop;) 56*5798Srrh *to++ = *from++; 57*5798Srrh /* 58*5798Srrh * Now, go read two full buffers (hopefully) 59*5798Srrh */ 60*5798Srrh nread = read(stdin->_file, &inbuffer[1*BUFSIZ], (NINBUFFERS - 1)*BUFSIZ); 61*5798Srrh if (nread == 0) 62*5798Srrh return(0); 63*5798Srrh p_stop = from = &inbuffer[1*BUFSIZ + nread]; 64*5798Srrh *from = '\0'; 65*5798Srrh while (*--from != '\n') /* back over the partial line */ 66*5798Srrh continue; 67*5798Srrh from++; /* first char of partial line */ 68*5798Srrh p_start = from; 69*5798Srrh p_swapped = *p_start; 70*5798Srrh *p_start = NEEDCHAR; /* force assertion */ 71*5798Srrh return(inbufptr); 72*5798Srrh } 73*5798Srrh 74*5798Srrh scan_dot_s(bufferbox) 75*5798Srrh struct tokbufdesc *bufferbox; 76*5798Srrh { 77*5798Srrh reg int ryylval; /* local copy of lexical value */ 78*5798Srrh extern int yylval; /* global copy of lexical value */ 79*5798Srrh reg int val; /* the value returned */ 80*5798Srrh int i; /* simple counter */ 81*5798Srrh reg char *rcp; 82*5798Srrh char *cp; /* can have address taken */ 83*5798Srrh reg int ch; /* treated as a character */ 84*5798Srrh int ch1; /* shadow value */ 85*5798Srrh reg char *inbufptr; 86*5798Srrh struct symtab *op; 87*5798Srrh 88*5798Srrh reg ptrall bufptr; /* where to stuff tokens */ 89*5798Srrh ptrall lgbackpatch; /* where to stuff a string length */ 90*5798Srrh ptrall bufub; /* where not to stuff tokens */ 91*5798Srrh int maxstrlg; /* how long a string can be */ 92*5798Srrh long intval; /* value of int */ 93*5798Srrh int linescrossed; /* when doing strings and comments */ 94*5798Srrh struct Opcode opstruct; 95*5798Srrh 96*5798Srrh (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 97*5798Srrh (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 98*5798Srrh 99*5798Srrh inbufptr = InBufPtr; 100*5798Srrh if (inbufptr == 0){ 101*5798Srrh inbufptr = fillinbuffer(); 102*5798Srrh if (inbufptr == 0){ /*end of file*/ 103*5798Srrh endoffile: 104*5798Srrh inbufptr = 0; 105*5798Srrh ptoken(bufptr, PARSEEOF); 106*5798Srrh goto done; 107*5798Srrh } 108*5798Srrh } 109*5798Srrh 110*5798Srrh if (newfflag){ 111*5798Srrh ptoken(bufptr, IFILE); 112*5798Srrh ptoken(bufptr, STRING); 113*5798Srrh val = strlen(newfname) + 1; 114*5798Srrh movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val); 115*5798Srrh bstrlg(bufptr, val); 116*5798Srrh 117*5798Srrh ptoken(bufptr, ILINENO); 118*5798Srrh ptoken(bufptr, INT); 119*5798Srrh pint(bufptr, 1); 120*5798Srrh newfflag = 0; 121*5798Srrh } 122*5798Srrh 123*5798Srrh while (bufptr < bufub){ 124*5798Srrh loop: 125*5798Srrh switch(ryylval = (type+2)[ch = getchar()]) { 126*5798Srrh case SCANEOF: 127*5798Srrh inbufptr = 0; 128*5798Srrh goto endoffile; 129*5798Srrh 130*5798Srrh case NEEDSBUF: 131*5798Srrh inbufptr = fillinbuffer(); 132*5798Srrh if (inbufptr == 0) 133*5798Srrh goto endoffile; 134*5798Srrh goto loop; 135*5798Srrh 136*5798Srrh case DIV: /*process C style comments*/ 137*5798Srrh if ( (ch = getchar()) == '*') { /*comment prelude*/ 138*5798Srrh int incomment; 139*5798Srrh linescrossed = 0; 140*5798Srrh incomment = 1; 141*5798Srrh ch = getchar(); /*skip over the * */ 142*5798Srrh while(incomment){ 143*5798Srrh switch(ch){ 144*5798Srrh case '*': 145*5798Srrh ch = getchar(); 146*5798Srrh incomment = (ch != '/'); 147*5798Srrh break; 148*5798Srrh case '\n': 149*5798Srrh scanlineno++; 150*5798Srrh linescrossed++; 151*5798Srrh ch = getchar(); 152*5798Srrh break; 153*5798Srrh case EOFCHAR: 154*5798Srrh goto endoffile; 155*5798Srrh case NEEDCHAR: 156*5798Srrh inbufptr = fillinbuffer(); 157*5798Srrh if (inbufptr == 0) 158*5798Srrh goto endoffile; 159*5798Srrh lineno++; 160*5798Srrh ch = getchar(); 161*5798Srrh break; 162*5798Srrh default: 163*5798Srrh ch = getchar(); 164*5798Srrh break; 165*5798Srrh } 166*5798Srrh } 167*5798Srrh val = ILINESKIP; 168*5798Srrh ryylval = linescrossed; 169*5798Srrh goto ret; 170*5798Srrh } else { /*just an ordinary DIV*/ 171*5798Srrh ungetc(ch); 172*5798Srrh val = ryylval = DIV; 173*5798Srrh goto ret; 174*5798Srrh } 175*5798Srrh case SH: 176*5798Srrh if (oval == NL){ 177*5798Srrh /* 178*5798Srrh * Attempt to recognize a C preprocessor 179*5798Srrh * style comment '^#[ \t]*[0-9]*[ \t]*".*" 180*5798Srrh */ 181*5798Srrh ch = getchar(); /*bump the #*/ 182*5798Srrh while (INCHARSET(ch, SPACE)) 183*5798Srrh ch = getchar();/*bump white */ 184*5798Srrh if (INCHARSET(ch, DIGIT)){ 185*5798Srrh intval = 0; 186*5798Srrh while(INCHARSET(ch, DIGIT)){ 187*5798Srrh intval = intval*10 + ch - '0'; 188*5798Srrh ch = getchar(); 189*5798Srrh } 190*5798Srrh while (INCHARSET(ch, SPACE)) 191*5798Srrh ch = getchar(); 192*5798Srrh if (ch == '"'){ 193*5798Srrh ptoken(bufptr, ILINENO); 194*5798Srrh ptoken(bufptr, INT); 195*5798Srrh pint(bufptr, intval - 1); 196*5798Srrh ptoken(bufptr, IFILE); 197*5798Srrh /* 198*5798Srrh * The '"' has already been 199*5798Srrh * munched 200*5798Srrh * 201*5798Srrh * eatstr will not eat 202*5798Srrh * the trailing \n, so 203*5798Srrh * it is given to the parser 204*5798Srrh * and counted. 205*5798Srrh */ 206*5798Srrh goto eatstr; 207*5798Srrh } 208*5798Srrh } 209*5798Srrh } 210*5798Srrh /* 211*5798Srrh * Well, its just an ordinary decadent comment 212*5798Srrh */ 213*5798Srrh while ((ch != '\n') && (ch != EOFCHAR)) 214*5798Srrh ch = getchar(); 215*5798Srrh if (ch == EOFCHAR) 216*5798Srrh goto endoffile; 217*5798Srrh val = ryylval = oval = NL; 218*5798Srrh scanlineno++; 219*5798Srrh goto ret; 220*5798Srrh 221*5798Srrh case NL: 222*5798Srrh scanlineno++; 223*5798Srrh val = ryylval; 224*5798Srrh goto ret; 225*5798Srrh 226*5798Srrh case SP: 227*5798Srrh oval = SP; /*invalidate ^# meta comments*/ 228*5798Srrh goto loop; 229*5798Srrh 230*5798Srrh case REGOP: /* % , could be used as modulo, or register*/ 231*5798Srrh ch = getchar(); 232*5798Srrh if (INCHARSET(ch, DIGIT)){ 233*5798Srrh ryylval = ch-'0'; 234*5798Srrh if (ch=='1') { 235*5798Srrh if (INCHARSET( (ch = getchar()), REGDIGIT)) 236*5798Srrh ryylval = 10+ch-'0'; 237*5798Srrh else 238*5798Srrh ungetc(ch); 239*5798Srrh } 240*5798Srrh /* 241*5798Srrh * God only knows what the original author 242*5798Srrh * wanted this undocumented feature to 243*5798Srrh * do. 244*5798Srrh * %5++ is really r7 245*5798Srrh */ 246*5798Srrh while(INCHARSET( (ch = getchar()), SIGN)) { 247*5798Srrh if (ch=='+') 248*5798Srrh ryylval++; 249*5798Srrh else 250*5798Srrh ryylval--; 251*5798Srrh } 252*5798Srrh ungetc(ch); 253*5798Srrh val = REG; 254*5798Srrh } else { 255*5798Srrh ungetc(ch); 256*5798Srrh val = REGOP; 257*5798Srrh } 258*5798Srrh goto ret; 259*5798Srrh 260*5798Srrh case ALPH: 261*5798Srrh ch1 = ch; 262*5798Srrh if (INCHARSET(ch, SZSPECBEGIN)){ 263*5798Srrh if( (ch = getchar()) == '`' || ch == '^'){ 264*5798Srrh ch1 |= 0100; /*convert to lower*/ 265*5798Srrh switch(ch1){ 266*5798Srrh case 'b': ryylval = 1; break; 267*5798Srrh case 'w': ryylval = 2; break; 268*5798Srrh case 'l': ryylval = 4; break; 269*5798Srrh default: ryylval = d124; break; 270*5798Srrh } 271*5798Srrh val = SIZESPEC; 272*5798Srrh goto ret; 273*5798Srrh } else { 274*5798Srrh ungetc(ch); 275*5798Srrh ch = ch1; /*restore first character*/ 276*5798Srrh } 277*5798Srrh } 278*5798Srrh rcp = yytext; 279*5798Srrh do { 280*5798Srrh if (rcp < &yytext[NCPS]) 281*5798Srrh *rcp++ = ch; 282*5798Srrh } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 283*5798Srrh *rcp = '\0'; 284*5798Srrh while (INCHARSET(ch, SPACE)) 285*5798Srrh ch = getchar(); 286*5798Srrh ungetc(ch); 287*5798Srrh 288*5798Srrh switch((op = *lookup(1))->s_tag){ 289*5798Srrh case 0: 290*5798Srrh case LABELID: 291*5798Srrh /* 292*5798Srrh * Its a name... (Labels are subsets ofname) 293*5798Srrh */ 294*5798Srrh ryylval = (int)op; 295*5798Srrh val = NAME; 296*5798Srrh break; 297*5798Srrh case INST0: 298*5798Srrh case INSTn: 299*5798Srrh case IJXXX: 300*5798Srrh opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 301*5798Srrh opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 302*5798Srrh val = op->s_tag; 303*5798Srrh break; 304*5798Srrh default: 305*5798Srrh ryylval = ( (struct instab *)op)->i_popcode; 306*5798Srrh val = op->s_tag; 307*5798Srrh break; 308*5798Srrh } 309*5798Srrh goto ret; 310*5798Srrh 311*5798Srrh case DIG: 312*5798Srrh /* 313*5798Srrh * Implement call by reference on a reg variable 314*5798Srrh */ 315*5798Srrh cp = inbufptr; 316*5798Srrh val = number(ch, &cp); 317*5798Srrh /* 318*5798Srrh * yylval or yybignum has been stuffed as a side 319*5798Srrh * effect to number(); get the global yylval 320*5798Srrh * into our fast local copy in case it was an INT. 321*5798Srrh */ 322*5798Srrh ryylval = yylval; 323*5798Srrh inbufptr = cp; 324*5798Srrh goto ret; 325*5798Srrh 326*5798Srrh case LSH: 327*5798Srrh case RSH: 328*5798Srrh /* 329*5798Srrh * We allow the C style operators 330*5798Srrh * << and >>, as well as < and > 331*5798Srrh */ 332*5798Srrh if ( (ch1 = getchar()) != ch) 333*5798Srrh ungetc(ch1); 334*5798Srrh val = ryylval; 335*5798Srrh goto ret; 336*5798Srrh 337*5798Srrh case MINUS: 338*5798Srrh if ( (ch = getchar()) =='(') 339*5798Srrh ryylval=val=MP; 340*5798Srrh else { 341*5798Srrh ungetc(ch); 342*5798Srrh val=MINUS; 343*5798Srrh } 344*5798Srrh goto ret; 345*5798Srrh 346*5798Srrh case SQ: 347*5798Srrh if ((ryylval = getchar()) == '\n') 348*5798Srrh scanlineno++; /*not entirely correct*/ 349*5798Srrh val = INT; 350*5798Srrh goto ret; 351*5798Srrh 352*5798Srrh case DQ: 353*5798Srrh eatstr: 354*5798Srrh linescrossed = 0; 355*5798Srrh maxstrlg = (char *)bufub - (char *)bufptr; 356*5798Srrh 357*5798Srrh if (maxstrlg < MAXSTRLG) { 358*5798Srrh ungetc('"'); 359*5798Srrh *(bytetoktype *)bufptr = VOID ; 360*5798Srrh bufub = bufptr; 361*5798Srrh goto done; 362*5798Srrh } 363*5798Srrh if (maxstrlg > MAXSTRLG) 364*5798Srrh maxstrlg = MAXSTRLG; 365*5798Srrh 366*5798Srrh ptoken(bufptr, STRING); 367*5798Srrh lgbackpatch = bufptr; /*this is where the size goes*/ 368*5798Srrh bufptr += sizeof(lgtype); 369*5798Srrh /* 370*5798Srrh * bufptr is now set to 371*5798Srrh * be stuffed with characters from 372*5798Srrh * the input 373*5798Srrh */ 374*5798Srrh 375*5798Srrh while ( (maxstrlg > 0) 376*5798Srrh && !(INCHARSET( (ch = getchar()), STRESCAPE)) 377*5798Srrh ){ 378*5798Srrh stuff: 379*5798Srrh maxstrlg-= 1; 380*5798Srrh pchar(bufptr, ch); 381*5798Srrh } 382*5798Srrh if (maxstrlg <= 0){ /*enough characters to fill a string buffer*/ 383*5798Srrh ungetc('"'); /*will read it next*/ 384*5798Srrh } 385*5798Srrh else if (ch == '"'); /*done*/ 386*5798Srrh else if (ch == '\n'){ 387*5798Srrh yywarning("New line embedded in a string constant."); 388*5798Srrh scanlineno++; 389*5798Srrh linescrossed++; 390*5798Srrh ch = getchar(); 391*5798Srrh if (ch == EOFCHAR){ 392*5798Srrh do_eof: 393*5798Srrh pchar(bufptr, '\n'); 394*5798Srrh ungetc(EOFCHAR); 395*5798Srrh } else 396*5798Srrh if (ch == NEEDCHAR){ 397*5798Srrh if ( (inbufptr = fillinbuffer()) == 0) 398*5798Srrh goto do_eof; 399*5798Srrh ch = '\n'; 400*5798Srrh goto stuff; 401*5798Srrh } else { /* simple case */ 402*5798Srrh ungetc(ch); 403*5798Srrh ch = '\n'; 404*5798Srrh goto stuff; 405*5798Srrh } 406*5798Srrh } else { 407*5798Srrh ch = getchar(); /*skip the '\\'*/ 408*5798Srrh if ( INCHARSET(ch, BSESCAPE)){ 409*5798Srrh switch (ch){ 410*5798Srrh case 'b': ch = '\b'; goto stuff; 411*5798Srrh case 'f': ch = '\f'; goto stuff; 412*5798Srrh case 'n': ch = '\n'; goto stuff; 413*5798Srrh case 'r': ch = '\r'; goto stuff; 414*5798Srrh case 't': ch = '\t'; goto stuff; 415*5798Srrh } 416*5798Srrh } 417*5798Srrh if ( !(INCHARSET(ch,OCTDIGIT)) ) goto stuff; 418*5798Srrh i = 0; 419*5798Srrh intval = 0; 420*5798Srrh while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 421*5798Srrh i++;intval <<= 3;intval += ch - '0'; 422*5798Srrh ch = getchar(); 423*5798Srrh } 424*5798Srrh ungetc(ch); 425*5798Srrh val = (char)intval; 426*5798Srrh goto stuff; 427*5798Srrh } 428*5798Srrh /* 429*5798Srrh * bufptr now points at the next free slot 430*5798Srrh */ 431*5798Srrh bstrfromto(lgbackpatch, bufptr); 432*5798Srrh if (linescrossed){ 433*5798Srrh val = ILINESKIP; 434*5798Srrh ryylval = linescrossed; 435*5798Srrh goto ret; 436*5798Srrh } else 437*5798Srrh goto builtval; 438*5798Srrh 439*5798Srrh case BADCHAR: 440*5798Srrh linescrossed = lineno; 441*5798Srrh lineno = scanlineno; 442*5798Srrh yyerror("Illegal character mapped: %d, char read:(octal) %o", 443*5798Srrh ryylval, ch); 444*5798Srrh lineno = linescrossed; 445*5798Srrh val = BADCHAR; 446*5798Srrh goto ret; 447*5798Srrh 448*5798Srrh default: 449*5798Srrh val = ryylval; 450*5798Srrh goto ret; 451*5798Srrh } /*end of the switch*/ 452*5798Srrh /* 453*5798Srrh * here with one token, so stuff it 454*5798Srrh */ 455*5798Srrh ret: 456*5798Srrh oval = val; 457*5798Srrh ptoken(bufptr, val); 458*5798Srrh switch(val){ 459*5798Srrh case ILINESKIP: 460*5798Srrh pint(bufptr, ryylval); 461*5798Srrh break; 462*5798Srrh case SIZESPEC: 463*5798Srrh pchar(bufptr, ryylval); 464*5798Srrh break; 465*5798Srrh case BFINT: plong(bufptr, ryylval); 466*5798Srrh break; 467*5798Srrh case INT: plong(bufptr, ryylval); 468*5798Srrh break; 469*5798Srrh case BIGNUM: pnumber(bufptr, yybignum); 470*5798Srrh break; 471*5798Srrh case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 472*5798Srrh break; 473*5798Srrh case REG: pchar(bufptr, ryylval); 474*5798Srrh break; 475*5798Srrh case INST0: 476*5798Srrh case INSTn: 477*5798Srrh popcode(bufptr, opstruct); 478*5798Srrh break; 479*5798Srrh case IJXXX: 480*5798Srrh popcode(bufptr, opstruct); 481*5798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 482*5798Srrh break; 483*5798Srrh case ISTAB: 484*5798Srrh case ISTABSTR: 485*5798Srrh case ISTABNONE: 486*5798Srrh case ISTABDOT: 487*5798Srrh case IALIGN: 488*5798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 489*5798Srrh break; 490*5798Srrh /* 491*5798Srrh * default: 492*5798Srrh */ 493*5798Srrh } 494*5798Srrh builtval: ; 495*5798Srrh } /*end of the while to stuff the buffer*/ 496*5798Srrh done: 497*5798Srrh bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 498*5798Srrh 499*5798Srrh /* 500*5798Srrh * This is a real kludge: 501*5798Srrh * 502*5798Srrh * We put the last token in the buffer to be a MINUS 503*5798Srrh * symbol. This last token will never be picked up 504*5798Srrh * in the normal way, but can be looked at during 505*5798Srrh * a peekahead look that the short circuit expression 506*5798Srrh * evaluator uses to see if an expression is complicated. 507*5798Srrh * 508*5798Srrh * Consider the following situation: 509*5798Srrh * 510*5798Srrh * .word 45 + 47 511*5798Srrh * buffer 1 | buffer 0 512*5798Srrh * the peekahead would want to look across the buffer, 513*5798Srrh * but will look in the buffer end zone, see the minus, and 514*5798Srrh * fail. 515*5798Srrh */ 516*5798Srrh ptoken(bufptr, MINUS); 517*5798Srrh InBufPtr = inbufptr; /*copy this back*/ 518*5798Srrh } 519