15798Srrh /* 25798Srrh * Copyright (c) 1982 Regents of the University of California 35798Srrh */ 45798Srrh #ifndef lint 5*13448Srrh static char sccsid[] = "@(#)asscan2.c 4.7 06/29/83"; 65798Srrh #endif not lint 75798Srrh 85798Srrh #include "asscanl.h" 95798Srrh static inttoktype oval = NL; 105798Srrh 11*13448Srrh #define ASINBUFSIZ 4096 125798Srrh #define NINBUFFERS 2 13*13448Srrh #define INBUFLG NINBUFFERS*ASINBUFSIZ + 2 145798Srrh /* 15*13448Srrh * We have NINBUFFERS input buffers; the first one is reserved 165798Srrh * for catching the tail of a line split across a buffer 17*13448Srrh * boundary; the other ones are used for snarfing a buffer 18*13448Srrh * worth of assembly language source. 195798Srrh */ 205798Srrh static char inbuffer[INBUFLG]; 215798Srrh static char *InBufPtr = 0; 225798Srrh 235798Srrh /* 245798Srrh * fill the inbuffer from the standard input. 255798Srrh * Assert: there are always n COMPLETE! lines in the buffer area. 265798Srrh * Assert: there is always a \n terminating the last line 275798Srrh * in the buffer area. 285798Srrh * Assert: after the \n, there is an EOFCHAR (hard end of file) 295798Srrh * or a NEEDCHAR (end of buffer) 305798Srrh * Assert: fgets always null pads the string it reads. 315798Srrh * Assert: no ungetc's are done at the end of a line or at the 325798Srrh * beginning of a line. 335798Srrh * 345798Srrh * We read a complete buffer of characters in one single read. 355798Srrh * We then back scan within this buffer to find the end of the 365798Srrh * last complete line, and force the assertions, and save a pointer 375798Srrh * to the incomplete line. 385798Srrh * The next call to fillinbuffer will move the unread characters 395798Srrh * to the end of the first buffer, and then read another two buffers, 405798Srrh * completing the cycle. 415798Srrh */ 425798Srrh 435798Srrh static char p_swapped = '\0'; 44*13448Srrh static char *p_start = &inbuffer[NINBUFFERS * ASINBUFSIZ]; 45*13448Srrh static char *p_stop = &inbuffer[NINBUFFERS * ASINBUFSIZ]; 466558Srrh 47*13448Srrh #define MIDDLE &inbuffer[ASINBUFSIZ] 48*13448Srrh 495798Srrh char *fillinbuffer() 505798Srrh { 515798Srrh register char *from; 525798Srrh char *inbufptr; 535798Srrh int nread; 546558Srrh static int hadeof; 556558Srrh int goal; 566558Srrh int got; 575798Srrh 585798Srrh *p_start = p_swapped; 59*13448Srrh inbufptr = MIDDLE - (p_stop - p_start); 60*13448Srrh movestr(inbufptr, p_start, p_stop - p_start); 615798Srrh /* 62*13448Srrh * Now, go read up to NINBUFFERS - 1 full buffers 635798Srrh */ 646558Srrh if (hadeof){ 656558Srrh hadeof = 0; 666558Srrh return (0); 676558Srrh } 68*13448Srrh goal = (NINBUFFERS - 1)*ASINBUFSIZ; 696558Srrh nread = 0; 706558Srrh do { 71*13448Srrh got = read(stdin->_file, MIDDLE + nread, goal); 726558Srrh if (got == 0) 736558Srrh hadeof = 1; 746558Srrh if (got <= 0) 756558Srrh break; 766558Srrh nread += got; 776558Srrh goal -= got; 786558Srrh } while (goal); 796558Srrh 805798Srrh if (nread == 0) 815798Srrh return(0); 82*13448Srrh from = MIDDLE + nread; 83*13448Srrh p_stop = from; 845798Srrh *from = '\0'; 856558Srrh while (*--from != '\n'){ 866558Srrh /* 876558Srrh * back over the partial line 886558Srrh */ 89*13448Srrh if (from == MIDDLE) { 906558Srrh from = p_stop; 916558Srrh *p_stop++ = '\n'; 926558Srrh break; 936558Srrh } else { 946558Srrh continue; 956558Srrh } 966558Srrh } 976558Srrh 985798Srrh from++; /* first char of partial line */ 995798Srrh p_start = from; 1005798Srrh p_swapped = *p_start; 1015798Srrh *p_start = NEEDCHAR; /* force assertion */ 1025798Srrh return(inbufptr); 1035798Srrh } 1045798Srrh 1055798Srrh scan_dot_s(bufferbox) 1065798Srrh struct tokbufdesc *bufferbox; 1075798Srrh { 1085798Srrh reg int ryylval; /* local copy of lexical value */ 1095798Srrh extern int yylval; /* global copy of lexical value */ 1105798Srrh reg int val; /* the value returned */ 1115798Srrh int i; /* simple counter */ 1125798Srrh reg char *rcp; 1135798Srrh char *cp; /* can have address taken */ 1145798Srrh reg int ch; /* treated as a character */ 1155798Srrh int ch1; /* shadow value */ 1165798Srrh reg char *inbufptr; 1175798Srrh struct symtab *op; 1185798Srrh 1195798Srrh reg ptrall bufptr; /* where to stuff tokens */ 1205798Srrh ptrall lgbackpatch; /* where to stuff a string length */ 1215798Srrh ptrall bufub; /* where not to stuff tokens */ 122*13448Srrh reg int maxstrlg; /* how long a string can be */ 1235798Srrh long intval; /* value of int */ 1245798Srrh int linescrossed; /* when doing strings and comments */ 1255798Srrh struct Opcode opstruct; 1265798Srrh 1275798Srrh (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 1285798Srrh (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 1295798Srrh 1305798Srrh inbufptr = InBufPtr; 1315798Srrh if (inbufptr == 0){ 1325798Srrh inbufptr = fillinbuffer(); 1335798Srrh if (inbufptr == 0){ /*end of file*/ 1345798Srrh endoffile: 1355798Srrh inbufptr = 0; 1365798Srrh ptoken(bufptr, PARSEEOF); 1375798Srrh goto done; 1385798Srrh } 1395798Srrh } 1405798Srrh 1415798Srrh if (newfflag){ 142*13448Srrh newfflag = 0; 143*13448Srrh ryylval = (int)savestr(newfname, strlen(newfname) + 1); 144*13448Srrh 1455798Srrh ptoken(bufptr, IFILE); 1465798Srrh ptoken(bufptr, STRING); 147*13448Srrh pptr(bufptr, ryylval); 1485798Srrh 1495798Srrh ptoken(bufptr, ILINENO); 1505798Srrh ptoken(bufptr, INT); 1515798Srrh pint(bufptr, 1); 1525798Srrh } 1535798Srrh 1545798Srrh while (bufptr < bufub){ 1555798Srrh loop: 1565798Srrh switch(ryylval = (type+2)[ch = getchar()]) { 1575798Srrh case SCANEOF: 1585798Srrh inbufptr = 0; 1595798Srrh goto endoffile; 1605798Srrh 1615798Srrh case NEEDSBUF: 1625798Srrh inbufptr = fillinbuffer(); 1635798Srrh if (inbufptr == 0) 1645798Srrh goto endoffile; 1655798Srrh goto loop; 1665798Srrh 1675798Srrh case DIV: /*process C style comments*/ 1685798Srrh if ( (ch = getchar()) == '*') { /*comment prelude*/ 1695798Srrh int incomment; 1705798Srrh linescrossed = 0; 1715798Srrh incomment = 1; 1725798Srrh ch = getchar(); /*skip over the * */ 1735798Srrh while(incomment){ 1745798Srrh switch(ch){ 1755798Srrh case '*': 1765798Srrh ch = getchar(); 1775798Srrh incomment = (ch != '/'); 1785798Srrh break; 1795798Srrh case '\n': 1805798Srrh scanlineno++; 1815798Srrh linescrossed++; 1825798Srrh ch = getchar(); 1835798Srrh break; 1845798Srrh case EOFCHAR: 1855798Srrh goto endoffile; 1865798Srrh case NEEDCHAR: 1875798Srrh inbufptr = fillinbuffer(); 1885798Srrh if (inbufptr == 0) 1895798Srrh goto endoffile; 1905798Srrh lineno++; 1915798Srrh ch = getchar(); 1925798Srrh break; 1935798Srrh default: 1945798Srrh ch = getchar(); 1955798Srrh break; 1965798Srrh } 1975798Srrh } 1985798Srrh val = ILINESKIP; 1995798Srrh ryylval = linescrossed; 2005798Srrh goto ret; 2015798Srrh } else { /*just an ordinary DIV*/ 2025798Srrh ungetc(ch); 2035798Srrh val = ryylval = DIV; 2045798Srrh goto ret; 2055798Srrh } 2065798Srrh case SH: 2075798Srrh if (oval == NL){ 2085798Srrh /* 2095798Srrh * Attempt to recognize a C preprocessor 2105798Srrh * style comment '^#[ \t]*[0-9]*[ \t]*".*" 2115798Srrh */ 2125798Srrh ch = getchar(); /*bump the #*/ 2135798Srrh while (INCHARSET(ch, SPACE)) 2145798Srrh ch = getchar();/*bump white */ 2155798Srrh if (INCHARSET(ch, DIGIT)){ 2165798Srrh intval = 0; 2175798Srrh while(INCHARSET(ch, DIGIT)){ 2185798Srrh intval = intval*10 + ch - '0'; 2195798Srrh ch = getchar(); 2205798Srrh } 2215798Srrh while (INCHARSET(ch, SPACE)) 2225798Srrh ch = getchar(); 2235798Srrh if (ch == '"'){ 2245798Srrh ptoken(bufptr, ILINENO); 2255798Srrh ptoken(bufptr, INT); 2265798Srrh pint(bufptr, intval - 1); 2275798Srrh ptoken(bufptr, IFILE); 2285798Srrh /* 2295798Srrh * The '"' has already been 2305798Srrh * munched 2315798Srrh * 2325798Srrh * eatstr will not eat 2335798Srrh * the trailing \n, so 2345798Srrh * it is given to the parser 2355798Srrh * and counted. 2365798Srrh */ 2375798Srrh goto eatstr; 2385798Srrh } 2395798Srrh } 2405798Srrh } 2415798Srrh /* 2425798Srrh * Well, its just an ordinary decadent comment 2435798Srrh */ 2445798Srrh while ((ch != '\n') && (ch != EOFCHAR)) 2455798Srrh ch = getchar(); 2465798Srrh if (ch == EOFCHAR) 2475798Srrh goto endoffile; 2485798Srrh val = ryylval = oval = NL; 2495798Srrh scanlineno++; 2505798Srrh goto ret; 2515798Srrh 2525798Srrh case NL: 2535798Srrh scanlineno++; 2545798Srrh val = ryylval; 2555798Srrh goto ret; 2565798Srrh 2575798Srrh case SP: 2585798Srrh oval = SP; /*invalidate ^# meta comments*/ 2595798Srrh goto loop; 2605798Srrh 2615798Srrh case REGOP: /* % , could be used as modulo, or register*/ 2625798Srrh ch = getchar(); 2635798Srrh if (INCHARSET(ch, DIGIT)){ 2645798Srrh ryylval = ch-'0'; 2655798Srrh if (ch=='1') { 2665798Srrh if (INCHARSET( (ch = getchar()), REGDIGIT)) 2675798Srrh ryylval = 10+ch-'0'; 2685798Srrh else 2695798Srrh ungetc(ch); 2705798Srrh } 2715798Srrh /* 2725798Srrh * God only knows what the original author 2735798Srrh * wanted this undocumented feature to 2745798Srrh * do. 2755798Srrh * %5++ is really r7 2765798Srrh */ 2775798Srrh while(INCHARSET( (ch = getchar()), SIGN)) { 2785798Srrh if (ch=='+') 2795798Srrh ryylval++; 2805798Srrh else 2815798Srrh ryylval--; 2825798Srrh } 2835798Srrh ungetc(ch); 2845798Srrh val = REG; 2855798Srrh } else { 2865798Srrh ungetc(ch); 2875798Srrh val = REGOP; 2885798Srrh } 2895798Srrh goto ret; 2905798Srrh 2915798Srrh case ALPH: 2925798Srrh ch1 = ch; 2935798Srrh if (INCHARSET(ch, SZSPECBEGIN)){ 2945798Srrh if( (ch = getchar()) == '`' || ch == '^'){ 2955798Srrh ch1 |= 0100; /*convert to lower*/ 2965798Srrh switch(ch1){ 2975798Srrh case 'b': ryylval = 1; break; 2985798Srrh case 'w': ryylval = 2; break; 2995798Srrh case 'l': ryylval = 4; break; 3005798Srrh default: ryylval = d124; break; 3015798Srrh } 3025798Srrh val = SIZESPEC; 3035798Srrh goto ret; 3045798Srrh } else { 3055798Srrh ungetc(ch); 3065798Srrh ch = ch1; /*restore first character*/ 3075798Srrh } 3085798Srrh } 3095798Srrh rcp = yytext; 3105798Srrh do { 3115798Srrh if (rcp < &yytext[NCPS]) 3125798Srrh *rcp++ = ch; 3135798Srrh } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 3145798Srrh *rcp = '\0'; 3155798Srrh while (INCHARSET(ch, SPACE)) 3165798Srrh ch = getchar(); 3175798Srrh ungetc(ch); 3185798Srrh 3195798Srrh switch((op = *lookup(1))->s_tag){ 3205798Srrh case 0: 3215798Srrh case LABELID: 3225798Srrh /* 3235798Srrh * Its a name... (Labels are subsets ofname) 3245798Srrh */ 3255798Srrh ryylval = (int)op; 3265798Srrh val = NAME; 3275798Srrh break; 3285798Srrh case INST0: 3295798Srrh case INSTn: 3305798Srrh case IJXXX: 3315798Srrh opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 3325798Srrh opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 3335798Srrh val = op->s_tag; 3345798Srrh break; 3355798Srrh default: 3365798Srrh ryylval = ( (struct instab *)op)->i_popcode; 3375798Srrh val = op->s_tag; 3385798Srrh break; 3395798Srrh } 3405798Srrh goto ret; 3415798Srrh 3425798Srrh case DIG: 3435798Srrh /* 3445798Srrh * Implement call by reference on a reg variable 3455798Srrh */ 3465798Srrh cp = inbufptr; 3475798Srrh val = number(ch, &cp); 3485798Srrh /* 3495798Srrh * yylval or yybignum has been stuffed as a side 3505798Srrh * effect to number(); get the global yylval 3515798Srrh * into our fast local copy in case it was an INT. 3525798Srrh */ 3535798Srrh ryylval = yylval; 3545798Srrh inbufptr = cp; 3555798Srrh goto ret; 3565798Srrh 3575798Srrh case LSH: 3585798Srrh case RSH: 3595798Srrh /* 3605798Srrh * We allow the C style operators 3615798Srrh * << and >>, as well as < and > 3625798Srrh */ 3635798Srrh if ( (ch1 = getchar()) != ch) 3645798Srrh ungetc(ch1); 3655798Srrh val = ryylval; 3665798Srrh goto ret; 3675798Srrh 3685798Srrh case MINUS: 3695798Srrh if ( (ch = getchar()) =='(') 3705798Srrh ryylval=val=MP; 3715798Srrh else { 3725798Srrh ungetc(ch); 3735798Srrh val=MINUS; 3745798Srrh } 3755798Srrh goto ret; 3765798Srrh 3775798Srrh case SQ: 3785798Srrh if ((ryylval = getchar()) == '\n') 3795798Srrh scanlineno++; /*not entirely correct*/ 3805798Srrh val = INT; 3815798Srrh goto ret; 3825798Srrh 3835798Srrh case DQ: 3845798Srrh eatstr: 3855798Srrh linescrossed = 0; 386*13448Srrh for(rcp = yytext, maxstrlg = NCPS; maxstrlg > 0; --maxstrlg){ 387*13448Srrh switch(ch = getchar()){ 388*13448Srrh case '"': 389*13448Srrh goto tailDQ; 390*13448Srrh default: 391*13448Srrh stuff: 392*13448Srrh pchar(rcp, ch); 393*13448Srrh break; 394*13448Srrh case '\n': 395*13448Srrh yywarning("New line in a string constant"); 3965798Srrh scanlineno++; 3975798Srrh linescrossed++; 3985798Srrh ch = getchar(); 399*13448Srrh switch(ch){ 400*13448Srrh case NEEDCHAR: 401*13448Srrh if ( (inbufptr = fillinbuffer()) != 0){ 402*13448Srrh ch = '\n'; 403*13448Srrh goto stuff; 404*13448Srrh } 405*13448Srrh /*FALLTHROUGH*/ 406*13448Srrh case EOFCHAR: 407*13448Srrh pchar(rcp, '\n'); 4085798Srrh ungetc(EOFCHAR); 409*13448Srrh goto tailDQ; 410*13448Srrh default: 4115798Srrh ungetc(ch); 4125798Srrh ch = '\n'; 4135798Srrh goto stuff; 4145798Srrh } 415*13448Srrh break; 416*13448Srrh 417*13448Srrh case '\\': 4185798Srrh ch = getchar(); /*skip the '\\'*/ 4195798Srrh if ( INCHARSET(ch, BSESCAPE)){ 4205798Srrh switch (ch){ 4215798Srrh case 'b': ch = '\b'; goto stuff; 4225798Srrh case 'f': ch = '\f'; goto stuff; 4235798Srrh case 'n': ch = '\n'; goto stuff; 4245798Srrh case 'r': ch = '\r'; goto stuff; 4255798Srrh case 't': ch = '\t'; goto stuff; 4265798Srrh } 4275798Srrh } 428*13448Srrh if ( !(INCHARSET(ch, OCTDIGIT)) ) 429*13448Srrh goto stuff; 4305798Srrh i = 0; 4315798Srrh intval = 0; 4325798Srrh while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 433*13448Srrh i++; 434*13448Srrh intval <<= 3; 435*13448Srrh intval += ch - '0'; 4365798Srrh ch = getchar(); 4375798Srrh } 4385798Srrh ungetc(ch); 4396558Srrh ch = (char)intval; 4405798Srrh goto stuff; 441*13448Srrh } 4425798Srrh } 443*13448Srrh tailDQ: ; 4445798Srrh /* 445*13448Srrh * account for any lines that were crossed 4465798Srrh */ 4475798Srrh if (linescrossed){ 448*13448Srrh ptoken(bufptr, ILINESKIP); 449*13448Srrh pint(bufptr, linescrossed); 450*13448Srrh } 451*13448Srrh /* 452*13448Srrh * put the string in yytext into the string pool 453*13448Srrh * 454*13448Srrh * The value in ryylval points to the string; 455*13448Srrh * the previous 2 bytes is the length of the string 456*13448Srrh * 457*13448Srrh * Cheat: append a trailing null to the string 458*13448Srrh * and then adjust the string length to ignore 459*13448Srrh * the trailing null. If any STRING client requires 460*13448Srrh * the trailing null, the client can just change STRLEN 461*13448Srrh */ 462*13448Srrh val = STRING; 463*13448Srrh *rcp++ = 0; 464*13448Srrh ryylval = (int)savestr(yytext, rcp - yytext); 465*13448Srrh STRLEN(((char *)ryylval)) -= 1; 466*13448Srrh goto ret; 4675798Srrh 4685798Srrh case BADCHAR: 4695798Srrh linescrossed = lineno; 4705798Srrh lineno = scanlineno; 4715798Srrh yyerror("Illegal character mapped: %d, char read:(octal) %o", 4725798Srrh ryylval, ch); 4735798Srrh lineno = linescrossed; 4745798Srrh val = BADCHAR; 4755798Srrh goto ret; 4765798Srrh 4775798Srrh default: 4785798Srrh val = ryylval; 4795798Srrh goto ret; 4805798Srrh } /*end of the switch*/ 4815798Srrh /* 4825798Srrh * here with one token, so stuff it 4835798Srrh */ 4845798Srrh ret: 4855798Srrh oval = val; 4865798Srrh ptoken(bufptr, val); 4875798Srrh switch(val){ 4885798Srrh case ILINESKIP: 4895798Srrh pint(bufptr, ryylval); 4905798Srrh break; 4915798Srrh case SIZESPEC: 4925798Srrh pchar(bufptr, ryylval); 4935798Srrh break; 4945798Srrh case BFINT: plong(bufptr, ryylval); 4955798Srrh break; 4965798Srrh case INT: plong(bufptr, ryylval); 4975798Srrh break; 4985798Srrh case BIGNUM: pnumber(bufptr, yybignum); 4995798Srrh break; 500*13448Srrh case STRING: pptr(bufptr, (int)(char *)ryylval); 501*13448Srrh break; 5025798Srrh case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 5035798Srrh break; 5045798Srrh case REG: pchar(bufptr, ryylval); 5055798Srrh break; 5065798Srrh case INST0: 5075798Srrh case INSTn: 5085798Srrh popcode(bufptr, opstruct); 5095798Srrh break; 5105798Srrh case IJXXX: 5115798Srrh popcode(bufptr, opstruct); 5125798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 5135798Srrh break; 5145798Srrh case ISTAB: 5155798Srrh case ISTABSTR: 5165798Srrh case ISTABNONE: 5175798Srrh case ISTABDOT: 5185798Srrh case IALIGN: 5195798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 5205798Srrh break; 5215798Srrh /* 5225798Srrh * default: 5235798Srrh */ 5245798Srrh } 5255798Srrh builtval: ; 5265798Srrh } /*end of the while to stuff the buffer*/ 5275798Srrh done: 5285798Srrh bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 5295798Srrh 5305798Srrh /* 5315798Srrh * This is a real kludge: 5325798Srrh * 5335798Srrh * We put the last token in the buffer to be a MINUS 5345798Srrh * symbol. This last token will never be picked up 5355798Srrh * in the normal way, but can be looked at during 5365798Srrh * a peekahead look that the short circuit expression 5375798Srrh * evaluator uses to see if an expression is complicated. 5385798Srrh * 5395798Srrh * Consider the following situation: 5405798Srrh * 5415798Srrh * .word 45 + 47 5425798Srrh * buffer 1 | buffer 0 5435798Srrh * the peekahead would want to look across the buffer, 5445798Srrh * but will look in the buffer end zone, see the minus, and 5455798Srrh * fail. 5465798Srrh */ 5475798Srrh ptoken(bufptr, MINUS); 5485798Srrh InBufPtr = inbufptr; /*copy this back*/ 5495798Srrh } 550