15798Srrh /* 25798Srrh * Copyright (c) 1982 Regents of the University of California 35798Srrh */ 45798Srrh #ifndef lint 5*13808Srrh static char sccsid[] = "@(#)asscan2.c 4.14 07/06/83"; 65798Srrh #endif not lint 75798Srrh 85798Srrh #include "asscanl.h" 913467Srrh 105798Srrh static inttoktype oval = NL; 1113575Srrh #define ASINBUFSIZ 4096 1213467Srrh char inbufunget[8]; 1313467Srrh char inbuffer[ASINBUFSIZ]; 1413467Srrh char *Ginbufptr = inbuffer; 1513467Srrh int Ginbufcnt = 0; 16*13808Srrh int scannerhadeof; 1713467Srrh 1813467Srrh fillinbuffer() 1913467Srrh { 2013467Srrh int nread; 2113467Srrh int goal; 2213467Srrh int got; 2313467Srrh 2413467Srrh nread = 0; 25*13808Srrh if (scannerhadeof == 0){ 2613467Srrh goal = sizeof(inbuffer); 2713467Srrh do { 2813467Srrh got = read(stdin->_file, inbuffer + nread, goal); 2913467Srrh if (got == 0) 30*13808Srrh scannerhadeof = 1; 3113467Srrh if (got <= 0) 3213467Srrh break; 3313467Srrh nread += got; 3413467Srrh goal -= got; 3513467Srrh } while (goal); 36*13808Srrh } else { 37*13808Srrh scannerhadeof = 0; 3813467Srrh } 395798Srrh /* 4013467Srrh * getchar assumes that Ginbufcnt and Ginbufptr 4113467Srrh * are adjusted as if one character has been removed 4213467Srrh * from the input. 435798Srrh */ 4413467Srrh if (nread == 0){ 4513467Srrh inbuffer[0] = EOFCHAR; 4613467Srrh nread = 1; 4713467Srrh } 4813467Srrh Ginbufcnt = nread - 1; 4913467Srrh Ginbufptr = inbuffer + 1; 5013467Srrh } 515798Srrh 525798Srrh scan_dot_s(bufferbox) 535798Srrh struct tokbufdesc *bufferbox; 545798Srrh { 5513467Srrh reg char *inbufptr; 5613467Srrh reg int inbufcnt; 575798Srrh reg int ryylval; /* local copy of lexical value */ 585798Srrh extern int yylval; /* global copy of lexical value */ 595798Srrh reg int val; /* the value returned */ 605798Srrh int i; /* simple counter */ 615798Srrh reg char *rcp; 6213467Srrh int ch; /* treated as a character */ 635798Srrh int ch1; /* shadow value */ 645798Srrh struct symtab *op; 6513467Srrh ptrall lgbackpatch; /* where to stuff a string length */ 665798Srrh reg ptrall bufptr; /* where to stuff tokens */ 675798Srrh ptrall bufub; /* where not to stuff tokens */ 685798Srrh long intval; /* value of int */ 695798Srrh int linescrossed; /* when doing strings and comments */ 705798Srrh struct Opcode opstruct; 7113573Srrh reg int strlg; /* the length of a string */ 725798Srrh 735798Srrh (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 745798Srrh (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 755798Srrh 7613467Srrh MEMTOREGBUF; 775798Srrh if (newfflag){ 7813448Srrh newfflag = 0; 7913573Srrh ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH); 8013448Srrh 815798Srrh ptoken(bufptr, IFILE); 825798Srrh ptoken(bufptr, STRING); 8313448Srrh pptr(bufptr, ryylval); 845798Srrh 855798Srrh ptoken(bufptr, ILINENO); 865798Srrh ptoken(bufptr, INT); 875798Srrh pint(bufptr, 1); 885798Srrh } 895798Srrh 905798Srrh while (bufptr < bufub){ 915798Srrh loop: 9213467Srrh switch(ryylval = (type+1)[ch = getchar()]) { 935798Srrh case SCANEOF: 9413467Srrh endoffile: ; 955798Srrh inbufptr = 0; 9613467Srrh ptoken(bufptr, PARSEEOF); 9713467Srrh goto done; 985798Srrh 995798Srrh case DIV: /*process C style comments*/ 1005798Srrh if ( (ch = getchar()) == '*') { /*comment prelude*/ 1015798Srrh int incomment; 1025798Srrh linescrossed = 0; 1035798Srrh incomment = 1; 1045798Srrh ch = getchar(); /*skip over the * */ 1055798Srrh while(incomment){ 1065798Srrh switch(ch){ 1075798Srrh case '*': 1085798Srrh ch = getchar(); 1095798Srrh incomment = (ch != '/'); 1105798Srrh break; 1115798Srrh case '\n': 1125798Srrh scanlineno++; 1135798Srrh linescrossed++; 1145798Srrh ch = getchar(); 1155798Srrh break; 1165798Srrh case EOFCHAR: 1175798Srrh goto endoffile; 1185798Srrh default: 1195798Srrh ch = getchar(); 1205798Srrh break; 1215798Srrh } 1225798Srrh } 1235798Srrh val = ILINESKIP; 1245798Srrh ryylval = linescrossed; 1255798Srrh goto ret; 1265798Srrh } else { /*just an ordinary DIV*/ 1275798Srrh ungetc(ch); 1285798Srrh val = ryylval = DIV; 1295798Srrh goto ret; 1305798Srrh } 1315798Srrh case SH: 1325798Srrh if (oval == NL){ 1335798Srrh /* 1345798Srrh * Attempt to recognize a C preprocessor 1355798Srrh * style comment '^#[ \t]*[0-9]*[ \t]*".*" 1365798Srrh */ 1375798Srrh ch = getchar(); /*bump the #*/ 1385798Srrh while (INCHARSET(ch, SPACE)) 1395798Srrh ch = getchar();/*bump white */ 1405798Srrh if (INCHARSET(ch, DIGIT)){ 1415798Srrh intval = 0; 1425798Srrh while(INCHARSET(ch, DIGIT)){ 1435798Srrh intval = intval*10 + ch - '0'; 1445798Srrh ch = getchar(); 1455798Srrh } 1465798Srrh while (INCHARSET(ch, SPACE)) 1475798Srrh ch = getchar(); 1485798Srrh if (ch == '"'){ 1495798Srrh ptoken(bufptr, ILINENO); 1505798Srrh ptoken(bufptr, INT); 1515798Srrh pint(bufptr, intval - 1); 1525798Srrh ptoken(bufptr, IFILE); 1535798Srrh /* 1545798Srrh * The '"' has already been 1555798Srrh * munched 1565798Srrh * 1575798Srrh * eatstr will not eat 1585798Srrh * the trailing \n, so 1595798Srrh * it is given to the parser 1605798Srrh * and counted. 1615798Srrh */ 1625798Srrh goto eatstr; 1635798Srrh } 1645798Srrh } 1655798Srrh } 1665798Srrh /* 1675798Srrh * Well, its just an ordinary decadent comment 1685798Srrh */ 1695798Srrh while ((ch != '\n') && (ch != EOFCHAR)) 1705798Srrh ch = getchar(); 1715798Srrh if (ch == EOFCHAR) 1725798Srrh goto endoffile; 1735798Srrh val = ryylval = oval = NL; 1745798Srrh scanlineno++; 1755798Srrh goto ret; 1765798Srrh 1775798Srrh case NL: 1785798Srrh scanlineno++; 1795798Srrh val = ryylval; 1805798Srrh goto ret; 1815798Srrh 1825798Srrh case SP: 1835798Srrh oval = SP; /*invalidate ^# meta comments*/ 1845798Srrh goto loop; 1855798Srrh 1865798Srrh case REGOP: /* % , could be used as modulo, or register*/ 1875798Srrh ch = getchar(); 1885798Srrh if (INCHARSET(ch, DIGIT)){ 1895798Srrh ryylval = ch-'0'; 1905798Srrh if (ch=='1') { 1915798Srrh if (INCHARSET( (ch = getchar()), REGDIGIT)) 1925798Srrh ryylval = 10+ch-'0'; 1935798Srrh else 1945798Srrh ungetc(ch); 1955798Srrh } 1965798Srrh /* 1975798Srrh * God only knows what the original author 1985798Srrh * wanted this undocumented feature to 1995798Srrh * do. 2005798Srrh * %5++ is really r7 2015798Srrh */ 2025798Srrh while(INCHARSET( (ch = getchar()), SIGN)) { 2035798Srrh if (ch=='+') 2045798Srrh ryylval++; 2055798Srrh else 2065798Srrh ryylval--; 2075798Srrh } 2085798Srrh ungetc(ch); 2095798Srrh val = REG; 2105798Srrh } else { 2115798Srrh ungetc(ch); 2125798Srrh val = REGOP; 2135798Srrh } 2145798Srrh goto ret; 2155798Srrh 2165798Srrh case ALPH: 2175798Srrh ch1 = ch; 2185798Srrh if (INCHARSET(ch, SZSPECBEGIN)){ 2195798Srrh if( (ch = getchar()) == '`' || ch == '^'){ 2205798Srrh ch1 |= 0100; /*convert to lower*/ 2215798Srrh switch(ch1){ 2225798Srrh case 'b': ryylval = 1; break; 2235798Srrh case 'w': ryylval = 2; break; 2245798Srrh case 'l': ryylval = 4; break; 2255798Srrh default: ryylval = d124; break; 2265798Srrh } 2275798Srrh val = SIZESPEC; 2285798Srrh goto ret; 2295798Srrh } else { 2305798Srrh ungetc(ch); 2315798Srrh ch = ch1; /*restore first character*/ 2325798Srrh } 2335798Srrh } 2345798Srrh rcp = yytext; 2355798Srrh do { 23613462Srrh if (rcp < &yytext[NCPName]) 2375798Srrh *rcp++ = ch; 2385798Srrh } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 2395798Srrh *rcp = '\0'; 2405798Srrh while (INCHARSET(ch, SPACE)) 2415798Srrh ch = getchar(); 2425798Srrh ungetc(ch); 2435798Srrh 2445798Srrh switch((op = *lookup(1))->s_tag){ 2455798Srrh case 0: 2465798Srrh case LABELID: 2475798Srrh /* 24813516Srrh * Its a name... (Labels are subsets of name) 2495798Srrh */ 2505798Srrh ryylval = (int)op; 2515798Srrh val = NAME; 2525798Srrh break; 2535798Srrh case INST0: 2545798Srrh case INSTn: 2555798Srrh case IJXXX: 2565798Srrh opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 2575798Srrh opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 2585798Srrh val = op->s_tag; 2595798Srrh break; 2605798Srrh default: 2615798Srrh ryylval = ( (struct instab *)op)->i_popcode; 2625798Srrh val = op->s_tag; 2635798Srrh break; 2645798Srrh } 2655798Srrh goto ret; 2665798Srrh 2675798Srrh case DIG: 2685798Srrh /* 26913467Srrh * restore local inbufptr and inbufcnt 2705798Srrh */ 27113467Srrh REGTOMEMBUF; 27213467Srrh val = number(ch); 27313467Srrh MEMTOREGBUF; 2745798Srrh /* 2755798Srrh * yylval or yybignum has been stuffed as a side 2765798Srrh * effect to number(); get the global yylval 2775798Srrh * into our fast local copy in case it was an INT. 2785798Srrh */ 2795798Srrh ryylval = yylval; 2805798Srrh goto ret; 2815798Srrh 2825798Srrh case LSH: 2835798Srrh case RSH: 2845798Srrh /* 2855798Srrh * We allow the C style operators 2865798Srrh * << and >>, as well as < and > 2875798Srrh */ 2885798Srrh if ( (ch1 = getchar()) != ch) 2895798Srrh ungetc(ch1); 2905798Srrh val = ryylval; 2915798Srrh goto ret; 2925798Srrh 2935798Srrh case MINUS: 2945798Srrh if ( (ch = getchar()) =='(') 2955798Srrh ryylval=val=MP; 2965798Srrh else { 2975798Srrh ungetc(ch); 2985798Srrh val=MINUS; 2995798Srrh } 3005798Srrh goto ret; 3015798Srrh 3025798Srrh case SQ: 3035798Srrh if ((ryylval = getchar()) == '\n') 3045798Srrh scanlineno++; /*not entirely correct*/ 3055798Srrh val = INT; 3065798Srrh goto ret; 3075798Srrh 3085798Srrh case DQ: 3095798Srrh eatstr: 3105798Srrh linescrossed = 0; 31113573Srrh for (strlg = 0; /*VOID*/; strlg++){ 31213448Srrh switch(ch = getchar()){ 31313448Srrh case '"': 31413448Srrh goto tailDQ; 31513448Srrh default: 31613448Srrh stuff: 31713516Srrh putc(ch, strfile); 31813448Srrh break; 31913448Srrh case '\n': 32013448Srrh yywarning("New line in a string constant"); 3215798Srrh scanlineno++; 3225798Srrh linescrossed++; 3235798Srrh ch = getchar(); 32413448Srrh switch(ch){ 32513448Srrh case EOFCHAR: 32613516Srrh putc('\n', strfile); 3275798Srrh ungetc(EOFCHAR); 32813448Srrh goto tailDQ; 32913448Srrh default: 3305798Srrh ungetc(ch); 3315798Srrh ch = '\n'; 3325798Srrh goto stuff; 3335798Srrh } 33413448Srrh break; 33513448Srrh 33613448Srrh case '\\': 3375798Srrh ch = getchar(); /*skip the '\\'*/ 3385798Srrh if ( INCHARSET(ch, BSESCAPE)){ 3395798Srrh switch (ch){ 3405798Srrh case 'b': ch = '\b'; goto stuff; 3415798Srrh case 'f': ch = '\f'; goto stuff; 3425798Srrh case 'n': ch = '\n'; goto stuff; 3435798Srrh case 'r': ch = '\r'; goto stuff; 3445798Srrh case 't': ch = '\t'; goto stuff; 3455798Srrh } 3465798Srrh } 34713448Srrh if ( !(INCHARSET(ch, OCTDIGIT)) ) 34813448Srrh goto stuff; 3495798Srrh i = 0; 3505798Srrh intval = 0; 3515798Srrh while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 35213448Srrh i++; 35313448Srrh intval <<= 3; 35413448Srrh intval += ch - '0'; 3555798Srrh ch = getchar(); 3565798Srrh } 3575798Srrh ungetc(ch); 3586558Srrh ch = (char)intval; 3595798Srrh goto stuff; 36013448Srrh } 3615798Srrh } 36213448Srrh tailDQ: ; 3635798Srrh /* 36413448Srrh * account for any lines that were crossed 3655798Srrh */ 3665798Srrh if (linescrossed){ 36713448Srrh ptoken(bufptr, ILINESKIP); 36813448Srrh pint(bufptr, linescrossed); 36913448Srrh } 37013448Srrh /* 37113448Srrh * Cheat: append a trailing null to the string 37213448Srrh * and then adjust the string length to ignore 37313448Srrh * the trailing null. If any STRING client requires 37413448Srrh * the trailing null, the client can just change STRLEN 37513448Srrh */ 37613573Srrh putc(0, strfile); 37713573Srrh ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE); 37813448Srrh val = STRING; 37913516Srrh ((struct strdesc *)ryylval)->sd_strlen -= 1; 38013448Srrh goto ret; 3815798Srrh 3825798Srrh case BADCHAR: 3835798Srrh linescrossed = lineno; 3845798Srrh lineno = scanlineno; 3855798Srrh yyerror("Illegal character mapped: %d, char read:(octal) %o", 3865798Srrh ryylval, ch); 3875798Srrh lineno = linescrossed; 3885798Srrh val = BADCHAR; 3895798Srrh goto ret; 3905798Srrh 3915798Srrh default: 3925798Srrh val = ryylval; 3935798Srrh goto ret; 3945798Srrh } /*end of the switch*/ 3955798Srrh /* 3965798Srrh * here with one token, so stuff it 3975798Srrh */ 3985798Srrh ret: 3995798Srrh oval = val; 4005798Srrh ptoken(bufptr, val); 4015798Srrh switch(val){ 4025798Srrh case ILINESKIP: 4035798Srrh pint(bufptr, ryylval); 4045798Srrh break; 4055798Srrh case SIZESPEC: 4065798Srrh pchar(bufptr, ryylval); 4075798Srrh break; 4085798Srrh case BFINT: plong(bufptr, ryylval); 4095798Srrh break; 4105798Srrh case INT: plong(bufptr, ryylval); 4115798Srrh break; 4125798Srrh case BIGNUM: pnumber(bufptr, yybignum); 4135798Srrh break; 41413448Srrh case STRING: pptr(bufptr, (int)(char *)ryylval); 41513448Srrh break; 4165798Srrh case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 4175798Srrh break; 4185798Srrh case REG: pchar(bufptr, ryylval); 4195798Srrh break; 4205798Srrh case INST0: 4215798Srrh case INSTn: 4225798Srrh popcode(bufptr, opstruct); 4235798Srrh break; 4245798Srrh case IJXXX: 4255798Srrh popcode(bufptr, opstruct); 4265798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 4275798Srrh break; 4285798Srrh case ISTAB: 4295798Srrh case ISTABSTR: 4305798Srrh case ISTABNONE: 4315798Srrh case ISTABDOT: 4325798Srrh case IALIGN: 4335798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 4345798Srrh break; 4355798Srrh /* 4365798Srrh * default: 4375798Srrh */ 4385798Srrh } 4395798Srrh builtval: ; 4405798Srrh } /*end of the while to stuff the buffer*/ 4415798Srrh done: 4425798Srrh bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 4435798Srrh /* 4445798Srrh * This is a real kludge: 4455798Srrh * 4465798Srrh * We put the last token in the buffer to be a MINUS 4475798Srrh * symbol. This last token will never be picked up 4485798Srrh * in the normal way, but can be looked at during 4495798Srrh * a peekahead look that the short circuit expression 4505798Srrh * evaluator uses to see if an expression is complicated. 4515798Srrh * 4525798Srrh * Consider the following situation: 4535798Srrh * 4545798Srrh * .word 45 + 47 4555798Srrh * buffer 1 | buffer 0 4565798Srrh * the peekahead would want to look across the buffer, 4575798Srrh * but will look in the buffer end zone, see the minus, and 4585798Srrh * fail. 4595798Srrh */ 4605798Srrh ptoken(bufptr, MINUS); 46113467Srrh REGTOMEMBUF; 4625798Srrh } 463