15798Srrh /* 25798Srrh * Copyright (c) 1982 Regents of the University of California 35798Srrh */ 45798Srrh #ifndef lint 5*13462Srrh static char sccsid[] = "@(#)asscan2.c 4.8 06/30/83"; 65798Srrh #endif not lint 75798Srrh 85798Srrh #include "asscanl.h" 95798Srrh static inttoktype oval = NL; 105798Srrh 115798Srrh #define NINBUFFERS 2 1213448Srrh #define INBUFLG NINBUFFERS*ASINBUFSIZ + 2 135798Srrh /* 1413448Srrh * We have NINBUFFERS input buffers; the first one is reserved 155798Srrh * for catching the tail of a line split across a buffer 1613448Srrh * boundary; the other ones are used for snarfing a buffer 1713448Srrh * worth of assembly language source. 185798Srrh */ 195798Srrh static char inbuffer[INBUFLG]; 205798Srrh static char *InBufPtr = 0; 215798Srrh 22*13462Srrh #ifndef FLEXNAMES 23*13462Srrh char strtext[NCPString + 1]; 24*13462Srrh #else FLEXNAMES 25*13462Srrh # if NCPName < NCPString 26*13462Srrh char strtext[NCPString + 1]; 27*13462Srrh # else 28*13462Srrh #define strtext yytext 29*13462Srrh # endif 30*13462Srrh #endif FLEXNAMES 31*13462Srrh 325798Srrh /* 335798Srrh * fill the inbuffer from the standard input. 345798Srrh * Assert: there are always n COMPLETE! lines in the buffer area. 355798Srrh * Assert: there is always a \n terminating the last line 365798Srrh * in the buffer area. 375798Srrh * Assert: after the \n, there is an EOFCHAR (hard end of file) 385798Srrh * or a NEEDCHAR (end of buffer) 395798Srrh * Assert: fgets always null pads the string it reads. 405798Srrh * Assert: no ungetc's are done at the end of a line or at the 415798Srrh * beginning of a line. 425798Srrh * 435798Srrh * We read a complete buffer of characters in one single read. 445798Srrh * We then back scan within this buffer to find the end of the 455798Srrh * last complete line, and force the assertions, and save a pointer 465798Srrh * to the incomplete line. 475798Srrh * The next call to fillinbuffer will move the unread characters 485798Srrh * to the end of the first buffer, and then read another two buffers, 495798Srrh * completing the cycle. 505798Srrh */ 515798Srrh 525798Srrh static char p_swapped = '\0'; 5313448Srrh static char *p_start = &inbuffer[NINBUFFERS * ASINBUFSIZ]; 5413448Srrh static char *p_stop = &inbuffer[NINBUFFERS * ASINBUFSIZ]; 556558Srrh 5613448Srrh #define MIDDLE &inbuffer[ASINBUFSIZ] 5713448Srrh 585798Srrh char *fillinbuffer() 595798Srrh { 605798Srrh register char *from; 615798Srrh char *inbufptr; 625798Srrh int nread; 636558Srrh static int hadeof; 646558Srrh int goal; 656558Srrh int got; 665798Srrh 675798Srrh *p_start = p_swapped; 6813448Srrh inbufptr = MIDDLE - (p_stop - p_start); 6913448Srrh movestr(inbufptr, p_start, p_stop - p_start); 705798Srrh /* 7113448Srrh * Now, go read up to NINBUFFERS - 1 full buffers 725798Srrh */ 736558Srrh if (hadeof){ 746558Srrh hadeof = 0; 756558Srrh return (0); 766558Srrh } 7713448Srrh goal = (NINBUFFERS - 1)*ASINBUFSIZ; 786558Srrh nread = 0; 796558Srrh do { 8013448Srrh got = read(stdin->_file, MIDDLE + nread, goal); 816558Srrh if (got == 0) 826558Srrh hadeof = 1; 836558Srrh if (got <= 0) 846558Srrh break; 856558Srrh nread += got; 866558Srrh goal -= got; 876558Srrh } while (goal); 886558Srrh 895798Srrh if (nread == 0) 905798Srrh return(0); 9113448Srrh from = MIDDLE + nread; 9213448Srrh p_stop = from; 935798Srrh *from = '\0'; 946558Srrh while (*--from != '\n'){ 956558Srrh /* 966558Srrh * back over the partial line 976558Srrh */ 9813448Srrh if (from == MIDDLE) { 996558Srrh from = p_stop; 1006558Srrh *p_stop++ = '\n'; 1016558Srrh break; 1026558Srrh } else { 1036558Srrh continue; 1046558Srrh } 1056558Srrh } 1066558Srrh 1075798Srrh from++; /* first char of partial line */ 1085798Srrh p_start = from; 1095798Srrh p_swapped = *p_start; 1105798Srrh *p_start = NEEDCHAR; /* force assertion */ 1115798Srrh return(inbufptr); 1125798Srrh } 1135798Srrh 1145798Srrh scan_dot_s(bufferbox) 1155798Srrh struct tokbufdesc *bufferbox; 1165798Srrh { 1175798Srrh reg int ryylval; /* local copy of lexical value */ 1185798Srrh extern int yylval; /* global copy of lexical value */ 1195798Srrh reg int val; /* the value returned */ 1205798Srrh int i; /* simple counter */ 1215798Srrh reg char *rcp; 1225798Srrh char *cp; /* can have address taken */ 1235798Srrh reg int ch; /* treated as a character */ 1245798Srrh int ch1; /* shadow value */ 1255798Srrh reg char *inbufptr; 1265798Srrh struct symtab *op; 1275798Srrh 1285798Srrh reg ptrall bufptr; /* where to stuff tokens */ 1295798Srrh ptrall lgbackpatch; /* where to stuff a string length */ 1305798Srrh ptrall bufub; /* where not to stuff tokens */ 13113448Srrh reg int maxstrlg; /* how long a string can be */ 1325798Srrh long intval; /* value of int */ 1335798Srrh int linescrossed; /* when doing strings and comments */ 1345798Srrh struct Opcode opstruct; 1355798Srrh 1365798Srrh (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 1375798Srrh (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 1385798Srrh 1395798Srrh inbufptr = InBufPtr; 1405798Srrh if (inbufptr == 0){ 1415798Srrh inbufptr = fillinbuffer(); 1425798Srrh if (inbufptr == 0){ /*end of file*/ 1435798Srrh endoffile: 1445798Srrh inbufptr = 0; 1455798Srrh ptoken(bufptr, PARSEEOF); 1465798Srrh goto done; 1475798Srrh } 1485798Srrh } 1495798Srrh 1505798Srrh if (newfflag){ 15113448Srrh newfflag = 0; 15213448Srrh ryylval = (int)savestr(newfname, strlen(newfname) + 1); 15313448Srrh 1545798Srrh ptoken(bufptr, IFILE); 1555798Srrh ptoken(bufptr, STRING); 15613448Srrh pptr(bufptr, ryylval); 1575798Srrh 1585798Srrh ptoken(bufptr, ILINENO); 1595798Srrh ptoken(bufptr, INT); 1605798Srrh pint(bufptr, 1); 1615798Srrh } 1625798Srrh 1635798Srrh while (bufptr < bufub){ 1645798Srrh loop: 1655798Srrh switch(ryylval = (type+2)[ch = getchar()]) { 1665798Srrh case SCANEOF: 1675798Srrh inbufptr = 0; 1685798Srrh goto endoffile; 1695798Srrh 1705798Srrh case NEEDSBUF: 1715798Srrh inbufptr = fillinbuffer(); 1725798Srrh if (inbufptr == 0) 1735798Srrh goto endoffile; 1745798Srrh goto loop; 1755798Srrh 1765798Srrh case DIV: /*process C style comments*/ 1775798Srrh if ( (ch = getchar()) == '*') { /*comment prelude*/ 1785798Srrh int incomment; 1795798Srrh linescrossed = 0; 1805798Srrh incomment = 1; 1815798Srrh ch = getchar(); /*skip over the * */ 1825798Srrh while(incomment){ 1835798Srrh switch(ch){ 1845798Srrh case '*': 1855798Srrh ch = getchar(); 1865798Srrh incomment = (ch != '/'); 1875798Srrh break; 1885798Srrh case '\n': 1895798Srrh scanlineno++; 1905798Srrh linescrossed++; 1915798Srrh ch = getchar(); 1925798Srrh break; 1935798Srrh case EOFCHAR: 1945798Srrh goto endoffile; 1955798Srrh case NEEDCHAR: 1965798Srrh inbufptr = fillinbuffer(); 1975798Srrh if (inbufptr == 0) 1985798Srrh goto endoffile; 1995798Srrh lineno++; 2005798Srrh ch = getchar(); 2015798Srrh break; 2025798Srrh default: 2035798Srrh ch = getchar(); 2045798Srrh break; 2055798Srrh } 2065798Srrh } 2075798Srrh val = ILINESKIP; 2085798Srrh ryylval = linescrossed; 2095798Srrh goto ret; 2105798Srrh } else { /*just an ordinary DIV*/ 2115798Srrh ungetc(ch); 2125798Srrh val = ryylval = DIV; 2135798Srrh goto ret; 2145798Srrh } 2155798Srrh case SH: 2165798Srrh if (oval == NL){ 2175798Srrh /* 2185798Srrh * Attempt to recognize a C preprocessor 2195798Srrh * style comment '^#[ \t]*[0-9]*[ \t]*".*" 2205798Srrh */ 2215798Srrh ch = getchar(); /*bump the #*/ 2225798Srrh while (INCHARSET(ch, SPACE)) 2235798Srrh ch = getchar();/*bump white */ 2245798Srrh if (INCHARSET(ch, DIGIT)){ 2255798Srrh intval = 0; 2265798Srrh while(INCHARSET(ch, DIGIT)){ 2275798Srrh intval = intval*10 + ch - '0'; 2285798Srrh ch = getchar(); 2295798Srrh } 2305798Srrh while (INCHARSET(ch, SPACE)) 2315798Srrh ch = getchar(); 2325798Srrh if (ch == '"'){ 2335798Srrh ptoken(bufptr, ILINENO); 2345798Srrh ptoken(bufptr, INT); 2355798Srrh pint(bufptr, intval - 1); 2365798Srrh ptoken(bufptr, IFILE); 2375798Srrh /* 2385798Srrh * The '"' has already been 2395798Srrh * munched 2405798Srrh * 2415798Srrh * eatstr will not eat 2425798Srrh * the trailing \n, so 2435798Srrh * it is given to the parser 2445798Srrh * and counted. 2455798Srrh */ 2465798Srrh goto eatstr; 2475798Srrh } 2485798Srrh } 2495798Srrh } 2505798Srrh /* 2515798Srrh * Well, its just an ordinary decadent comment 2525798Srrh */ 2535798Srrh while ((ch != '\n') && (ch != EOFCHAR)) 2545798Srrh ch = getchar(); 2555798Srrh if (ch == EOFCHAR) 2565798Srrh goto endoffile; 2575798Srrh val = ryylval = oval = NL; 2585798Srrh scanlineno++; 2595798Srrh goto ret; 2605798Srrh 2615798Srrh case NL: 2625798Srrh scanlineno++; 2635798Srrh val = ryylval; 2645798Srrh goto ret; 2655798Srrh 2665798Srrh case SP: 2675798Srrh oval = SP; /*invalidate ^# meta comments*/ 2685798Srrh goto loop; 2695798Srrh 2705798Srrh case REGOP: /* % , could be used as modulo, or register*/ 2715798Srrh ch = getchar(); 2725798Srrh if (INCHARSET(ch, DIGIT)){ 2735798Srrh ryylval = ch-'0'; 2745798Srrh if (ch=='1') { 2755798Srrh if (INCHARSET( (ch = getchar()), REGDIGIT)) 2765798Srrh ryylval = 10+ch-'0'; 2775798Srrh else 2785798Srrh ungetc(ch); 2795798Srrh } 2805798Srrh /* 2815798Srrh * God only knows what the original author 2825798Srrh * wanted this undocumented feature to 2835798Srrh * do. 2845798Srrh * %5++ is really r7 2855798Srrh */ 2865798Srrh while(INCHARSET( (ch = getchar()), SIGN)) { 2875798Srrh if (ch=='+') 2885798Srrh ryylval++; 2895798Srrh else 2905798Srrh ryylval--; 2915798Srrh } 2925798Srrh ungetc(ch); 2935798Srrh val = REG; 2945798Srrh } else { 2955798Srrh ungetc(ch); 2965798Srrh val = REGOP; 2975798Srrh } 2985798Srrh goto ret; 2995798Srrh 3005798Srrh case ALPH: 3015798Srrh ch1 = ch; 3025798Srrh if (INCHARSET(ch, SZSPECBEGIN)){ 3035798Srrh if( (ch = getchar()) == '`' || ch == '^'){ 3045798Srrh ch1 |= 0100; /*convert to lower*/ 3055798Srrh switch(ch1){ 3065798Srrh case 'b': ryylval = 1; break; 3075798Srrh case 'w': ryylval = 2; break; 3085798Srrh case 'l': ryylval = 4; break; 3095798Srrh default: ryylval = d124; break; 3105798Srrh } 3115798Srrh val = SIZESPEC; 3125798Srrh goto ret; 3135798Srrh } else { 3145798Srrh ungetc(ch); 3155798Srrh ch = ch1; /*restore first character*/ 3165798Srrh } 3175798Srrh } 3185798Srrh rcp = yytext; 3195798Srrh do { 320*13462Srrh if (rcp < &yytext[NCPName]) 3215798Srrh *rcp++ = ch; 3225798Srrh } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 3235798Srrh *rcp = '\0'; 3245798Srrh while (INCHARSET(ch, SPACE)) 3255798Srrh ch = getchar(); 3265798Srrh ungetc(ch); 3275798Srrh 3285798Srrh switch((op = *lookup(1))->s_tag){ 3295798Srrh case 0: 3305798Srrh case LABELID: 3315798Srrh /* 3325798Srrh * Its a name... (Labels are subsets ofname) 3335798Srrh */ 3345798Srrh ryylval = (int)op; 3355798Srrh val = NAME; 3365798Srrh break; 3375798Srrh case INST0: 3385798Srrh case INSTn: 3395798Srrh case IJXXX: 3405798Srrh opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 3415798Srrh opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 3425798Srrh val = op->s_tag; 3435798Srrh break; 3445798Srrh default: 3455798Srrh ryylval = ( (struct instab *)op)->i_popcode; 3465798Srrh val = op->s_tag; 3475798Srrh break; 3485798Srrh } 3495798Srrh goto ret; 3505798Srrh 3515798Srrh case DIG: 3525798Srrh /* 3535798Srrh * Implement call by reference on a reg variable 3545798Srrh */ 3555798Srrh cp = inbufptr; 3565798Srrh val = number(ch, &cp); 3575798Srrh /* 3585798Srrh * yylval or yybignum has been stuffed as a side 3595798Srrh * effect to number(); get the global yylval 3605798Srrh * into our fast local copy in case it was an INT. 3615798Srrh */ 3625798Srrh ryylval = yylval; 3635798Srrh inbufptr = cp; 3645798Srrh goto ret; 3655798Srrh 3665798Srrh case LSH: 3675798Srrh case RSH: 3685798Srrh /* 3695798Srrh * We allow the C style operators 3705798Srrh * << and >>, as well as < and > 3715798Srrh */ 3725798Srrh if ( (ch1 = getchar()) != ch) 3735798Srrh ungetc(ch1); 3745798Srrh val = ryylval; 3755798Srrh goto ret; 3765798Srrh 3775798Srrh case MINUS: 3785798Srrh if ( (ch = getchar()) =='(') 3795798Srrh ryylval=val=MP; 3805798Srrh else { 3815798Srrh ungetc(ch); 3825798Srrh val=MINUS; 3835798Srrh } 3845798Srrh goto ret; 3855798Srrh 3865798Srrh case SQ: 3875798Srrh if ((ryylval = getchar()) == '\n') 3885798Srrh scanlineno++; /*not entirely correct*/ 3895798Srrh val = INT; 3905798Srrh goto ret; 3915798Srrh 3925798Srrh case DQ: 3935798Srrh eatstr: 3945798Srrh linescrossed = 0; 395*13462Srrh for(rcp = strtext, maxstrlg = NCPString; maxstrlg > 0; --maxstrlg){ 39613448Srrh switch(ch = getchar()){ 39713448Srrh case '"': 39813448Srrh goto tailDQ; 39913448Srrh default: 40013448Srrh stuff: 40113448Srrh pchar(rcp, ch); 40213448Srrh break; 40313448Srrh case '\n': 40413448Srrh yywarning("New line in a string constant"); 4055798Srrh scanlineno++; 4065798Srrh linescrossed++; 4075798Srrh ch = getchar(); 40813448Srrh switch(ch){ 40913448Srrh case NEEDCHAR: 41013448Srrh if ( (inbufptr = fillinbuffer()) != 0){ 41113448Srrh ch = '\n'; 41213448Srrh goto stuff; 41313448Srrh } 41413448Srrh /*FALLTHROUGH*/ 41513448Srrh case EOFCHAR: 41613448Srrh pchar(rcp, '\n'); 4175798Srrh ungetc(EOFCHAR); 41813448Srrh goto tailDQ; 41913448Srrh default: 4205798Srrh ungetc(ch); 4215798Srrh ch = '\n'; 4225798Srrh goto stuff; 4235798Srrh } 42413448Srrh break; 42513448Srrh 42613448Srrh case '\\': 4275798Srrh ch = getchar(); /*skip the '\\'*/ 4285798Srrh if ( INCHARSET(ch, BSESCAPE)){ 4295798Srrh switch (ch){ 4305798Srrh case 'b': ch = '\b'; goto stuff; 4315798Srrh case 'f': ch = '\f'; goto stuff; 4325798Srrh case 'n': ch = '\n'; goto stuff; 4335798Srrh case 'r': ch = '\r'; goto stuff; 4345798Srrh case 't': ch = '\t'; goto stuff; 4355798Srrh } 4365798Srrh } 43713448Srrh if ( !(INCHARSET(ch, OCTDIGIT)) ) 43813448Srrh goto stuff; 4395798Srrh i = 0; 4405798Srrh intval = 0; 4415798Srrh while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 44213448Srrh i++; 44313448Srrh intval <<= 3; 44413448Srrh intval += ch - '0'; 4455798Srrh ch = getchar(); 4465798Srrh } 4475798Srrh ungetc(ch); 4486558Srrh ch = (char)intval; 4495798Srrh goto stuff; 45013448Srrh } 4515798Srrh } 45213448Srrh tailDQ: ; 4535798Srrh /* 45413448Srrh * account for any lines that were crossed 4555798Srrh */ 4565798Srrh if (linescrossed){ 45713448Srrh ptoken(bufptr, ILINESKIP); 45813448Srrh pint(bufptr, linescrossed); 45913448Srrh } 46013448Srrh /* 461*13462Srrh * put the string in strtext into the string pool 46213448Srrh * 46313448Srrh * The value in ryylval points to the string; 46413448Srrh * the previous 2 bytes is the length of the string 46513448Srrh * 46613448Srrh * Cheat: append a trailing null to the string 46713448Srrh * and then adjust the string length to ignore 46813448Srrh * the trailing null. If any STRING client requires 46913448Srrh * the trailing null, the client can just change STRLEN 47013448Srrh */ 47113448Srrh val = STRING; 47213448Srrh *rcp++ = 0; 473*13462Srrh ryylval = (int)savestr(strtext, rcp - strtext); 47413448Srrh STRLEN(((char *)ryylval)) -= 1; 47513448Srrh goto ret; 4765798Srrh 4775798Srrh case BADCHAR: 4785798Srrh linescrossed = lineno; 4795798Srrh lineno = scanlineno; 4805798Srrh yyerror("Illegal character mapped: %d, char read:(octal) %o", 4815798Srrh ryylval, ch); 4825798Srrh lineno = linescrossed; 4835798Srrh val = BADCHAR; 4845798Srrh goto ret; 4855798Srrh 4865798Srrh default: 4875798Srrh val = ryylval; 4885798Srrh goto ret; 4895798Srrh } /*end of the switch*/ 4905798Srrh /* 4915798Srrh * here with one token, so stuff it 4925798Srrh */ 4935798Srrh ret: 4945798Srrh oval = val; 4955798Srrh ptoken(bufptr, val); 4965798Srrh switch(val){ 4975798Srrh case ILINESKIP: 4985798Srrh pint(bufptr, ryylval); 4995798Srrh break; 5005798Srrh case SIZESPEC: 5015798Srrh pchar(bufptr, ryylval); 5025798Srrh break; 5035798Srrh case BFINT: plong(bufptr, ryylval); 5045798Srrh break; 5055798Srrh case INT: plong(bufptr, ryylval); 5065798Srrh break; 5075798Srrh case BIGNUM: pnumber(bufptr, yybignum); 5085798Srrh break; 50913448Srrh case STRING: pptr(bufptr, (int)(char *)ryylval); 51013448Srrh break; 5115798Srrh case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 5125798Srrh break; 5135798Srrh case REG: pchar(bufptr, ryylval); 5145798Srrh break; 5155798Srrh case INST0: 5165798Srrh case INSTn: 5175798Srrh popcode(bufptr, opstruct); 5185798Srrh break; 5195798Srrh case IJXXX: 5205798Srrh popcode(bufptr, opstruct); 5215798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 5225798Srrh break; 5235798Srrh case ISTAB: 5245798Srrh case ISTABSTR: 5255798Srrh case ISTABNONE: 5265798Srrh case ISTABDOT: 5275798Srrh case IALIGN: 5285798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 5295798Srrh break; 5305798Srrh /* 5315798Srrh * default: 5325798Srrh */ 5335798Srrh } 5345798Srrh builtval: ; 5355798Srrh } /*end of the while to stuff the buffer*/ 5365798Srrh done: 5375798Srrh bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 5385798Srrh 5395798Srrh /* 5405798Srrh * This is a real kludge: 5415798Srrh * 5425798Srrh * We put the last token in the buffer to be a MINUS 5435798Srrh * symbol. This last token will never be picked up 5445798Srrh * in the normal way, but can be looked at during 5455798Srrh * a peekahead look that the short circuit expression 5465798Srrh * evaluator uses to see if an expression is complicated. 5475798Srrh * 5485798Srrh * Consider the following situation: 5495798Srrh * 5505798Srrh * .word 45 + 47 5515798Srrh * buffer 1 | buffer 0 5525798Srrh * the peekahead would want to look across the buffer, 5535798Srrh * but will look in the buffer end zone, see the minus, and 5545798Srrh * fail. 5555798Srrh */ 5565798Srrh ptoken(bufptr, MINUS); 5575798Srrh InBufPtr = inbufptr; /*copy this back*/ 5585798Srrh } 559