15798Srrh /* 25798Srrh * Copyright (c) 1982 Regents of the University of California 35798Srrh */ 45798Srrh #ifndef lint 5*6558Srrh static char sccsid[] = "@(#)asscan2.c 4.4 04/16/82"; 65798Srrh #endif not lint 75798Srrh 85798Srrh #include "asscanl.h" 95798Srrh static inttoktype oval = NL; 105798Srrh 115798Srrh #define NINBUFFERS 2 125798Srrh #define INBUFLG NINBUFFERS*BUFSIZ + 2 135798Srrh /* 145798Srrh * We have two input buffers; the first one is reserved 155798Srrh * for catching the tail of a line split across a buffer 165798Srrh * boundary; the other one are used for snarfing a buffer 175798Srrh * worth of .s source. 185798Srrh */ 195798Srrh static char inbuffer[INBUFLG]; 205798Srrh static char *InBufPtr = 0; 215798Srrh 225798Srrh /* 235798Srrh * fill the inbuffer from the standard input. 245798Srrh * Assert: there are always n COMPLETE! lines in the buffer area. 255798Srrh * Assert: there is always a \n terminating the last line 265798Srrh * in the buffer area. 275798Srrh * Assert: after the \n, there is an EOFCHAR (hard end of file) 285798Srrh * or a NEEDCHAR (end of buffer) 295798Srrh * Assert: fgets always null pads the string it reads. 305798Srrh * Assert: no ungetc's are done at the end of a line or at the 315798Srrh * beginning of a line. 325798Srrh * 335798Srrh * We read a complete buffer of characters in one single read. 345798Srrh * We then back scan within this buffer to find the end of the 355798Srrh * last complete line, and force the assertions, and save a pointer 365798Srrh * to the incomplete line. 375798Srrh * The next call to fillinbuffer will move the unread characters 385798Srrh * to the end of the first buffer, and then read another two buffers, 395798Srrh * completing the cycle. 405798Srrh */ 415798Srrh 425798Srrh static char p_swapped = '\0'; 435798Srrh static char *p_start = &inbuffer[NINBUFFERS * BUFSIZ]; 445798Srrh static char *p_stop = &inbuffer[NINBUFFERS * BUFSIZ]; 45*6558Srrh 465798Srrh char *fillinbuffer() 475798Srrh { 485798Srrh register char *to; 495798Srrh register char *from; 505798Srrh char *inbufptr; 515798Srrh int nread; 52*6558Srrh static int hadeof; 53*6558Srrh int goal; 54*6558Srrh int got; 555798Srrh 565798Srrh *p_start = p_swapped; 575798Srrh inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start); 585798Srrh 595798Srrh for (to = inbufptr, from = p_start; from < p_stop;) 605798Srrh *to++ = *from++; 615798Srrh /* 625798Srrh * Now, go read two full buffers (hopefully) 635798Srrh */ 64*6558Srrh if (hadeof){ 65*6558Srrh hadeof = 0; 66*6558Srrh return (0); 67*6558Srrh } 68*6558Srrh goal = (NINBUFFERS - 1)*BUFSIZ; 69*6558Srrh nread = 0; 70*6558Srrh do { 71*6558Srrh got = read(stdin->_file, &inbuffer[1*BUFSIZ + nread], goal); 72*6558Srrh if (got == 0) 73*6558Srrh hadeof = 1; 74*6558Srrh if (got <= 0) 75*6558Srrh break; 76*6558Srrh nread += got; 77*6558Srrh goal -= got; 78*6558Srrh } while (goal); 79*6558Srrh 805798Srrh if (nread == 0) 815798Srrh return(0); 825798Srrh p_stop = from = &inbuffer[1*BUFSIZ + nread]; 835798Srrh *from = '\0'; 84*6558Srrh 85*6558Srrh while (*--from != '\n'){ 86*6558Srrh /* 87*6558Srrh * back over the partial line 88*6558Srrh */ 89*6558Srrh if (from == &inbuffer[1*BUFSIZ]) { 90*6558Srrh from = p_stop; 91*6558Srrh *p_stop++ = '\n'; 92*6558Srrh break; 93*6558Srrh } else { 94*6558Srrh continue; 95*6558Srrh } 96*6558Srrh } 97*6558Srrh 985798Srrh from++; /* first char of partial line */ 995798Srrh p_start = from; 1005798Srrh p_swapped = *p_start; 1015798Srrh *p_start = NEEDCHAR; /* force assertion */ 1025798Srrh return(inbufptr); 1035798Srrh } 1045798Srrh 1055798Srrh scan_dot_s(bufferbox) 1065798Srrh struct tokbufdesc *bufferbox; 1075798Srrh { 1085798Srrh reg int ryylval; /* local copy of lexical value */ 1095798Srrh extern int yylval; /* global copy of lexical value */ 1105798Srrh reg int val; /* the value returned */ 1115798Srrh int i; /* simple counter */ 1125798Srrh reg char *rcp; 1135798Srrh char *cp; /* can have address taken */ 1145798Srrh reg int ch; /* treated as a character */ 1155798Srrh int ch1; /* shadow value */ 1165798Srrh reg char *inbufptr; 1175798Srrh struct symtab *op; 1185798Srrh 1195798Srrh reg ptrall bufptr; /* where to stuff tokens */ 1205798Srrh ptrall lgbackpatch; /* where to stuff a string length */ 1215798Srrh ptrall bufub; /* where not to stuff tokens */ 1225798Srrh int maxstrlg; /* how long a string can be */ 1235798Srrh long intval; /* value of int */ 1245798Srrh int linescrossed; /* when doing strings and comments */ 1255798Srrh struct Opcode opstruct; 1265798Srrh 1275798Srrh (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 1285798Srrh (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 1295798Srrh 1305798Srrh inbufptr = InBufPtr; 1315798Srrh if (inbufptr == 0){ 1325798Srrh inbufptr = fillinbuffer(); 1335798Srrh if (inbufptr == 0){ /*end of file*/ 1345798Srrh endoffile: 1355798Srrh inbufptr = 0; 1365798Srrh ptoken(bufptr, PARSEEOF); 1375798Srrh goto done; 1385798Srrh } 1395798Srrh } 1405798Srrh 1415798Srrh if (newfflag){ 1425798Srrh ptoken(bufptr, IFILE); 1435798Srrh ptoken(bufptr, STRING); 1445798Srrh val = strlen(newfname) + 1; 1455798Srrh movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val); 1465798Srrh bstrlg(bufptr, val); 1475798Srrh 1485798Srrh ptoken(bufptr, ILINENO); 1495798Srrh ptoken(bufptr, INT); 1505798Srrh pint(bufptr, 1); 1515798Srrh newfflag = 0; 1525798Srrh } 1535798Srrh 1545798Srrh while (bufptr < bufub){ 1555798Srrh loop: 1565798Srrh switch(ryylval = (type+2)[ch = getchar()]) { 1575798Srrh case SCANEOF: 1585798Srrh inbufptr = 0; 1595798Srrh goto endoffile; 1605798Srrh 1615798Srrh case NEEDSBUF: 1625798Srrh inbufptr = fillinbuffer(); 1635798Srrh if (inbufptr == 0) 1645798Srrh goto endoffile; 1655798Srrh goto loop; 1665798Srrh 1675798Srrh case DIV: /*process C style comments*/ 1685798Srrh if ( (ch = getchar()) == '*') { /*comment prelude*/ 1695798Srrh int incomment; 1705798Srrh linescrossed = 0; 1715798Srrh incomment = 1; 1725798Srrh ch = getchar(); /*skip over the * */ 1735798Srrh while(incomment){ 1745798Srrh switch(ch){ 1755798Srrh case '*': 1765798Srrh ch = getchar(); 1775798Srrh incomment = (ch != '/'); 1785798Srrh break; 1795798Srrh case '\n': 1805798Srrh scanlineno++; 1815798Srrh linescrossed++; 1825798Srrh ch = getchar(); 1835798Srrh break; 1845798Srrh case EOFCHAR: 1855798Srrh goto endoffile; 1865798Srrh case NEEDCHAR: 1875798Srrh inbufptr = fillinbuffer(); 1885798Srrh if (inbufptr == 0) 1895798Srrh goto endoffile; 1905798Srrh lineno++; 1915798Srrh ch = getchar(); 1925798Srrh break; 1935798Srrh default: 1945798Srrh ch = getchar(); 1955798Srrh break; 1965798Srrh } 1975798Srrh } 1985798Srrh val = ILINESKIP; 1995798Srrh ryylval = linescrossed; 2005798Srrh goto ret; 2015798Srrh } else { /*just an ordinary DIV*/ 2025798Srrh ungetc(ch); 2035798Srrh val = ryylval = DIV; 2045798Srrh goto ret; 2055798Srrh } 2065798Srrh case SH: 2075798Srrh if (oval == NL){ 2085798Srrh /* 2095798Srrh * Attempt to recognize a C preprocessor 2105798Srrh * style comment '^#[ \t]*[0-9]*[ \t]*".*" 2115798Srrh */ 2125798Srrh ch = getchar(); /*bump the #*/ 2135798Srrh while (INCHARSET(ch, SPACE)) 2145798Srrh ch = getchar();/*bump white */ 2155798Srrh if (INCHARSET(ch, DIGIT)){ 2165798Srrh intval = 0; 2175798Srrh while(INCHARSET(ch, DIGIT)){ 2185798Srrh intval = intval*10 + ch - '0'; 2195798Srrh ch = getchar(); 2205798Srrh } 2215798Srrh while (INCHARSET(ch, SPACE)) 2225798Srrh ch = getchar(); 2235798Srrh if (ch == '"'){ 2245798Srrh ptoken(bufptr, ILINENO); 2255798Srrh ptoken(bufptr, INT); 2265798Srrh pint(bufptr, intval - 1); 2275798Srrh ptoken(bufptr, IFILE); 2285798Srrh /* 2295798Srrh * The '"' has already been 2305798Srrh * munched 2315798Srrh * 2325798Srrh * eatstr will not eat 2335798Srrh * the trailing \n, so 2345798Srrh * it is given to the parser 2355798Srrh * and counted. 2365798Srrh */ 2375798Srrh goto eatstr; 2385798Srrh } 2395798Srrh } 2405798Srrh } 2415798Srrh /* 2425798Srrh * Well, its just an ordinary decadent comment 2435798Srrh */ 2445798Srrh while ((ch != '\n') && (ch != EOFCHAR)) 2455798Srrh ch = getchar(); 2465798Srrh if (ch == EOFCHAR) 2475798Srrh goto endoffile; 2485798Srrh val = ryylval = oval = NL; 2495798Srrh scanlineno++; 2505798Srrh goto ret; 2515798Srrh 2525798Srrh case NL: 2535798Srrh scanlineno++; 2545798Srrh val = ryylval; 2555798Srrh goto ret; 2565798Srrh 2575798Srrh case SP: 2585798Srrh oval = SP; /*invalidate ^# meta comments*/ 2595798Srrh goto loop; 2605798Srrh 2615798Srrh case REGOP: /* % , could be used as modulo, or register*/ 2625798Srrh ch = getchar(); 2635798Srrh if (INCHARSET(ch, DIGIT)){ 2645798Srrh ryylval = ch-'0'; 2655798Srrh if (ch=='1') { 2665798Srrh if (INCHARSET( (ch = getchar()), REGDIGIT)) 2675798Srrh ryylval = 10+ch-'0'; 2685798Srrh else 2695798Srrh ungetc(ch); 2705798Srrh } 2715798Srrh /* 2725798Srrh * God only knows what the original author 2735798Srrh * wanted this undocumented feature to 2745798Srrh * do. 2755798Srrh * %5++ is really r7 2765798Srrh */ 2775798Srrh while(INCHARSET( (ch = getchar()), SIGN)) { 2785798Srrh if (ch=='+') 2795798Srrh ryylval++; 2805798Srrh else 2815798Srrh ryylval--; 2825798Srrh } 2835798Srrh ungetc(ch); 2845798Srrh val = REG; 2855798Srrh } else { 2865798Srrh ungetc(ch); 2875798Srrh val = REGOP; 2885798Srrh } 2895798Srrh goto ret; 2905798Srrh 2915798Srrh case ALPH: 2925798Srrh ch1 = ch; 2935798Srrh if (INCHARSET(ch, SZSPECBEGIN)){ 2945798Srrh if( (ch = getchar()) == '`' || ch == '^'){ 2955798Srrh ch1 |= 0100; /*convert to lower*/ 2965798Srrh switch(ch1){ 2975798Srrh case 'b': ryylval = 1; break; 2985798Srrh case 'w': ryylval = 2; break; 2995798Srrh case 'l': ryylval = 4; break; 3005798Srrh default: ryylval = d124; break; 3015798Srrh } 3025798Srrh val = SIZESPEC; 3035798Srrh goto ret; 3045798Srrh } else { 3055798Srrh ungetc(ch); 3065798Srrh ch = ch1; /*restore first character*/ 3075798Srrh } 3085798Srrh } 3095798Srrh rcp = yytext; 3105798Srrh do { 3115798Srrh if (rcp < &yytext[NCPS]) 3125798Srrh *rcp++ = ch; 3135798Srrh } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 3145798Srrh *rcp = '\0'; 3155798Srrh while (INCHARSET(ch, SPACE)) 3165798Srrh ch = getchar(); 3175798Srrh ungetc(ch); 3185798Srrh 3195798Srrh switch((op = *lookup(1))->s_tag){ 3205798Srrh case 0: 3215798Srrh case LABELID: 3225798Srrh /* 3235798Srrh * Its a name... (Labels are subsets ofname) 3245798Srrh */ 3255798Srrh ryylval = (int)op; 3265798Srrh val = NAME; 3275798Srrh break; 3285798Srrh case INST0: 3295798Srrh case INSTn: 3305798Srrh case IJXXX: 3315798Srrh opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 3325798Srrh opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 3335798Srrh val = op->s_tag; 3345798Srrh break; 3355798Srrh default: 3365798Srrh ryylval = ( (struct instab *)op)->i_popcode; 3375798Srrh val = op->s_tag; 3385798Srrh break; 3395798Srrh } 3405798Srrh goto ret; 3415798Srrh 3425798Srrh case DIG: 3435798Srrh /* 3445798Srrh * Implement call by reference on a reg variable 3455798Srrh */ 3465798Srrh cp = inbufptr; 3475798Srrh val = number(ch, &cp); 3485798Srrh /* 3495798Srrh * yylval or yybignum has been stuffed as a side 3505798Srrh * effect to number(); get the global yylval 3515798Srrh * into our fast local copy in case it was an INT. 3525798Srrh */ 3535798Srrh ryylval = yylval; 3545798Srrh inbufptr = cp; 3555798Srrh goto ret; 3565798Srrh 3575798Srrh case LSH: 3585798Srrh case RSH: 3595798Srrh /* 3605798Srrh * We allow the C style operators 3615798Srrh * << and >>, as well as < and > 3625798Srrh */ 3635798Srrh if ( (ch1 = getchar()) != ch) 3645798Srrh ungetc(ch1); 3655798Srrh val = ryylval; 3665798Srrh goto ret; 3675798Srrh 3685798Srrh case MINUS: 3695798Srrh if ( (ch = getchar()) =='(') 3705798Srrh ryylval=val=MP; 3715798Srrh else { 3725798Srrh ungetc(ch); 3735798Srrh val=MINUS; 3745798Srrh } 3755798Srrh goto ret; 3765798Srrh 3775798Srrh case SQ: 3785798Srrh if ((ryylval = getchar()) == '\n') 3795798Srrh scanlineno++; /*not entirely correct*/ 3805798Srrh val = INT; 3815798Srrh goto ret; 3825798Srrh 3835798Srrh case DQ: 3845798Srrh eatstr: 3855798Srrh linescrossed = 0; 3865798Srrh maxstrlg = (char *)bufub - (char *)bufptr; 3875798Srrh 3885798Srrh if (maxstrlg < MAXSTRLG) { 3895798Srrh ungetc('"'); 3905798Srrh *(bytetoktype *)bufptr = VOID ; 3915798Srrh bufub = bufptr; 3925798Srrh goto done; 3935798Srrh } 3945798Srrh if (maxstrlg > MAXSTRLG) 3955798Srrh maxstrlg = MAXSTRLG; 3965798Srrh 3975798Srrh ptoken(bufptr, STRING); 3985798Srrh lgbackpatch = bufptr; /*this is where the size goes*/ 3995798Srrh bufptr += sizeof(lgtype); 4005798Srrh /* 4015798Srrh * bufptr is now set to 4025798Srrh * be stuffed with characters from 4035798Srrh * the input 4045798Srrh */ 4055798Srrh 4065798Srrh while ( (maxstrlg > 0) 4075798Srrh && !(INCHARSET( (ch = getchar()), STRESCAPE)) 4085798Srrh ){ 4095798Srrh stuff: 410*6558Srrh maxstrlg -= 1; 4115798Srrh pchar(bufptr, ch); 4125798Srrh } 4135798Srrh if (maxstrlg <= 0){ /*enough characters to fill a string buffer*/ 4145798Srrh ungetc('"'); /*will read it next*/ 4155798Srrh } 416*6558Srrh else if (ch == '"') 417*6558Srrh /*VOID*/ ; /*done*/ 4185798Srrh else if (ch == '\n'){ 4195798Srrh yywarning("New line embedded in a string constant."); 4205798Srrh scanlineno++; 4215798Srrh linescrossed++; 4225798Srrh ch = getchar(); 4235798Srrh if (ch == EOFCHAR){ 4245798Srrh do_eof: 4255798Srrh pchar(bufptr, '\n'); 4265798Srrh ungetc(EOFCHAR); 4275798Srrh } else 4285798Srrh if (ch == NEEDCHAR){ 4295798Srrh if ( (inbufptr = fillinbuffer()) == 0) 4305798Srrh goto do_eof; 4315798Srrh ch = '\n'; 4325798Srrh goto stuff; 4335798Srrh } else { /* simple case */ 4345798Srrh ungetc(ch); 4355798Srrh ch = '\n'; 4365798Srrh goto stuff; 4375798Srrh } 4385798Srrh } else { 4395798Srrh ch = getchar(); /*skip the '\\'*/ 4405798Srrh if ( INCHARSET(ch, BSESCAPE)){ 4415798Srrh switch (ch){ 4425798Srrh case 'b': ch = '\b'; goto stuff; 4435798Srrh case 'f': ch = '\f'; goto stuff; 4445798Srrh case 'n': ch = '\n'; goto stuff; 4455798Srrh case 'r': ch = '\r'; goto stuff; 4465798Srrh case 't': ch = '\t'; goto stuff; 4475798Srrh } 4485798Srrh } 4495798Srrh if ( !(INCHARSET(ch,OCTDIGIT)) ) goto stuff; 4505798Srrh i = 0; 4515798Srrh intval = 0; 4525798Srrh while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 4535798Srrh i++;intval <<= 3;intval += ch - '0'; 4545798Srrh ch = getchar(); 4555798Srrh } 4565798Srrh ungetc(ch); 457*6558Srrh ch = (char)intval; 4585798Srrh goto stuff; 4595798Srrh } 4605798Srrh /* 4615798Srrh * bufptr now points at the next free slot 4625798Srrh */ 4635798Srrh bstrfromto(lgbackpatch, bufptr); 4645798Srrh if (linescrossed){ 4655798Srrh val = ILINESKIP; 4665798Srrh ryylval = linescrossed; 4675798Srrh goto ret; 4685798Srrh } else 4695798Srrh goto builtval; 4705798Srrh 4715798Srrh case BADCHAR: 4725798Srrh linescrossed = lineno; 4735798Srrh lineno = scanlineno; 4745798Srrh yyerror("Illegal character mapped: %d, char read:(octal) %o", 4755798Srrh ryylval, ch); 4765798Srrh lineno = linescrossed; 4775798Srrh val = BADCHAR; 4785798Srrh goto ret; 4795798Srrh 4805798Srrh default: 4815798Srrh val = ryylval; 4825798Srrh goto ret; 4835798Srrh } /*end of the switch*/ 4845798Srrh /* 4855798Srrh * here with one token, so stuff it 4865798Srrh */ 4875798Srrh ret: 4885798Srrh oval = val; 4895798Srrh ptoken(bufptr, val); 4905798Srrh switch(val){ 4915798Srrh case ILINESKIP: 4925798Srrh pint(bufptr, ryylval); 4935798Srrh break; 4945798Srrh case SIZESPEC: 4955798Srrh pchar(bufptr, ryylval); 4965798Srrh break; 4975798Srrh case BFINT: plong(bufptr, ryylval); 4985798Srrh break; 4995798Srrh case INT: plong(bufptr, ryylval); 5005798Srrh break; 5015798Srrh case BIGNUM: pnumber(bufptr, yybignum); 5025798Srrh break; 5035798Srrh case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 5045798Srrh break; 5055798Srrh case REG: pchar(bufptr, ryylval); 5065798Srrh break; 5075798Srrh case INST0: 5085798Srrh case INSTn: 5095798Srrh popcode(bufptr, opstruct); 5105798Srrh break; 5115798Srrh case IJXXX: 5125798Srrh popcode(bufptr, opstruct); 5135798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 5145798Srrh break; 5155798Srrh case ISTAB: 5165798Srrh case ISTABSTR: 5175798Srrh case ISTABNONE: 5185798Srrh case ISTABDOT: 5195798Srrh case IALIGN: 5205798Srrh pptr(bufptr, (int)(struct symtab *)symalloc()); 5215798Srrh break; 5225798Srrh /* 5235798Srrh * default: 5245798Srrh */ 5255798Srrh } 5265798Srrh builtval: ; 5275798Srrh } /*end of the while to stuff the buffer*/ 5285798Srrh done: 5295798Srrh bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 5305798Srrh 5315798Srrh /* 5325798Srrh * This is a real kludge: 5335798Srrh * 5345798Srrh * We put the last token in the buffer to be a MINUS 5355798Srrh * symbol. This last token will never be picked up 5365798Srrh * in the normal way, but can be looked at during 5375798Srrh * a peekahead look that the short circuit expression 5385798Srrh * evaluator uses to see if an expression is complicated. 5395798Srrh * 5405798Srrh * Consider the following situation: 5415798Srrh * 5425798Srrh * .word 45 + 47 5435798Srrh * buffer 1 | buffer 0 5445798Srrh * the peekahead would want to look across the buffer, 5455798Srrh * but will look in the buffer end zone, see the minus, and 5465798Srrh * fail. 5475798Srrh */ 5485798Srrh ptoken(bufptr, MINUS); 5495798Srrh InBufPtr = inbufptr; /*copy this back*/ 5505798Srrh } 551