1 /* 2 * Copyright (c) 1982 Regents of the University of California 3 */ 4 #ifndef lint 5 static char sccsid[] = "@(#)asscan2.c 4.9 06/30/83"; 6 #endif not lint 7 8 #include "asscanl.h" 9 10 static inttoktype oval = NL; 11 12 char inbufunget[8]; 13 char inbuffer[ASINBUFSIZ]; 14 char *Ginbufptr = inbuffer; 15 int Ginbufcnt = 0; 16 17 fillinbuffer() 18 { 19 int nread; 20 static int hadeof; 21 int goal; 22 int got; 23 24 nread = 0; 25 if (hadeof == 0){ 26 goal = sizeof(inbuffer); 27 do { 28 got = read(stdin->_file, inbuffer + nread, goal); 29 if (got == 0) 30 hadeof = 1; 31 if (got <= 0) 32 break; 33 nread += got; 34 goal -= got; 35 } while (goal); 36 } 37 /* 38 * getchar assumes that Ginbufcnt and Ginbufptr 39 * are adjusted as if one character has been removed 40 * from the input. 41 */ 42 if (nread == 0){ 43 inbuffer[0] = EOFCHAR; 44 nread = 1; 45 } 46 Ginbufcnt = nread - 1; 47 Ginbufptr = inbuffer + 1; 48 } 49 50 #ifndef FLEXNAMES 51 char strtext[NCPString + 1]; 52 #else FLEXNAMES 53 # if NCPName < NCPString 54 char strtext[NCPString + 1]; 55 # else 56 #define strtext yytext 57 # endif 58 #endif FLEXNAMES 59 60 scan_dot_s(bufferbox) 61 struct tokbufdesc *bufferbox; 62 { 63 reg char *inbufptr; 64 reg int inbufcnt; 65 reg int ryylval; /* local copy of lexical value */ 66 extern int yylval; /* global copy of lexical value */ 67 reg int val; /* the value returned */ 68 int i; /* simple counter */ 69 reg char *rcp; 70 int ch; /* treated as a character */ 71 int ch1; /* shadow value */ 72 struct symtab *op; 73 ptrall lgbackpatch; /* where to stuff a string length */ 74 reg ptrall bufptr; /* where to stuff tokens */ 75 ptrall bufub; /* where not to stuff tokens */ 76 reg int maxstrlg; /* how long a string can be */ 77 long intval; /* value of int */ 78 int linescrossed; /* when doing strings and comments */ 79 struct Opcode opstruct; 80 81 (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 82 (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 83 84 MEMTOREGBUF; 85 if (newfflag){ 86 newfflag = 0; 87 ryylval = (int)savestr(newfname, strlen(newfname) + 1); 88 89 ptoken(bufptr, IFILE); 90 ptoken(bufptr, STRING); 91 pptr(bufptr, ryylval); 92 93 ptoken(bufptr, ILINENO); 94 ptoken(bufptr, INT); 95 pint(bufptr, 1); 96 } 97 98 while (bufptr < bufub){ 99 loop: 100 switch(ryylval = (type+1)[ch = getchar()]) { 101 case SCANEOF: 102 endoffile: ; 103 inbufptr = 0; 104 ptoken(bufptr, PARSEEOF); 105 goto done; 106 107 case DIV: /*process C style comments*/ 108 if ( (ch = getchar()) == '*') { /*comment prelude*/ 109 int incomment; 110 linescrossed = 0; 111 incomment = 1; 112 ch = getchar(); /*skip over the * */ 113 while(incomment){ 114 switch(ch){ 115 case '*': 116 ch = getchar(); 117 incomment = (ch != '/'); 118 break; 119 case '\n': 120 scanlineno++; 121 linescrossed++; 122 ch = getchar(); 123 break; 124 case EOFCHAR: 125 goto endoffile; 126 default: 127 ch = getchar(); 128 break; 129 } 130 } 131 val = ILINESKIP; 132 ryylval = linescrossed; 133 goto ret; 134 } else { /*just an ordinary DIV*/ 135 ungetc(ch); 136 val = ryylval = DIV; 137 goto ret; 138 } 139 case SH: 140 if (oval == NL){ 141 /* 142 * Attempt to recognize a C preprocessor 143 * style comment '^#[ \t]*[0-9]*[ \t]*".*" 144 */ 145 ch = getchar(); /*bump the #*/ 146 while (INCHARSET(ch, SPACE)) 147 ch = getchar();/*bump white */ 148 if (INCHARSET(ch, DIGIT)){ 149 intval = 0; 150 while(INCHARSET(ch, DIGIT)){ 151 intval = intval*10 + ch - '0'; 152 ch = getchar(); 153 } 154 while (INCHARSET(ch, SPACE)) 155 ch = getchar(); 156 if (ch == '"'){ 157 ptoken(bufptr, ILINENO); 158 ptoken(bufptr, INT); 159 pint(bufptr, intval - 1); 160 ptoken(bufptr, IFILE); 161 /* 162 * The '"' has already been 163 * munched 164 * 165 * eatstr will not eat 166 * the trailing \n, so 167 * it is given to the parser 168 * and counted. 169 */ 170 goto eatstr; 171 } 172 } 173 } 174 /* 175 * Well, its just an ordinary decadent comment 176 */ 177 while ((ch != '\n') && (ch != EOFCHAR)) 178 ch = getchar(); 179 if (ch == EOFCHAR) 180 goto endoffile; 181 val = ryylval = oval = NL; 182 scanlineno++; 183 goto ret; 184 185 case NL: 186 scanlineno++; 187 val = ryylval; 188 goto ret; 189 190 case SP: 191 oval = SP; /*invalidate ^# meta comments*/ 192 goto loop; 193 194 case REGOP: /* % , could be used as modulo, or register*/ 195 ch = getchar(); 196 if (INCHARSET(ch, DIGIT)){ 197 ryylval = ch-'0'; 198 if (ch=='1') { 199 if (INCHARSET( (ch = getchar()), REGDIGIT)) 200 ryylval = 10+ch-'0'; 201 else 202 ungetc(ch); 203 } 204 /* 205 * God only knows what the original author 206 * wanted this undocumented feature to 207 * do. 208 * %5++ is really r7 209 */ 210 while(INCHARSET( (ch = getchar()), SIGN)) { 211 if (ch=='+') 212 ryylval++; 213 else 214 ryylval--; 215 } 216 ungetc(ch); 217 val = REG; 218 } else { 219 ungetc(ch); 220 val = REGOP; 221 } 222 goto ret; 223 224 case ALPH: 225 ch1 = ch; 226 if (INCHARSET(ch, SZSPECBEGIN)){ 227 if( (ch = getchar()) == '`' || ch == '^'){ 228 ch1 |= 0100; /*convert to lower*/ 229 switch(ch1){ 230 case 'b': ryylval = 1; break; 231 case 'w': ryylval = 2; break; 232 case 'l': ryylval = 4; break; 233 default: ryylval = d124; break; 234 } 235 val = SIZESPEC; 236 goto ret; 237 } else { 238 ungetc(ch); 239 ch = ch1; /*restore first character*/ 240 } 241 } 242 rcp = yytext; 243 do { 244 if (rcp < &yytext[NCPName]) 245 *rcp++ = ch; 246 } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 247 *rcp = '\0'; 248 while (INCHARSET(ch, SPACE)) 249 ch = getchar(); 250 ungetc(ch); 251 252 switch((op = *lookup(1))->s_tag){ 253 case 0: 254 case LABELID: 255 /* 256 * Its a name... (Labels are subsets ofname) 257 */ 258 ryylval = (int)op; 259 val = NAME; 260 break; 261 case INST0: 262 case INSTn: 263 case IJXXX: 264 opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 265 opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 266 val = op->s_tag; 267 break; 268 default: 269 ryylval = ( (struct instab *)op)->i_popcode; 270 val = op->s_tag; 271 break; 272 } 273 goto ret; 274 275 case DIG: 276 /* 277 * restore local inbufptr and inbufcnt 278 */ 279 REGTOMEMBUF; 280 val = number(ch); 281 MEMTOREGBUF; 282 /* 283 * yylval or yybignum has been stuffed as a side 284 * effect to number(); get the global yylval 285 * into our fast local copy in case it was an INT. 286 */ 287 ryylval = yylval; 288 goto ret; 289 290 case LSH: 291 case RSH: 292 /* 293 * We allow the C style operators 294 * << and >>, as well as < and > 295 */ 296 if ( (ch1 = getchar()) != ch) 297 ungetc(ch1); 298 val = ryylval; 299 goto ret; 300 301 case MINUS: 302 if ( (ch = getchar()) =='(') 303 ryylval=val=MP; 304 else { 305 ungetc(ch); 306 val=MINUS; 307 } 308 goto ret; 309 310 case SQ: 311 if ((ryylval = getchar()) == '\n') 312 scanlineno++; /*not entirely correct*/ 313 val = INT; 314 goto ret; 315 316 case DQ: 317 eatstr: 318 linescrossed = 0; 319 for(rcp = strtext, maxstrlg = NCPString; maxstrlg > 0; --maxstrlg){ 320 switch(ch = getchar()){ 321 case '"': 322 goto tailDQ; 323 default: 324 stuff: 325 pchar(rcp, ch); 326 break; 327 case '\n': 328 yywarning("New line in a string constant"); 329 scanlineno++; 330 linescrossed++; 331 ch = getchar(); 332 switch(ch){ 333 case EOFCHAR: 334 pchar(rcp, '\n'); 335 ungetc(EOFCHAR); 336 goto tailDQ; 337 default: 338 ungetc(ch); 339 ch = '\n'; 340 goto stuff; 341 } 342 break; 343 344 case '\\': 345 ch = getchar(); /*skip the '\\'*/ 346 if ( INCHARSET(ch, BSESCAPE)){ 347 switch (ch){ 348 case 'b': ch = '\b'; goto stuff; 349 case 'f': ch = '\f'; goto stuff; 350 case 'n': ch = '\n'; goto stuff; 351 case 'r': ch = '\r'; goto stuff; 352 case 't': ch = '\t'; goto stuff; 353 } 354 } 355 if ( !(INCHARSET(ch, OCTDIGIT)) ) 356 goto stuff; 357 i = 0; 358 intval = 0; 359 while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 360 i++; 361 intval <<= 3; 362 intval += ch - '0'; 363 ch = getchar(); 364 } 365 ungetc(ch); 366 ch = (char)intval; 367 goto stuff; 368 } 369 } 370 tailDQ: ; 371 /* 372 * account for any lines that were crossed 373 */ 374 if (linescrossed){ 375 ptoken(bufptr, ILINESKIP); 376 pint(bufptr, linescrossed); 377 } 378 /* 379 * put the string in strtext into the string pool 380 * 381 * The value in ryylval points to the string; 382 * the previous 2 bytes is the length of the string 383 * 384 * Cheat: append a trailing null to the string 385 * and then adjust the string length to ignore 386 * the trailing null. If any STRING client requires 387 * the trailing null, the client can just change STRLEN 388 */ 389 val = STRING; 390 *rcp++ = 0; 391 ryylval = (int)savestr(strtext, rcp - strtext); 392 STRLEN(((char *)ryylval)) -= 1; 393 goto ret; 394 395 case BADCHAR: 396 linescrossed = lineno; 397 lineno = scanlineno; 398 yyerror("Illegal character mapped: %d, char read:(octal) %o", 399 ryylval, ch); 400 lineno = linescrossed; 401 val = BADCHAR; 402 goto ret; 403 404 default: 405 val = ryylval; 406 goto ret; 407 } /*end of the switch*/ 408 /* 409 * here with one token, so stuff it 410 */ 411 ret: 412 oval = val; 413 ptoken(bufptr, val); 414 switch(val){ 415 case ILINESKIP: 416 pint(bufptr, ryylval); 417 break; 418 case SIZESPEC: 419 pchar(bufptr, ryylval); 420 break; 421 case BFINT: plong(bufptr, ryylval); 422 break; 423 case INT: plong(bufptr, ryylval); 424 break; 425 case BIGNUM: pnumber(bufptr, yybignum); 426 break; 427 case STRING: pptr(bufptr, (int)(char *)ryylval); 428 break; 429 case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 430 break; 431 case REG: pchar(bufptr, ryylval); 432 break; 433 case INST0: 434 case INSTn: 435 popcode(bufptr, opstruct); 436 break; 437 case IJXXX: 438 popcode(bufptr, opstruct); 439 pptr(bufptr, (int)(struct symtab *)symalloc()); 440 break; 441 case ISTAB: 442 case ISTABSTR: 443 case ISTABNONE: 444 case ISTABDOT: 445 case IALIGN: 446 pptr(bufptr, (int)(struct symtab *)symalloc()); 447 break; 448 /* 449 * default: 450 */ 451 } 452 builtval: ; 453 } /*end of the while to stuff the buffer*/ 454 done: 455 bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 456 /* 457 * This is a real kludge: 458 * 459 * We put the last token in the buffer to be a MINUS 460 * symbol. This last token will never be picked up 461 * in the normal way, but can be looked at during 462 * a peekahead look that the short circuit expression 463 * evaluator uses to see if an expression is complicated. 464 * 465 * Consider the following situation: 466 * 467 * .word 45 + 47 468 * buffer 1 | buffer 0 469 * the peekahead would want to look across the buffer, 470 * but will look in the buffer end zone, see the minus, and 471 * fail. 472 */ 473 ptoken(bufptr, MINUS); 474 REGTOMEMBUF; 475 } 476