/*
 * Grammar and scanner for a lex-style specification: a definitions
 * section, a delimiter, then the rules (regular expressions followed by
 * actions).  The grammar builds a parse tree through the mn0/mn1/mn2/mnp
 * node constructors; yylex() below is the hand-written tokenizer that
 * feeds it and also copies user code through to the output file.
 */
%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
%left SCON '/' NEWE
%left '|'
%left '$' '^'
%left CHAR CCL NCCL '(' '.' STR NULLS
%left ITER
%left CAT
%left '*' '+' '?'

%{
# include "ldefs.h"
#define YYSTYPE union _yystype_
/* semantic value: either an integer (character, node index, count) or a string */
union _yystype_
{
	int	i;
	uchar	*cp;
};
%}
%%
%{
/* scratch variables shared by all of the actions below */
int i;
int j,k;
int g;
uchar *p;
%}
acc	:	lexinput
	={
# ifdef DEBUG
		if(debug) sect2dump();
# endif
	}
	;
lexinput:	defns delim prods end
	|	defns delim end
	={
		/* no rules at all: still make sure the header is emitted once */
		if(!funcflag)phead2();
		funcflag = TRUE;
	}
	| error
	={
# ifdef DEBUG
		if(debug) {
			sect1dump();
			sect2dump();
		}
# endif
	}
	;
end:		delim | ;
defns:	defns STR STR
	={
		/* store "name\0substitution\0" in the dp pool and index both halves */
		strcpy((char*)dp,(char*)$2.cp);
		def[dptr] = dp;
		dp += strlen((char*)$2.cp) + 1;
		strcpy((char*)dp,(char*)$3.cp);
		subs[dptr++] = dp;
		if(dptr >= DEFSIZE)
			error("Too many definitions");
		dp += strlen((char*)$3.cp) + 1;
		if(dp >= dchar+DEFCHAR)
			error("Definitions too long");
		subs[dptr]=def[dptr]=0;	/* for lookup - require ending null */
	}
	|
	;
delim:	DELIM
	={
# ifdef DEBUG
		if(sect == DEFSECTION && debug) sect1dump();
# endif
		sect++;
	}
	;
prods:	prods pr
	={	$$.i = mn2(RNEWE,$1.i,$2.i);
		}
	|	pr
	={	$$.i = $1.i;}
	;
pr:	r NEWE
	={
		/* append the accepting node for this rule; the kind depends on
		 * whether a trailing-context operator was seen in the rule */
		if(divflg == TRUE)
			i = mn1(S1FINAL,casecount);
		else i = mn1(FINAL,casecount);
		$$.i = mn2(RCAT,$1.i,i);
		divflg = FALSE;
		casecount++;
	}
	| error NEWE
	={
# ifdef DEBUG
		if(debug) sect2dump();
# endif
	}
	;
r:	CHAR
	={	$$.i = mn0($1.i); }
	| STR
	={
		/* a multi-character string becomes a left-leaning chain of RSTR nodes */
		p = $1.cp;
		i = mn0(*p++);
		while(*p)
			i = mn2(RSTR,i,*p++);
		$$.i = i;
	}
	| '.'
	={
		/* every character from 1 to NCH-1 except newline; the class
		 * string is built once and remembered in psave */
		symbol['\n'] = 0;
		if(psave == FALSE){
			p = ccptr;
			psave = ccptr;
			for(i=1;i<'\n';i++){
				symbol[i] = 1;
				*ccptr++ = i;
			}
			for(i='\n'+1;i<NCH;i++){
				symbol[i] = 1;
				*ccptr++ = i;
			}
			*ccptr++ = 0;
			if(ccptr > ccl+CCLSIZE)
				error("Too many large character classes");
		}
		else
			p = psave;
		$$.i = mnp(RCCL, p);
		cclinter(1);
	}
	| CCL
	={	$$.i = mnp(RCCL,$1.cp); }
	| NCCL
	={	$$.i = mnp(RNCCL,$1.cp); }
	| r '*'
	={	$$.i = mn1(STAR,$1.i); }
	| r '+'
	={	$$.i = mn1(PLUS,$1.i); }
	| r '?'
	={	$$.i = mn1(QUEST,$1.i); }
	| r '|' r
	={	$$.i = mn2(BAR,$1.i,$3.i); }
	| r r %prec CAT
	={	$$.i = mn2(RCAT,$1.i,$2.i); }
	| r '/' r
	={
		/* only the first slash in a rule is treated as trailing context */
		if(!divflg){
			j = mn1(S2FINAL,-casecount);
			i = mn2(RCAT,$1.i,j);
			$$.i = mn2(DIV,i,$3.i);
		}
		else {
			$$.i = mn2(RCAT,$1.i,$3.i);
			warning("Extra slash removed");
		}
		divflg = TRUE;
	}
	| r ITER ',' ITER '}'
	={
		/* bounded repetition: order the two bounds, then build the
		 * minimum count as a concatenation and OR in each longer count */
		if($2.i > $4.i){
			i = $2.i;
			$2.i = $4.i;
			$4.i = i;
		}
		if($4.i <= 0)
			warning("Iteration range must be positive");
		else {
			j = $1.i;
			for(k = 2; k<=$2.i;k++)
				j = mn2(RCAT,j,dupl($1.i));
			for(i = $2.i+1; i<=$4.i; i++){
				g = dupl($1.i);
				for(k=2;k<=i;k++)
					g = mn2(RCAT,g,dupl($1.i));
				j = mn2(BAR,j,g);
			}
			$$.i = j;
		}
	}
	| r ITER '}'
	={
		/* exact repetition count */
		if($2.i < 0)warning("Can't have negative iteration");
		else if($2.i == 0) $$.i = mn0(RNULLS);
		else {
			j = $1.i;
			for(k=2;k<=$2.i;k++)
				j = mn2(RCAT,j,dupl($1.i));
			$$.i = j;
		}
	}
	| r ITER ',' '}'
	={
				/* from n to infinity */
		if($2.i < 0)warning("Can't have negative iteration");
		else if($2.i == 0) $$.i = mn1(STAR,$1.i);
		else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
		else {		/* >= 2 iterations minimum */
			j = $1.i;
			for(k=2;k<$2.i;k++)
				j = mn2(RCAT,j,dupl($1.i));
			k = mn1(PLUS,dupl($1.i));
			$$.i = mn2(RCAT,j,k);
		}
	}
	| SCON r
	={	$$.i = mn2(RSCON,$2.i,(uintptr)$1.cp); }
	| '^' r
	={	$$.i = mn1(CARAT,$2.i); }
	| r '$'
	={
		/* end-of-line anchor: handled like trailing context of a newline */
		i = mn0('\n');
		if(!divflg){
			j = mn1(S2FINAL,-casecount);
			k = mn2(RCAT,$1.i,j);
			$$.i = mn2(DIV,k,i);
		}
		else $$.i = mn2(RCAT,$1.i,i);
		divflg = TRUE;
	}
	| '(' r ')'
	={	$$.i = $2.i; }
	|	NULLS
	={	$$.i = mn0(RNULLS); }
	;
%%
/*
 * Tokenizer for the grammar above.
 *
 * Behaviour depends on the global `sect':
 *   DEFSECTION  - reads whole lines; handles %-requests, copies code
 *                 lines to the output, and returns each definition as
 *                 two STR tokens (name, then substitution text).
 *   RULESECTION - reads character by character and returns regular
 *                 expression tokens; actions are copied out by cpyact().
 *   otherwise   - writes the tail via ptail(), copies the remaining
 *                 input lines to the output and returns 0.
 *
 * Returns the token code (0 at end of input); the semantic value is
 * left in yylval.
 */
int
yylex(void)
{
	uchar *p;
	int c, i;
	uchar  *t, *xp;
	int n, j, k, x;
	static int sectbegin;
	static uchar token[TOKENSIZE];
	static int iter;

# ifdef DEBUG
	yylval.i = 0;
	yylval.cp = 0;	/* the union has no member `p'; clear the pointer member */
# endif

	if(sect == DEFSECTION) {		/* definitions section */
		while(!eof) {
			if(prev == '\n'){		/* next char is at beginning of line */
				getl(p=buf);
				switch(*p){
				case '%':
					switch(*(p+1)){
					case '%':
						lgate();
						Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
						Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
						sectbegin = TRUE;
						/* trial allocation of the combined size first, then the real arrays */
						i = treesize*(sizeof(*name)+sizeof(*left)+
							sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
						p = myalloc(i,1);
						if(p == 0)
							error("Too little core for parse tree");
						free(p);
						name = myalloc(treesize,sizeof(*name));
						left = myalloc(treesize,sizeof(*left));
						right = myalloc(treesize,sizeof(*right));
						nullstr = myalloc(treesize,sizeof(*nullstr));
						parent = myalloc(treesize,sizeof(*parent));
						ptr = myalloc(treesize,sizeof(*ptr));
						if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0 || ptr == 0)
							error("Too little core for parse tree");
						return(freturn(DELIM));
					case 'p': case 'P':	/* has overridden number of positions */
						while(*p && !isdigit(*p))p++;
						maxpos = atol((char*)p);
# ifdef DEBUG
						if (debug) print("positions (%%p) now %d\n",maxpos);
# endif
						if(report == 2)report = 1;
						continue;
					case 'n': case 'N':	/* has overridden number of states */
						while(*p && !isdigit(*p))p++;
						nstates = atol((char*)p);
# ifdef DEBUG
						if(debug)print( " no. states (%%n) now %d\n",nstates);
# endif
						if(report == 2)report = 1;
						continue;
					case 'e': case 'E':		/* has overridden number of tree nodes */
						while(*p && !isdigit(*p))p++;
						treesize = atol((char*)p);
# ifdef DEBUG
						if (debug) print("treesize (%%e) now %d\n",treesize);
# endif
						if(report == 2)report = 1;
						continue;
					case 'o': case 'O':
						while (*p && !isdigit(*p))p++;
						outsize = atol((char*)p);
						if (report ==2) report=1;
						continue;
					case 'a': case 'A':		/* has overridden number of transitions */
						while(*p && !isdigit(*p))p++;
						if(report == 2)report = 1;
						ntrans = atol((char*)p);
# ifdef DEBUG
						if (debug)print("N. trans (%%a) now %d\n",ntrans);
# endif
						continue;
					case 'k': case 'K': /* overriden packed char classes */
						while (*p && !isdigit(*p))p++;
						if (report==2) report=1;
						free(pchar);
						pchlen = atol((char*)p);
# ifdef DEBUG
						if (debug) print( "Size classes (%%k) now %d\n",pchlen);
# endif
						pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
						continue;
					case '{':
						/* copy lines verbatim up to the closing "%}" line */
						lgate();
						while(getl(p) && strcmp((char*)p,"%}") != 0)
							Bprint(&fout, "%s\n",(char*)p);
						if(p[0] == '%') continue;
						error("Premature eof");
						/* reaches the next case only if error() returns */
					case 's': case 'S':		/* start conditions */
						lgate();
						while(*p && strchr(" \t,", *p) == 0) p++;
						n = TRUE;
						while(n){
							/* split the rest of the line on blanks, tabs and commas */
							while(*p && strchr(" \t,", *p)) p++;
							t = p;
							while(*p && strchr(" \t,", *p) == 0)p++;
							if(!*p) n = FALSE;
							*p++ = 0;
							if (*t == 0) continue;
							i = sptr*2;
							Bprint(&fout,"#define %s %d\n",(char*)t,i);
							strcpy((char*)sp, (char*)t);
							sname[sptr++] = sp;
							sname[sptr] = 0;	/* required by lookup */
							if(sptr >= STARTSIZE)
								error("Too many start conditions");
							sp += strlen((char*)sp) + 1;
							if(sp >= stchar+STARTCHAR)
								error("Start conditions too long");
						}
						continue;
					default:
						warning("Invalid request %s",p);
						continue;
					}	/* end of switch after seeing '%' */
				case ' ': case '\t':		/* must be code */
					lgate();
					Bprint(&fout, "%s\n",(char*)p);
					continue;
				default:		/* definition */
					/* return the name now; remember where the rest starts in bptr */
					while(*p && !isspace(*p)) p++;
					if(*p == 0)
						continue;
					prev = *p;
					*p = 0;
					bptr = p+1;
					yylval.cp = buf;
					if(isdigit(buf[0]))
						warning("Substitution strings may not begin with digits");
					return(freturn(STR));
				}
			}
			/* still sect 1, but prev != '\n' */
			else {
				/* second half of a definition: the substitution text */
				p = bptr;
				while(*p && isspace(*p)) p++;
				if(*p == 0)
					warning("No translation given - null string assumed");
				strcpy((char*)token, (char*)p);
				yylval.cp = token;
				prev = '\n';
				return(freturn(STR));
			}
		}
		/* end of section one processing */
	} else if(sect == RULESECTION){		/* rules and actions */
		while(!eof){
			switch(c=gch()){
			case '\0':
				return(freturn(0));
			case '\n':
				if(prev == '\n') continue;
				x = NEWE;
				break;
			case ' ':
			case '\t':
				/* white space introduces code: either leading code right
				 * after the delimiter, or the action of the current rule */
				if(sectbegin == TRUE){
					cpyact();
					while((c=gch()) && c != '\n');
					continue;
				}
				if(!funcflag)phead2();
				funcflag = TRUE;
				Bprint(&fout,"case %d:\n",casecount);
				if(cpyact())
					Bprint(&fout,"break;\n");
				while((c=gch()) && c != '\n');
				if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
					warning("Executable statements should occur right after %%");
					continue;
				}
				x = NEWE;
				break;
			case '%':
				if(prev != '\n') goto character;
				if(peek == '{'){	/* included code */
					getl(buf);
					while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
						Bprint(&fout,"%s\n",(char*)buf);
					continue;
				}
				if(peek == '%'){
					gch();
					gch();
					x = DELIM;
					break;
				}
				goto character;
			case '|':
				if(peek == ' ' || peek == '\t' || peek == '\n'){
					Bprint(&fout,"%d\n",30000+casecount++);
					continue;
				}
				x = '|';
				break;
			case '$':
				if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
					x = c;
					break;
				}
				goto character;
			case '^':
				if(prev != '\n' && scon != TRUE) goto character;	/* valid only at line begin */
				x = c;
				break;
			case '?':
			case '+':
			case '.':
			case '*':
			case '(':
			case ')':
			case ',':
			case '/':
				x = c;
				break;
			case '}':
				iter = FALSE;
				x = c;
				break;
			case '{':	/* either iteration or definition */
				if(isdigit(c=gch())){	/* iteration */
					iter = TRUE;
				ieval:
					/* NOTE(review): digit run is not bounded by TOKENSIZE */
					i = 0;
					while(isdigit(c)){
						token[i++] = c;
						c = gch();
					}
					token[i] = 0;
					yylval.i = atol((char*)token);
					munputc(c);
					x = ITER;
					break;
				} else {		/* definition */
					/* NOTE(review): name length is not bounded by TOKENSIZE */
					i = 0;
					while(c && c!='}'){
						token[i++] = c;
						c = gch();
					}
					token[i] = 0;
					i = lookup(token,def);
					if(i < 0)
						warning("Definition %s not found",token);
					else
						munputs(subs[i]);	/* push the substitution back for rescanning */
					continue;
				}
			case '<':		/* start condition ? */
				if(prev != '\n')		/* not at line begin, not start */
					goto character;
				t = slptr;
				do {
					/* NOTE(review): name length is not bounded by TOKENSIZE */
					i = 0;
					c = gch();
					while(c != ',' && c && c != '>'){
						token[i++] = c;
						c = gch();
					}
					token[i] = 0;
					if(i == 0)
						goto character;
					i = lookup(token,sname);
					if(i < 0) {
						warning("Undefined start condition %s",token);
						continue;
					}
					*slptr++ = i+1;
				} while(c && c != '>');
				*slptr++ = 0;
				/* check if previous value re-usable */
				for (xp=slist; xp<t; ){
					if (strcmp((char*)xp, (char*)t)==0)
						break;
					while (*xp++);
				}
				if (xp<t){
					/* re-use previous pointer to string */
					slptr=t;
					t=xp;
				}
				if(slptr > slist+STARTSIZE)		/* note not packed ! */
					error("Too many start conditions used");
				yylval.cp = t;
				x = SCON;
				break;
			case '"':
				i = 0;
				while((c=gch()) && c != '"' && c != '\n'){
					if(c == '\\') c = usescape(gch());
					token[i++] = c;
					/*
					 * Stop once the buffer is full so that the
					 * terminating NUL below still fits inside
					 * token[TOKENSIZE].
					 */
					if(i >= TOKENSIZE){
						warning("String too long");
						i = TOKENSIZE-1;
						break;
					}
				}
				if(c == '\n') {
					/* report against the line the string started on */
					yyline--;
					warning("Non-terminated string");
					yyline++;
				}
				token[i] = 0;
				if(i == 0)x = NULLS;
				else if(i == 1){
					yylval.i = token[0];
					x = CHAR;
				} else {
					yylval.cp = token;
					x = STR;
				}
				break;
			case '[':
				for(i=1;i<NCH;i++) symbol[i] = 0;
				x = CCL;
				if((c = gch()) == '^'){
					x = NCCL;
					c = gch();
				}
				while(c != ']' && c){
					if(c == '\\') c = usescape(gch());
					symbol[c] = 1;
					j = c;
					if((c=gch()) == '-' && peek != ']'){		/* range specified */
						c = gch();
						if(c == '\\') c = usescape(gch());
						k = c;
						if(j > k) {
							n = j;
							j = k;
							k = n;
						}
						if(!(('A' <= j && k <= 'Z') ||
						     ('a' <= j && k <= 'z') ||
						     ('0' <= j && k <= '9')))
							warning("Non-portable Character Class");
						for(n=j+1;n<=k;n++)
							symbol[n] = 1;		/* implementation dependent */
						c = gch();
					}
				}
				/* try to pack ccl's */
				i = 0;
				for(j=0;j<NCH;j++)
					if(symbol[j])token[i++] = j;
				token[i] = 0;
				p = ccl;
				while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
				if(p < ccptr)	/* found it */
					yylval.cp = p;
				else {
					yylval.cp = ccptr;
					strcpy((char*)ccptr,(char*)token);
					ccptr += strlen((char*)token) + 1;
					if(ccptr >= ccl+CCLSIZE)
						error("Too many large character classes");
				}
				cclinter(x==CCL);
				break;
			case '\\':
				c = usescape(gch());
				/* fall through: the escaped value is an ordinary character */
			default:
			character:
				if(iter){	/* second part of an iteration */
					iter = FALSE;
					if('0' <= c && c <= '9')
						goto ieval;
				}
				if(isalpha(peek)){
					/* gather a run of letters into one STR token */
					/* NOTE(review): run length is not bounded by TOKENSIZE */
					i = 0;
					yylval.cp = token;
					token[i++] = c;
					while(isalpha(peek))
						token[i++] = gch();
					/* a following postfix operator applies to the last letter only */
					if(peek == '?' || peek == '*' || peek == '+')
						munputc(token[--i]);
					token[i] = 0;
					if(i == 1){
						yylval.i = token[0];
						x = CHAR;
					}
					else x = STR;
				} else {
					yylval.i = c;
					x = CHAR;
				}
			}
			scon = FALSE;
			if(x == SCON)scon = TRUE;
			sectbegin = FALSE;
			return(freturn(x));
		}
	}
	/* section three */
	ptail();
# ifdef DEBUG
	if(debug)
		Bprint(&fout,"\n/*this comes from section three - debug */\n");
# endif
	while(getl(buf) && !eof)
		Bprint(&fout,"%s\n",(char*)buf);
	return(freturn(0));
}
/* end of yylex */
# ifdef DEBUG
/*
 * Debug version of the token-return hook: when yydebug is set, print the
 * token code and the matching yylval member, then return the code
 * unchanged.
 * NOTE(review): without DEBUG, freturn is presumably supplied elsewhere
 * (e.g. as a macro in ldefs.h) - confirm.
 */
int
freturn(int i)
{
	if(yydebug) {
		print("now return ");
		if(i < NCH) allprint(i);
		else print("%d",i);
		print("   yylval = ");
		switch(i){
			case STR: case CCL: case NCCL:
				strpt(yylval.cp);
				break;
			case CHAR:
				allprint(yylval.i);
				break;
			default:
				print("%d",yylval.i);
				break;
		}
		print("\n");
	}
	return(i);
}
# endif