1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 6 #define Bungetrune Bungetc /* ok for now. */ 7 8 /* 9 * all these are 32 bit 10 */ 11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */ 12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037))) 13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037))) 14 #define NWORDS(n) (((n)+32)/32) 15 16 #define PARSER "/sys/lib/yaccpar" 17 #define PARSERS "/sys/lib/yaccpars" 18 #define TEMPNAME "y.tmp.XXXXXX" 19 #define ACTNAME "y.acts.XXXXXX" 20 #define OFILE "tab.c" 21 #define FILEU "output" 22 #define FILED "tab.h" 23 #define FILEDEBUG "debug" 24 25 enum 26 { 27 /* 28 * the following are adjustable 29 * according to memory size 30 */ 31 ACTSIZE = 30000, 32 MEMSIZE = 30000, 33 NSTATES = 2000, 34 NTERMS = 255, 35 NPROD = 800, 36 NNONTERM = 300, 37 TEMPSIZE = 2000, 38 CNAMSZ = 5000, 39 LSETSIZE = 600, 40 WSETSIZE = 350, 41 42 NAMESIZE = 50, 43 NTYPES = 63, 44 ISIZE = 400, 45 46 PRIVATE = 0xE000, /* unicode private use */ 47 48 /* relationships which must hold: 49 TBITSET ints must hold NTERMS+1 bits... 50 WSETSIZE >= NNONTERM 51 LSETSIZE >= NNONTERM 52 TEMPSIZE >= NTERMS + NNONTERM + 1 53 TEMPSIZE >= NSTATES 54 */ 55 56 NTBASE = 010000, 57 ERRCODE = 8190, 58 ACCEPTCODE = 8191, 59 60 NOASC = 0, /* no assoc. */ 61 LASC = 1, /* left assoc. */ 62 RASC = 2, /* right assoc. */ 63 BASC = 3, /* binary assoc. */ 64 65 /* flags for state generation */ 66 67 DONE = 0, 68 MUSTDO = 1, 69 MUSTLOOKAHEAD = 2, 70 71 /* flags for a rule having an action, and being reduced */ 72 73 ACTFLAG = 04, 74 REDFLAG = 010, 75 76 /* output parser flags */ 77 YYFLAG1 = -1000, 78 79 /* parse tokens */ 80 IDENTIFIER = PRIVATE, 81 MARK, 82 TERM, 83 LEFT, 84 RIGHT, 85 BINARY, 86 PREC, 87 LCURLY, 88 IDENTCOLON, 89 NUMBER, 90 START, 91 TYPEDEF, 92 TYPENAME, 93 UNION, 94 95 ENDFILE = 0, 96 97 EMPTY = 1, 98 WHOKNOWS = 0, 99 OK = 1, 100 NOMORE = -1000, 101 }; 102 103 /* macros for getting associativity and precedence levels */ 104 105 #define ASSOC(i) ((i)&03) 106 #define PLEVEL(i) (((i)>>4)&077) 107 #define TYPE(i) (((i)>>10)&077) 108 109 /* macros for setting associativity and precedence levels */ 110 111 #define SETASC(i,j) i |= j 112 #define SETPLEV(i,j) i |= (j<<4) 113 #define SETTYPE(i,j) i |= (j<<10) 114 115 /* looping macros */ 116 117 #define TLOOP(i) for(i=1; i<=ntokens; i++) 118 #define NTLOOP(i) for(i=0; i<=nnonter; i++) 119 #define PLOOP(s,i) for(i=s; i<nprod; i++) 120 #define SLOOP(i) for(i=0; i<nstate; i++) 121 #define WSBUMP(x) x++ 122 #define WSLOOP(s,j) for(j=s; j<cwp; j++) 123 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++) 124 #define SETLOOP(i) for(i=0; i<tbitset; i++) 125 126 /* command to clobber tempfiles after use */ 127 128 #define ZAPFILE(x) if(x) remove(x) 129 130 /* I/O descriptors */ 131 132 Biobuf* faction; /* file for saving actions */ 133 Biobuf* fdefine; /* file for #defines */ 134 Biobuf* fdebug; /* y.debug for strings for debugging */ 135 Biobuf* ftable; /* y.tab.c file */ 136 Biobuf* ftemp; /* tempfile to pass 2 */ 137 Biobuf* finput; /* input file */ 138 Biobuf* foutput; /* y.output file */ 139 140 /* communication variables between various I/O routines */ 141 142 char* infile; /* input file name */ 143 int numbval; /* value of an input number */ 144 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */ 145 146 /* structure declarations */ 147 148 typedef 149 struct 150 { 151 int lset[TBITSET]; 152 } Lkset; 153 154 typedef 155 struct 156 { 157 int* pitem; 158 Lkset* look; 159 } Item; 160 161 typedef 162 struct 163 { 164 char* name; 165 int value; 166 } Symb; 167 168 typedef 169 struct 170 { 171 int* pitem; 172 int flag; 173 Lkset ws; 174 } Wset; 175 176 /* storage of names */ 177 178 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */ 179 int cnamsz = CNAMSZ; /* size of cnames */ 180 char* cnamp = cnames; /* place where next name is to be put in */ 181 int ndefout = 4; /* number of defined symbols output */ 182 char* tempname; 183 char* actname; 184 char ttempname[] = TEMPNAME; 185 char tactname[] = ACTNAME; 186 char* parser = PARSER; 187 char* yydebug; 188 189 /* storage of types */ 190 int ntypes; /* number of types defined */ 191 char* typeset[NTYPES]; /* pointers to type tags */ 192 193 /* token information */ 194 195 int ntokens = 0 ; /* number of tokens */ 196 Symb tokset[NTERMS]; 197 int toklev[NTERMS]; /* vector with the precedence of the terminals */ 198 199 /* nonterminal information */ 200 201 int nnonter = -1; /* the number of nonterminals */ 202 Symb nontrst[NNONTERM]; 203 int start; /* start symbol */ 204 205 /* assigned token type values */ 206 int extval = 0; 207 208 char* ytabc = OFILE; /* name of y.tab.c */ 209 210 /* grammar rule information */ 211 212 int mem0[MEMSIZE] ; /* production storage */ 213 int* mem = mem0; 214 int nprod = 1; /* number of productions */ 215 int* prdptr[NPROD]; /* pointers to descriptions of productions */ 216 int levprd[NPROD]; /* precedence levels for the productions */ 217 int rlines[NPROD]; /* line number for this rule */ 218 219 /* state information */ 220 221 int nstate = 0; /* number of states */ 222 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */ 223 int tystate[NSTATES]; /* contains type information about the states */ 224 int defact[NSTATES]; /* the default actions of states */ 225 int tstates[NTERMS]; /* states generated by terminal gotos */ 226 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */ 227 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */ 228 int lastred; /* the number of the last reduction of a state */ 229 230 /* lookahead set information */ 231 232 Lkset lkst[LSETSIZE]; 233 int nolook; /* flag to turn off lookahead computations */ 234 int tbitset; /* size of lookahead sets */ 235 int nlset = 0; /* next lookahead set index */ 236 int nolook = 0; /* flag to suppress lookahead computations */ 237 Lkset clset; /* temporary storage for lookahead computations */ 238 239 /* working set information */ 240 241 Wset wsets[WSETSIZE]; 242 Wset* cwp; 243 244 /* storage for action table */ 245 246 int amem[ACTSIZE]; /* action table storage */ 247 int* memp = amem; /* next free action table position */ 248 int indgo[NSTATES]; /* index to the stored goto table */ 249 250 /* temporary vector, indexable by states, terms, or ntokens */ 251 252 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */ 253 int lineno = 1; /* current input line number */ 254 int fatfl = 1; /* if on, error is fatal */ 255 int nerrors = 0; /* number of errors */ 256 257 /* statistics collection variables */ 258 259 int zzgoent; 260 int zzgobest; 261 int zzacent; 262 int zzexcp; 263 int zzclose; 264 int zzrrconf; 265 int zzsrconf; 266 267 int* ggreed = lkst[0].lset; 268 int* pgo = wsets[0].ws.lset; 269 int* yypgo = &nontrst[0].value; 270 271 int maxspr = 0; /* maximum spread of any entry */ 272 int maxoff = 0; /* maximum offset into a array */ 273 int* pmem = mem0; 274 int* maxa; 275 int nxdb = 0; 276 int adb = 0; 277 278 279 /* storage for information about the nonterminals */ 280 281 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */ 282 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */ 283 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */ 284 285 /* random stuff picked out from between functions */ 286 287 int indebug = 0; 288 Wset* zzcwp = wsets; 289 int zzgoent = 0; 290 int zzgobest = 0; 291 int zzacent = 0; 292 int zzexcp = 0; 293 int zzclose = 0; 294 int zzsrconf = 0; 295 int* zzmemsz = mem0; 296 int zzrrconf = 0; 297 int pidebug = 0; /* debugging flag for putitem */ 298 int gsdebug = 0; 299 int cldebug = 0; /* debugging flag for closure */ 300 int pkdebug = 0; 301 int g2debug = 0; 302 303 struct 304 { 305 char* name; 306 long value; 307 } resrv[] = 308 { 309 "binary", BINARY, 310 "left", LEFT, 311 "nonassoc", BINARY, 312 "prec", PREC, 313 "right", RIGHT, 314 "start", START, 315 "term", TERM, 316 "token", TERM, 317 "type", TYPEDEF, 318 "union", UNION, 319 0, 320 }; 321 322 /* define functions */ 323 324 void main(int, char**); 325 void others(void); 326 char* chcopy(char*, char*); 327 char* writem(int*); 328 char* symnam(int); 329 void summary(void); 330 void error(char*, ...); 331 void aryfil(int*, int, int); 332 int setunion(int*, int*); 333 void prlook(Lkset*); 334 void cpres(void); 335 void cpfir(void); 336 int state(int); 337 void putitem(int*, Lkset*); 338 void cempty(void); 339 void stagen(void); 340 void closure(int); 341 Lkset* flset(Lkset*); 342 void cleantmp(void); 343 void intr(void); 344 void setup(int, char**); 345 void finact(void); 346 int defin(int, char*); 347 void defout(int); 348 char* cstash(char*); 349 long gettok(void); 350 int fdtype(int); 351 int chfind(int, char*); 352 void cpyunion(void); 353 void cpycode(void); 354 int skipcom(void); 355 void cpyact(int); 356 void openup(char*, int, int, int, char*); 357 void output(void); 358 int apack(int*, int); 359 void go2out(void); 360 void go2gen(int); 361 void precftn(int, int, int); 362 void wract(int); 363 void wrstate(int); 364 void warray(char*, int*, int); 365 void hideprod(void); 366 void callopt(void); 367 void gin(int); 368 void stin(int); 369 int nxti(void); 370 void osummary(void); 371 void aoutput(void); 372 void arout(char*, int*, int); 373 int gtnm(void); 374 375 void 376 main(int argc, char *argv[]) 377 { 378 379 setup(argc, argv); /* initialize and read productions */ 380 tbitset = NWORDS(ntokens); 381 cpres(); /* make table of which productions yield a given nonterminal */ 382 cempty(); /* make a table of which nonterminals can match the empty string */ 383 cpfir(); /* make a table of firsts of nonterminals */ 384 stagen(); /* generate the states */ 385 output(); /* write the states and the tables */ 386 go2out(); 387 hideprod(); 388 summary(); 389 callopt(); 390 others(); 391 exits(0); 392 } 393 394 /* 395 * put out other arrays, copy the parsers 396 */ 397 void 398 others(void) 399 { 400 int c, i, j; 401 402 finput = Bopen(parser, OREAD); 403 if(finput == 0) 404 error("cannot find parser %s", parser); 405 warray("yyr1", levprd, nprod); 406 aryfil(temp1, nprod, 0); 407 PLOOP(1, i) 408 temp1[i] = prdptr[i+1]-prdptr[i]-2; 409 warray("yyr2", temp1, nprod); 410 411 aryfil(temp1, nstate, -1000); 412 TLOOP(i) 413 for(j=tstates[i]; j!=0; j=mstates[j]) 414 temp1[j] = i; 415 NTLOOP(i) 416 for(j=ntstates[i]; j!=0; j=mstates[j]) 417 temp1[j] = -i; 418 warray("yychk", temp1, nstate); 419 warray("yydef", defact, nstate); 420 421 /* put out token translation tables */ 422 /* table 1 has 0-256 */ 423 aryfil(temp1, 256, 0); 424 c = 0; 425 TLOOP(i) { 426 j = tokset[i].value; 427 if(j >= 0 && j < 256) { 428 if(temp1[j]) { 429 print("yacc bug -- cant have 2 different Ts with same value\n"); 430 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); 431 nerrors++; 432 } 433 temp1[j] = i; 434 if(j > c) 435 c = j; 436 } 437 } 438 warray("yytok1", temp1, c+1); 439 440 /* table 2 has PRIVATE-PRIVATE+256 */ 441 aryfil(temp1, 256, 0); 442 c = 0; 443 TLOOP(i) { 444 j = tokset[i].value - PRIVATE; 445 if(j >= 0 && j < 256) { 446 if(temp1[j]) { 447 print("yacc bug -- cant have 2 different Ts with same value\n"); 448 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); 449 nerrors++; 450 } 451 temp1[j] = i; 452 if(j > c) 453 c = j; 454 } 455 } 456 warray("yytok2", temp1, c+1); 457 458 /* table 3 has everything else */ 459 Bprint(ftable, "long yytok3[] =\n{\n"); 460 c = 0; 461 TLOOP(i) { 462 j = tokset[i].value; 463 if(j >= 0 && j < 256) 464 continue; 465 if(j >= PRIVATE && j < 256+PRIVATE) 466 continue; 467 468 Bprint(ftable, "%4d,%4d,", j, i); 469 c++; 470 if(c%5 == 0) 471 Bprint(ftable, "\n"); 472 } 473 Bprint(ftable, "%4d\n};\n", 0); 474 475 /* copy parser text */ 476 while((c=Bgetrune(finput)) != Beof) { 477 if(c == '$') { 478 if((c = Bgetrune(finput)) != 'A') 479 Bputrune(ftable, '$'); 480 else { /* copy actions */ 481 faction = Bopen(actname, OREAD); 482 if(faction == 0) 483 error("cannot reopen action tempfile"); 484 while((c=Bgetrune(faction)) != Beof) 485 Bputrune(ftable, c); 486 Bterm(faction); 487 ZAPFILE(actname); 488 c = Bgetrune(finput); 489 } 490 } 491 Bputrune(ftable, c); 492 } 493 Bterm(ftable); 494 } 495 496 /* 497 * copies string q into p, returning next free char ptr 498 */ 499 char* 500 chcopy(char* p, char* q) 501 { 502 int c; 503 504 while(c = *q) { 505 if(c == '"') 506 *p++ = '\\'; 507 *p++ = c; 508 q++; 509 } 510 *p = 0; 511 return p; 512 } 513 514 /* 515 * creates output string for item pointed to by pp 516 */ 517 char* 518 writem(int *pp) 519 { 520 int i,*p; 521 static char sarr[ISIZE]; 522 char* q; 523 524 for(p=pp; *p>0; p++) 525 ; 526 p = prdptr[-*p]; 527 q = chcopy(sarr, nontrst[*p-NTBASE].name); 528 q = chcopy(q, ": "); 529 for(;;) { 530 *q = ' '; 531 p++; 532 if(p == pp) 533 *q = '.'; 534 q++; 535 *q = '\0'; 536 i = *p; 537 if(i <= 0) 538 break; 539 q = chcopy(q, symnam(i)); 540 if(q > &sarr[ISIZE-30]) 541 error("item too big"); 542 } 543 544 /* an item calling for a reduction */ 545 i = *pp; 546 if(i < 0 ) { 547 q = chcopy(q, " ("); 548 sprint(q, "%d)", -i); 549 } 550 return sarr; 551 } 552 553 /* 554 * return a pointer to the name of symbol i 555 */ 556 char* 557 symnam(int i) 558 { 559 char* cp; 560 561 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; 562 if(*cp == ' ') 563 cp++; 564 return cp; 565 } 566 567 /* 568 * output the summary on y.output 569 */ 570 void 571 summary(void) 572 { 573 574 if(foutput != 0) { 575 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n", 576 ntokens, NTERMS, nnonter, NNONTERM); 577 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n", 578 nprod, NPROD, nstate, NSTATES); 579 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", 580 zzsrconf, zzrrconf); 581 Bprint(foutput, "%d/%d working sets used\n", 582 zzcwp-wsets, WSETSIZE); 583 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n", 584 zzmemsz-mem0, MEMSIZE, memp-amem, ACTSIZE); 585 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE); 586 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate); 587 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp); 588 Bprint(foutput, "%d goto entries\n", zzgoent); 589 Bprint(foutput, "%d entries saved by goto default\n", zzgobest); 590 } 591 if(zzsrconf != 0 || zzrrconf != 0) { 592 print("\nconflicts: "); 593 if(zzsrconf) 594 print("%d shift/reduce", zzsrconf); 595 if(zzsrconf && zzrrconf) 596 print(", "); 597 if(zzrrconf) 598 print("%d reduce/reduce", zzrrconf); 599 print("\n"); 600 } 601 if(ftemp != 0) 602 Bterm(ftemp); 603 if(fdefine != 0) 604 Bterm(fdefine); 605 } 606 607 /* 608 * write out error comment -- NEEDS WORK 609 */ 610 void 611 error(char *s, ...) 612 { 613 614 nerrors++; 615 fprint(2, "\n fatal error:"); 616 fprint(2, s, (&s)[1]); 617 fprint(2, ", %s:%d\n", infile, lineno); 618 if(!fatfl) 619 return; 620 summary(); 621 cleantmp(); 622 exits("error"); 623 } 624 625 /* 626 * set elements 0 through n-1 to c 627 */ 628 void 629 aryfil(int *v, int n, int c) 630 { 631 int i; 632 633 for(i=0; i<n; i++) 634 v[i] = c; 635 } 636 637 /* 638 * set a to the union of a and b 639 * return 1 if b is not a subset of a, 0 otherwise 640 */ 641 int 642 setunion(int *a, int *b) 643 { 644 int i, x, sub; 645 646 sub = 0; 647 SETLOOP(i) { 648 x = *a; 649 *a |= *b; 650 if(*a != x) 651 sub = 1; 652 a++; 653 b++; 654 } 655 return sub; 656 } 657 658 void 659 prlook(Lkset* p) 660 { 661 int j, *pp; 662 663 pp = p->lset; 664 if(pp == 0) 665 Bprint(foutput, "\tNULL"); 666 else { 667 Bprint(foutput, " { "); 668 TLOOP(j) 669 if(BIT(pp,j)) 670 Bprint(foutput, "%s ", symnam(j)); 671 Bprint(foutput, "}"); 672 } 673 } 674 675 /* 676 * compute an array with the beginnings of productions yielding given nonterminals 677 * The array pres points to these lists 678 * the array pyield has the lists: the total size is only NPROD+1 679 */ 680 void 681 cpres(void) 682 { 683 int c, j, i, **pmem; 684 static int *pyield[NPROD]; 685 686 pmem = pyield; 687 NTLOOP(i) { 688 c = i+NTBASE; 689 pres[i] = pmem; 690 fatfl = 0; /* make undefined symbols nonfatal */ 691 PLOOP(0, j) 692 if(*prdptr[j] == c) 693 *pmem++ = prdptr[j]+1; 694 if(pres[i] == pmem) 695 error("nonterminal %s not defined!", nontrst[i].name); 696 } 697 pres[i] = pmem; 698 fatfl = 1; 699 if(nerrors) { 700 summary(); 701 cleantmp(); 702 exits("error"); 703 } 704 if(pmem != &pyield[nprod]) 705 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]); 706 } 707 708 /* 709 * compute an array with the first of nonterminals 710 */ 711 void 712 cpfir(void) 713 { 714 int *p, **s, i, **t, ch, changes; 715 716 zzcwp = &wsets[nnonter]; 717 NTLOOP(i) { 718 aryfil(wsets[i].ws.lset, tbitset, 0); 719 t = pres[i+1]; 720 /* initially fill the sets */ 721 for(s=pres[i]; s<t; ++s) 722 for(p = *s; (ch = *p) > 0; ++p) { 723 if(ch < NTBASE) { 724 SETBIT(wsets[i].ws.lset, ch); 725 break; 726 } 727 if(!pempty[ch-NTBASE]) 728 break; 729 } 730 } 731 732 /* now, reflect transitivity */ 733 changes = 1; 734 while(changes) { 735 changes = 0; 736 NTLOOP(i) { 737 t = pres[i+1]; 738 for(s = pres[i]; s < t; ++s) 739 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) { 740 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset); 741 if(!pempty[ch]) 742 break; 743 } 744 } 745 } 746 747 NTLOOP(i) 748 pfirst[i] = flset(&wsets[i].ws); 749 if(!indebug) 750 return; 751 if(foutput != 0) 752 NTLOOP(i) { 753 Bprint(foutput, "\n%s: ", nontrst[i].name); 754 prlook(pfirst[i]); 755 Bprint(foutput, " %d\n", pempty[i]); 756 } 757 } 758 759 /* 760 * sorts last state,and sees if it equals earlier ones. returns state number 761 */ 762 int 763 state(int c) 764 { 765 Item *p1, *p2, *k, *l, *q1, *q2; 766 int size1, size2, i; 767 768 p1 = pstate[nstate]; 769 p2 = pstate[nstate+1]; 770 if(p1 == p2) 771 return 0; /* null state */ 772 /* sort the items */ 773 for(k = p2-1; k > p1; k--) /* make k the biggest */ 774 for(l = k-1; l >= p1; --l) 775 if(l->pitem > k->pitem) { 776 int *s; 777 Lkset *ss; 778 779 s = k->pitem; 780 k->pitem = l->pitem; 781 l->pitem = s; 782 ss = k->look; 783 k->look = l->look; 784 l->look = ss; 785 } 786 size1 = p2 - p1; /* size of state */ 787 788 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) { 789 /* get ith state */ 790 q1 = pstate[i]; 791 q2 = pstate[i+1]; 792 size2 = q2 - q1; 793 if(size1 != size2) 794 continue; 795 k = p1; 796 for(l = q1; l < q2; l++) { 797 if(l->pitem != k->pitem) 798 break; 799 k++; 800 } 801 if(l != q2) 802 continue; 803 /* found it */ 804 pstate[nstate+1] = pstate[nstate]; /* delete last state */ 805 /* fix up lookaheads */ 806 if(nolook) 807 return i; 808 for(l = q1, k = p1; l < q2; ++l, ++k ) { 809 int s; 810 811 SETLOOP(s) 812 clset.lset[s] = l->look->lset[s]; 813 if(setunion(clset.lset, k->look->lset)) { 814 tystate[i] = MUSTDO; 815 /* register the new set */ 816 l->look = flset( &clset ); 817 } 818 } 819 return i; 820 } 821 /* state is new */ 822 if(nolook) 823 error("yacc state/nolook error"); 824 pstate[nstate+2] = p2; 825 if(nstate+1 >= NSTATES) 826 error("too many states"); 827 if(c >= NTBASE) { 828 mstates[nstate] = ntstates[c-NTBASE]; 829 ntstates[c-NTBASE] = nstate; 830 } else { 831 mstates[nstate] = tstates[c]; 832 tstates[c] = nstate; 833 } 834 tystate[nstate] = MUSTDO; 835 return nstate++; 836 } 837 838 void 839 putitem(int *ptr, Lkset *lptr) 840 { 841 Item *j; 842 843 if(pidebug && foutput != 0) 844 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate); 845 j = pstate[nstate+1]; 846 j->pitem = ptr; 847 if(!nolook) 848 j->look = flset(lptr); 849 pstate[nstate+1] = ++j; 850 if((int*)j > zzmemsz) { 851 zzmemsz = (int*)j; 852 if(zzmemsz >= &mem0[MEMSIZE]) 853 error("out of state space"); 854 } 855 } 856 857 /* 858 * mark nonterminals which derive the empty string 859 * also, look for nonterminals which don't derive any token strings 860 */ 861 void 862 cempty(void) 863 { 864 865 int i, *p; 866 867 /* first, use the array pempty to detect productions that can never be reduced */ 868 /* set pempty to WHONOWS */ 869 aryfil(pempty, nnonter+1, WHOKNOWS); 870 871 /* now, look at productions, marking nonterminals which derive something */ 872 more: 873 PLOOP(0, i) { 874 if(pempty[*prdptr[i] - NTBASE]) 875 continue; 876 for(p = prdptr[i]+1; *p >= 0; ++p) 877 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS) 878 break; 879 /* production can be derived */ 880 if(*p < 0) { 881 pempty[*prdptr[i]-NTBASE] = OK; 882 goto more; 883 } 884 } 885 886 /* now, look at the nonterminals, to see if they are all OK */ 887 NTLOOP(i) { 888 /* the added production rises or falls as the start symbol ... */ 889 if(i == 0) 890 continue; 891 if(pempty[i] != OK) { 892 fatfl = 0; 893 error("nonterminal %s never derives any token string", nontrst[i].name); 894 } 895 } 896 897 if(nerrors) { 898 summary(); 899 cleantmp(); 900 exits("error"); 901 } 902 903 /* now, compute the pempty array, to see which nonterminals derive the empty string */ 904 /* set pempty to WHOKNOWS */ 905 aryfil( pempty, nnonter+1, WHOKNOWS); 906 907 /* loop as long as we keep finding empty nonterminals */ 908 909 again: 910 PLOOP(1, i) { 911 /* not known to be empty */ 912 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) { 913 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p) 914 ; 915 /* we have a nontrivially empty nonterminal */ 916 if(*p < 0) { 917 pempty[*prdptr[i]-NTBASE] = EMPTY; 918 /* got one ... try for another */ 919 goto again; 920 } 921 } 922 } 923 } 924 925 /* 926 * generate the states 927 */ 928 void 929 stagen(void) 930 { 931 932 int c, i, j; 933 Wset *p, *q; 934 935 /* initialize */ 936 nstate = 0; 937 938 /* THIS IS FUNNY from the standpoint of portability 939 * it represents the magic moment when the mem0 array, which has 940 * been holding the productions, starts to hold item pointers, of a 941 * different type... 942 * someday, alloc should be used to allocate all this stuff... for now, we 943 * accept that if pointers don't fit in integers, there is a problem... 944 */ 945 946 pstate[0] = pstate[1] = (Item*)mem; 947 aryfil(clset.lset, tbitset, 0); 948 putitem(prdptr[0]+1, &clset); 949 tystate[0] = MUSTDO; 950 nstate = 1; 951 pstate[2] = pstate[1]; 952 953 aryfil(amem, ACTSIZE, 0); 954 955 /* now, the main state generation loop */ 956 more: 957 SLOOP(i) { 958 if(tystate[i] != MUSTDO) 959 continue; 960 tystate[i] = DONE; 961 aryfil(temp1, nnonter+1, 0); 962 /* take state i, close it, and do gotos */ 963 closure(i); 964 /* generate goto's */ 965 WSLOOP(wsets, p) { 966 if(p->flag) 967 continue; 968 p->flag = 1; 969 c = *(p->pitem); 970 if(c <= 1) { 971 if(pstate[i+1]-pstate[i] <= p-wsets) 972 tystate[i] = MUSTLOOKAHEAD; 973 continue; 974 } 975 /* do a goto on c */ 976 WSLOOP(p, q) 977 /* this item contributes to the goto */ 978 if(c == *(q->pitem)) { 979 putitem(q->pitem+1, &q->ws); 980 q->flag = 1; 981 } 982 if(c < NTBASE) 983 state(c); /* register new state */ 984 else 985 temp1[c-NTBASE] = state(c); 986 } 987 if(gsdebug && foutput != 0) { 988 Bprint(foutput, "%d: ", i); 989 NTLOOP(j) 990 if(temp1[j]) 991 Bprint(foutput, "%s %d, ", nontrst[j].name, temp1[j]); 992 Bprint(foutput, "\n"); 993 } 994 indgo[i] = apack(&temp1[1], nnonter-1) - 1; 995 /* we have done one goto; do some more */ 996 goto more; 997 } 998 /* no more to do... stop */ 999 } 1000 1001 /* 1002 * generate the closure of state i 1003 */ 1004 void 1005 closure(int i) 1006 { 1007 1008 Wset *u, *v; 1009 Item *p, *q; 1010 int c, ch, work, k, *pi, **s, **t; 1011 1012 zzclose++; 1013 1014 /* first, copy kernel of state i to wsets */ 1015 cwp = wsets; 1016 ITMLOOP(i, p, q) { 1017 cwp->pitem = p->pitem; 1018 cwp->flag = 1; /* this item must get closed */ 1019 SETLOOP(k) 1020 cwp->ws.lset[k] = p->look->lset[k]; 1021 WSBUMP(cwp); 1022 } 1023 1024 /* now, go through the loop, closing each item */ 1025 work = 1; 1026 while(work) { 1027 work = 0; 1028 WSLOOP(wsets, u) { 1029 if(u->flag == 0) 1030 continue; 1031 /* dot is before c */ 1032 c = *(u->pitem); 1033 if(c < NTBASE) { 1034 u->flag = 0; 1035 /* only interesting case is where . is before nonterminal */ 1036 continue; 1037 } 1038 1039 /* compute the lookahead */ 1040 aryfil(clset.lset, tbitset, 0); 1041 1042 /* find items involving c */ 1043 WSLOOP(u, v) 1044 if(v->flag == 1 && *(pi=v->pitem) == c) { 1045 v->flag = 0; 1046 if(nolook) 1047 continue; 1048 while((ch = *++pi) > 0) { 1049 /* terminal symbol */ 1050 if(ch < NTBASE) { 1051 SETBIT(clset.lset, ch); 1052 break; 1053 } 1054 /* nonterminal symbol */ 1055 setunion(clset.lset, pfirst[ch-NTBASE]->lset); 1056 if(!pempty[ch-NTBASE]) 1057 break; 1058 } 1059 if(ch <= 0) 1060 setunion(clset.lset, v->ws.lset); 1061 } 1062 1063 /* 1064 * now loop over productions derived from c 1065 * c is now nonterminal number 1066 */ 1067 c -= NTBASE; 1068 t = pres[c+1]; 1069 for(s = pres[c]; s < t; ++s) { 1070 /* 1071 * put these items into the closure 1072 * is the item there 1073 */ 1074 WSLOOP(wsets, v) 1075 /* yes, it is there */ 1076 if(v->pitem == *s) { 1077 if(nolook) 1078 goto nexts; 1079 if(setunion(v->ws.lset, clset.lset)) 1080 v->flag = work = 1; 1081 goto nexts; 1082 } 1083 1084 /* not there; make a new entry */ 1085 if(cwp-wsets+1 >= WSETSIZE) 1086 error( "working set overflow"); 1087 cwp->pitem = *s; 1088 cwp->flag = 1; 1089 if(!nolook) { 1090 work = 1; 1091 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; 1092 } 1093 WSBUMP(cwp); 1094 1095 nexts:; 1096 } 1097 } 1098 } 1099 1100 /* have computed closure; flags are reset; return */ 1101 if(cwp > zzcwp) 1102 zzcwp = cwp; 1103 if(cldebug && foutput != 0) { 1104 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook); 1105 WSLOOP(wsets, u) { 1106 if(u->flag) 1107 Bprint(foutput, "flag set!\n"); 1108 u->flag = 0; 1109 Bprint(foutput, "\t%s", writem(u->pitem)); 1110 prlook(&u->ws); 1111 Bprint(foutput, "\n"); 1112 } 1113 } 1114 } 1115 1116 /* 1117 * decide if the lookahead set pointed to by p is known 1118 * return pointer to a perminent location for the set 1119 */ 1120 Lkset* 1121 flset(Lkset *p) 1122 { 1123 Lkset *q; 1124 int *u, *v, *w, j; 1125 1126 for(q = &lkst[nlset]; q-- > lkst;) { 1127 u = p->lset; 1128 v = q->lset; 1129 w = &v[tbitset]; 1130 while(v < w) 1131 if(*u++ != *v++) 1132 goto more; 1133 /* we have matched */ 1134 return q; 1135 more:; 1136 } 1137 /* add a new one */ 1138 q = &lkst[nlset++]; 1139 if(nlset >= LSETSIZE) 1140 error("too many lookahead sets"); 1141 SETLOOP(j) 1142 q->lset[j] = p->lset[j]; 1143 return q; 1144 } 1145 1146 void 1147 cleantmp(void) 1148 { 1149 ZAPFILE(actname); 1150 ZAPFILE(tempname); 1151 } 1152 1153 void 1154 intr(void) 1155 { 1156 cleantmp(); 1157 exits("interrupted"); 1158 } 1159 1160 void 1161 setup(int argc, char *argv[]) 1162 { 1163 long c, t; 1164 int i, j, lev, ty, ytab, *p; 1165 int vflag, dflag, stem; 1166 char actnm[8], *stemc; 1167 1168 ytab = 0; 1169 vflag = 0; 1170 dflag = 0; 1171 stem = 0; 1172 stemc = "y"; 1173 foutput = 0; 1174 fdefine = 0; 1175 fdebug = 0; 1176 ARGBEGIN{ 1177 case 'v': 1178 case 'V': 1179 vflag++; 1180 break; 1181 case 'D': 1182 yydebug = ARGF(); 1183 break; 1184 case 'd': 1185 dflag++; 1186 break; 1187 case 'o': 1188 ytab++; 1189 ytabc = ARGF(); 1190 break; 1191 case 's': 1192 stem++; 1193 stemc = ARGF(); 1194 break; 1195 case 'S': 1196 parser = PARSERS; 1197 break; 1198 default: 1199 error("illegal option: %c", ARGC()); 1200 }ARGEND 1201 openup(stemc, dflag, vflag, ytab, ytabc); 1202 1203 ftemp = Bopen(tempname = mktemp(ttempname), OWRITE); 1204 faction = Bopen(actname = mktemp(tactname), OWRITE); 1205 if(ftemp == 0 || faction == 0) 1206 error("cannot open temp file"); 1207 if(argc < 1) 1208 error("no input file"); 1209 infile = argv[0]; 1210 finput = Bopen(infile, OREAD); 1211 if(finput == 0) 1212 error("cannot open '%s'", argv[0]); 1213 cnamp = cnames; 1214 1215 defin(0, "$end"); 1216 extval = PRIVATE; /* tokens start in unicode 'private use' */ 1217 defin(0, "error"); 1218 defin(1, "$accept"); 1219 defin(0, "$unk"); 1220 mem = mem0; 1221 i = 0; 1222 1223 for(t = gettok(); t != MARK && t != ENDFILE;) 1224 switch(t) { 1225 case ';': 1226 t = gettok(); 1227 break; 1228 1229 case START: 1230 if(gettok() != IDENTIFIER) 1231 error("bad %%start construction"); 1232 start = chfind(1, tokname); 1233 t = gettok(); 1234 continue; 1235 1236 case TYPEDEF: 1237 if(gettok() != TYPENAME) 1238 error("bad syntax in %%type"); 1239 ty = numbval; 1240 for(;;) { 1241 t = gettok(); 1242 switch(t) { 1243 case IDENTIFIER: 1244 if((t=chfind(1, tokname)) < NTBASE) { 1245 j = TYPE(toklev[t]); 1246 if(j != 0 && j != ty) 1247 error("type redeclaration of token %s", 1248 tokset[t].name); 1249 else 1250 SETTYPE(toklev[t], ty); 1251 } else { 1252 j = nontrst[t-NTBASE].value; 1253 if(j != 0 && j != ty) 1254 error("type redeclaration of nonterminal %s", 1255 nontrst[t-NTBASE].name ); 1256 else 1257 nontrst[t-NTBASE].value = ty; 1258 } 1259 case ',': 1260 continue; 1261 case ';': 1262 t = gettok(); 1263 default: 1264 break; 1265 } 1266 break; 1267 } 1268 continue; 1269 1270 case UNION: 1271 /* copy the union declaration to the output */ 1272 cpyunion(); 1273 t = gettok(); 1274 continue; 1275 1276 case LEFT: 1277 case BINARY: 1278 case RIGHT: 1279 i++; 1280 1281 case TERM: 1282 /* nonzero means new prec. and assoc. */ 1283 lev = t-TERM; 1284 ty = 0; 1285 1286 /* get identifiers so defined */ 1287 t = gettok(); 1288 1289 /* there is a type defined */ 1290 if(t == TYPENAME) { 1291 ty = numbval; 1292 t = gettok(); 1293 } 1294 for(;;) { 1295 switch(t) { 1296 case ',': 1297 t = gettok(); 1298 continue; 1299 1300 case ';': 1301 break; 1302 1303 case IDENTIFIER: 1304 j = chfind(0, tokname); 1305 if(j >= NTBASE) 1306 error("%s defined earlier as nonterminal", tokname); 1307 if(lev) { 1308 if(ASSOC(toklev[j])) 1309 error("redeclaration of precedence of %s", tokname); 1310 SETASC(toklev[j], lev); 1311 SETPLEV(toklev[j], i); 1312 } 1313 if(ty) { 1314 if(TYPE(toklev[j])) 1315 error("redeclaration of type of %s", tokname); 1316 SETTYPE(toklev[j],ty); 1317 } 1318 t = gettok(); 1319 if(t == NUMBER) { 1320 tokset[j].value = numbval; 1321 if(j < ndefout && j > 3) 1322 error("please define type number of %s earlier", 1323 tokset[j].name); 1324 t = gettok(); 1325 } 1326 continue; 1327 } 1328 break; 1329 } 1330 continue; 1331 1332 case LCURLY: 1333 defout(0); 1334 cpycode(); 1335 t = gettok(); 1336 continue; 1337 1338 default: 1339 error("syntax error"); 1340 } 1341 if(t == ENDFILE) 1342 error("unexpected EOF before %%"); 1343 1344 /* t is MARK */ 1345 Bprint(ftable, "extern int yyerrflag;\n"); 1346 Bprint(ftable, "#ifndef YYMAXDEPTH\n"); 1347 Bprint(ftable, "#define YYMAXDEPTH 150\n"); 1348 Bprint(ftable, "#endif\n" ); 1349 if(!ntypes) { 1350 Bprint(ftable, "#ifndef YYSTYPE\n"); 1351 Bprint(ftable, "#define YYSTYPE int\n"); 1352 Bprint(ftable, "#endif\n"); 1353 } 1354 Bprint(ftable, "YYSTYPE yylval;\n"); 1355 Bprint(ftable, "YYSTYPE yyval;\n"); 1356 1357 prdptr[0] = mem; 1358 1359 /* added production */ 1360 *mem++ = NTBASE; 1361 1362 /* if start is 0, we will overwrite with the lhs of the first rule */ 1363 *mem++ = start; 1364 *mem++ = 1; 1365 *mem++ = 0; 1366 prdptr[1] = mem; 1367 while((t=gettok()) == LCURLY) 1368 cpycode(); 1369 if(t != IDENTCOLON) 1370 error("bad syntax on first rule"); 1371 1372 if(!start) 1373 prdptr[0][1] = chfind(1, tokname); 1374 1375 /* read rules */ 1376 while(t != MARK && t != ENDFILE) { 1377 /* process a rule */ 1378 rlines[nprod] = lineno; 1379 if(t == '|') 1380 *mem++ = *prdptr[nprod-1]; 1381 else 1382 if(t == IDENTCOLON) { 1383 *mem = chfind(1, tokname); 1384 if(*mem < NTBASE) 1385 error("token illegal on LHS of grammar rule"); 1386 mem++; 1387 } else 1388 error("illegal rule: missing semicolon or | ?"); 1389 /* read rule body */ 1390 t = gettok(); 1391 1392 more_rule: 1393 while(t == IDENTIFIER) { 1394 *mem = chfind(1, tokname); 1395 if(*mem < NTBASE) 1396 levprd[nprod] = toklev[*mem]; 1397 mem++; 1398 t = gettok(); 1399 } 1400 if(t == PREC) { 1401 if(gettok() != IDENTIFIER) 1402 error("illegal %%prec syntax"); 1403 j = chfind(2, tokname); 1404 if(j >= NTBASE) 1405 error("nonterminal %s illegal after %%prec", 1406 nontrst[j-NTBASE].name); 1407 levprd[nprod] = toklev[j]; 1408 t = gettok(); 1409 } 1410 if(t == '=') { 1411 levprd[nprod] |= ACTFLAG; 1412 Bprint(faction, "\ncase %d:", nprod); 1413 cpyact(mem-prdptr[nprod]-1); 1414 Bprint(faction, " break;"); 1415 if((t=gettok()) == IDENTIFIER) { 1416 1417 /* action within rule... */ 1418 sprint(actnm, "$$%d", nprod); 1419 1420 /* make it a nonterminal */ 1421 j = chfind(1, actnm); 1422 1423 /* 1424 * the current rule will become rule number nprod+1 1425 * move the contents down, and make room for the null 1426 */ 1427 for(p = mem; p >= prdptr[nprod]; --p) 1428 p[2] = *p; 1429 mem += 2; 1430 1431 /* enter null production for action */ 1432 p = prdptr[nprod]; 1433 *p++ = j; 1434 *p++ = -nprod; 1435 1436 /* update the production information */ 1437 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; 1438 levprd[nprod] = ACTFLAG; 1439 if(++nprod >= NPROD) 1440 error("more than %d rules", NPROD); 1441 prdptr[nprod] = p; 1442 1443 /* make the action appear in the original rule */ 1444 *mem++ = j; 1445 1446 /* get some more of the rule */ 1447 goto more_rule; 1448 } 1449 } 1450 1451 while(t == ';') 1452 t = gettok(); 1453 *mem++ = -nprod; 1454 1455 /* check that default action is reasonable */ 1456 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) { 1457 1458 /* no explicit action, LHS has value */ 1459 int tempty; 1460 1461 tempty = prdptr[nprod][1]; 1462 if(tempty < 0) 1463 error("must return a value, since LHS has a type"); 1464 else 1465 if(tempty >= NTBASE) 1466 tempty = nontrst[tempty-NTBASE].value; 1467 else 1468 tempty = TYPE(toklev[tempty]); 1469 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value) 1470 error("default action causes potential type clash"); 1471 } 1472 nprod++; 1473 if(nprod >= NPROD) 1474 error("more than %d rules", NPROD); 1475 prdptr[nprod] = mem; 1476 levprd[nprod] = 0; 1477 } 1478 1479 /* end of all rules */ 1480 defout(1); 1481 1482 finact(); 1483 if(t == MARK) { 1484 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); 1485 while((c=Bgetrune(finput)) != Beof) 1486 Bputrune(ftable, c); 1487 } 1488 Bterm(finput); 1489 } 1490 1491 /* 1492 * finish action routine 1493 */ 1494 void 1495 finact(void) 1496 { 1497 1498 Bterm(faction); 1499 Bprint(ftable, "#define YYEOFCODE %d\n", 1); 1500 Bprint(ftable, "#define YYERRCODE %d\n", 2); 1501 } 1502 1503 /* 1504 * define s to be a terminal if t=0 1505 * or a nonterminal if t=1 1506 */ 1507 int 1508 defin(int nt, char *s) 1509 { 1510 int val; 1511 Rune rune; 1512 1513 val = 0; 1514 if(nt) { 1515 nnonter++; 1516 if(nnonter >= NNONTERM) 1517 error("too many nonterminals, limit %d",NNONTERM); 1518 nontrst[nnonter].name = cstash(s); 1519 return NTBASE + nnonter; 1520 } 1521 1522 /* must be a token */ 1523 ntokens++; 1524 if(ntokens >= NTERMS) 1525 error("too many terminals, limit %d", NTERMS); 1526 tokset[ntokens].name = cstash(s); 1527 1528 /* establish value for token */ 1529 /* single character literal */ 1530 if(s[0] == ' ') { 1531 val = chartorune(&rune, &s[1]); 1532 if(s[val+1] == 0) { 1533 val = rune; 1534 goto out; 1535 } 1536 } 1537 1538 /* escape sequence */ 1539 if(s[0] == ' ' && s[1] == '\\') { 1540 if(s[3] == 0) { 1541 /* single character escape sequence */ 1542 switch(s[2]) { 1543 case 'n': val = '\n'; break; 1544 case 'r': val = '\r'; break; 1545 case 'b': val = '\b'; break; 1546 case 't': val = '\t'; break; 1547 case 'f': val = '\f'; break; 1548 case '\'': val = '\''; break; 1549 case '"': val = '"'; break; 1550 case '\\': val = '\\'; break; 1551 default: error("invalid escape"); 1552 } 1553 goto out; 1554 } 1555 1556 /* \nnn sequence */ 1557 if(s[2] >= '0' && s[2] <= '7') { 1558 if(s[3] < '0' || 1559 s[3] > '7' || 1560 s[4] < '0' || 1561 s[4] > '7' || 1562 s[5] != 0) 1563 error("illegal \\nnn construction"); 1564 val = 64*s[2] + 8*s[3] + s[4] - 73*'0'; 1565 if(val == 0) 1566 error("'\\000' is illegal"); 1567 goto out; 1568 } 1569 error("unknown escape"); 1570 } 1571 val = extval++; 1572 1573 out: 1574 tokset[ntokens].value = val; 1575 toklev[ntokens] = 0; 1576 return ntokens; 1577 } 1578 1579 /* 1580 * write out the defines (at the end of the declaration section) 1581 */ 1582 void 1583 defout(int last) 1584 { 1585 int i, c; 1586 char sar[NAMESIZE+10]; 1587 1588 for(i=ndefout; i<=ntokens; i++) { 1589 /* non-literals */ 1590 c = tokset[i].name[0]; 1591 if(c != ' ' && c != '$') { 1592 Bprint(ftable, "#define %s %d\n", 1593 tokset[i].name, tokset[i].value); 1594 if(fdefine) 1595 Bprint(fdefine, "#define\t%s\t%d\n", 1596 tokset[i].name, tokset[i].value); 1597 } 1598 } 1599 ndefout = ntokens+1; 1600 if(last && fdebug) { 1601 Bprint(fdebug, "char* yytoknames[] =\n{\n"); 1602 TLOOP(i) { 1603 if(tokset[i].name) { 1604 chcopy(sar, tokset[i].name); 1605 Bprint(fdebug, "\t\"%s\",\n", sar); 1606 continue; 1607 } 1608 Bprint(fdebug, "\t0,\n"); 1609 } 1610 Bprint(fdebug, "};\n"); 1611 } 1612 } 1613 1614 char* 1615 cstash(char *s) 1616 { 1617 char *temp; 1618 1619 temp = cnamp; 1620 do { 1621 if(cnamp >= &cnames[cnamsz]) 1622 error("too many characters in id's and literals"); 1623 else 1624 *cnamp++ = *s; 1625 } while(*s++); 1626 return temp; 1627 } 1628 1629 long 1630 gettok(void) 1631 { 1632 long c; 1633 Rune rune; 1634 int i, base, match, reserve; 1635 static int peekline; 1636 1637 begin: 1638 reserve = 0; 1639 lineno += peekline; 1640 peekline = 0; 1641 c = Bgetrune(finput); 1642 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') { 1643 if(c == '\n') 1644 lineno++; 1645 c = Bgetrune(finput); 1646 } 1647 1648 /* skip comment */ 1649 if(c == '/') { 1650 lineno += skipcom(); 1651 goto begin; 1652 } 1653 switch(c) { 1654 case Beof: 1655 return ENDFILE; 1656 1657 case '{': 1658 Bungetrune(finput); 1659 return '='; 1660 1661 case '<': 1662 /* get, and look up, a type name (union member name) */ 1663 i = 0; 1664 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') { 1665 rune = c; 1666 c = runetochar(&tokname[i], &rune); 1667 if(i < NAMESIZE) 1668 i += c; 1669 } 1670 if(c != '>') 1671 error("unterminated < ... > clause"); 1672 tokname[i] = 0; 1673 for(i=1; i<=ntypes; i++) 1674 if(!strcmp(typeset[i], tokname)) { 1675 numbval = i; 1676 return TYPENAME; 1677 } 1678 ntypes++; 1679 numbval = ntypes; 1680 typeset[numbval] = cstash(tokname); 1681 return TYPENAME; 1682 1683 case '"': 1684 case '\'': 1685 match = c; 1686 tokname[0] = ' '; 1687 i = 1; 1688 for(;;) { 1689 c = Bgetrune(finput); 1690 if(c == '\n' || c <= 0) 1691 error("illegal or missing ' or \"" ); 1692 if(c == '\\') { 1693 tokname[i] = '\\'; 1694 if(i < NAMESIZE) 1695 i++; 1696 c = Bgetrune(finput); 1697 } else 1698 if(c == match) 1699 break; 1700 rune = c; 1701 c = runetochar(&tokname[i], &rune); 1702 if(i < NAMESIZE) 1703 i += c; 1704 } 1705 break; 1706 1707 case '%': 1708 case '\\': 1709 switch(c = Bgetrune(finput)) { 1710 case '0': return TERM; 1711 case '<': return LEFT; 1712 case '2': return BINARY; 1713 case '>': return RIGHT; 1714 case '%': 1715 case '\\': return MARK; 1716 case '=': return PREC; 1717 case '{': return LCURLY; 1718 default: reserve = 1; 1719 } 1720 1721 default: 1722 /* number */ 1723 if(isdigit(c)) { 1724 numbval = c-'0'; 1725 base = (c=='0')? 8: 10; 1726 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput)) 1727 numbval = numbval*base + (c-'0'); 1728 Bungetrune(finput); 1729 return NUMBER; 1730 } 1731 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') { 1732 i = 0; 1733 while(islower(c) || isupper(c) || isdigit(c) || 1734 c == '-' || c=='_' || c=='.' || c=='$') { 1735 if(reserve && isupper(c)) 1736 c += 'a'-'A'; 1737 rune = c; 1738 c = runetochar(&tokname[i], &rune); 1739 if(i < NAMESIZE) 1740 i += c; 1741 c = Bgetrune(finput); 1742 } 1743 } else 1744 return c; 1745 Bungetrune(finput); 1746 } 1747 tokname[i] = 0; 1748 1749 /* find a reserved word */ 1750 if(reserve) { 1751 for(c=0; resrv[c].name; c++) 1752 if(strcmp(tokname, resrv[c].name) == 0) 1753 return resrv[c].value; 1754 error("invalid escape, or illegal reserved word: %s", tokname); 1755 } 1756 1757 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */ 1758 c = Bgetrune(finput); 1759 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') { 1760 if(c == '\n') 1761 peekline++; 1762 /* look for comments */ 1763 if(c == '/') 1764 peekline += skipcom(); 1765 c = Bgetrune(finput); 1766 } 1767 if(c == ':') 1768 return IDENTCOLON; 1769 Bungetrune(finput); 1770 return IDENTIFIER; 1771 } 1772 1773 /* 1774 * determine the type of a symbol 1775 */ 1776 int 1777 fdtype(int t) 1778 { 1779 int v; 1780 1781 if(t >= NTBASE) 1782 v = nontrst[t-NTBASE].value; 1783 else 1784 v = TYPE(toklev[t]); 1785 if(v <= 0) 1786 error("must specify type for %s", (t>=NTBASE)? 1787 nontrst[t-NTBASE].name: tokset[t].name); 1788 return v; 1789 } 1790 1791 int 1792 chfind(int t, char *s) 1793 { 1794 int i; 1795 1796 if(s[0] == ' ') 1797 t = 0; 1798 TLOOP(i) 1799 if(!strcmp(s, tokset[i].name)) 1800 return i; 1801 NTLOOP(i) 1802 if(!strcmp(s, nontrst[i].name)) 1803 return NTBASE+i; 1804 1805 /* cannot find name */ 1806 if(t > 1) 1807 error("%s should have been defined earlier", s); 1808 return defin(t, s); 1809 } 1810 1811 /* 1812 * copy the union declaration to the output, and the define file if present 1813 */ 1814 void 1815 cpyunion(void) 1816 { 1817 long c; 1818 int level; 1819 1820 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); 1821 Bprint(ftable, "typedef union "); 1822 if(fdefine != 0) 1823 Bprint(fdefine, "\ntypedef union "); 1824 1825 level = 0; 1826 for(;;) { 1827 if((c=Bgetrune(finput)) == Beof) 1828 error("EOF encountered while processing %%union"); 1829 Bputrune(ftable, c); 1830 if(fdefine != 0) 1831 Bputrune(fdefine, c); 1832 switch(c) { 1833 case '\n': 1834 lineno++; 1835 break; 1836 case '{': 1837 level++; 1838 break; 1839 case '}': 1840 level--; 1841 1842 /* we are finished copying */ 1843 if(level == 0) { 1844 Bprint(ftable, " YYSTYPE;\n"); 1845 if(fdefine != 0) 1846 Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n"); 1847 return; 1848 } 1849 } 1850 } 1851 } 1852 1853 /* 1854 * copies code between \{ and \} 1855 */ 1856 void 1857 cpycode(void) 1858 { 1859 1860 long c; 1861 1862 c = Bgetrune(finput); 1863 if(c == '\n') { 1864 c = Bgetrune(finput); 1865 lineno++; 1866 } 1867 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); 1868 while(c != Beof) { 1869 if(c == '\\') { 1870 if((c=Bgetrune(finput)) == '}') 1871 return; 1872 Bputc(ftable, '\\'); 1873 } 1874 if(c == '%') { 1875 if((c=Bgetrune(finput)) == '}') 1876 return; 1877 Bputc(ftable, '%'); 1878 } 1879 Bputrune(ftable, c); 1880 if(c == '\n') 1881 lineno++; 1882 c = Bgetrune(finput); 1883 } 1884 error("eof before %%}"); 1885 } 1886 1887 /* 1888 * skip over comments 1889 * skipcom is called after reading a '/' 1890 */ 1891 int 1892 skipcom(void) 1893 { 1894 long c; 1895 int i; 1896 1897 /* i is the number of lines skipped */ 1898 i = 0; 1899 if(Bgetrune(finput) != '*') 1900 error("illegal comment"); 1901 c = Bgetrune(finput); 1902 while(c != Beof) { 1903 while(c == '*') 1904 if((c=Bgetrune(finput)) == '/') 1905 return i; 1906 if(c == '\n') 1907 i++; 1908 c = Bgetrune(finput); 1909 } 1910 error("EOF inside comment"); 1911 return 0; 1912 } 1913 1914 /* 1915 * copy C action to the next ; or closing } 1916 */ 1917 void 1918 cpyact(int offset) 1919 { 1920 long c; 1921 int brac, match, j, s, fnd, tok; 1922 1923 Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile); 1924 brac = 0; 1925 1926 loop: 1927 c = Bgetrune(finput); 1928 swt: 1929 switch(c) { 1930 case ';': 1931 if(brac == 0) { 1932 Bputrune(faction, c); 1933 return; 1934 } 1935 goto lcopy; 1936 1937 case '{': 1938 brac++; 1939 goto lcopy; 1940 1941 case '$': 1942 s = 1; 1943 tok = -1; 1944 c = Bgetrune(finput); 1945 1946 /* type description */ 1947 if(c == '<') { 1948 Bungetrune(finput); 1949 if(gettok() != TYPENAME) 1950 error("bad syntax on $<ident> clause"); 1951 tok = numbval; 1952 c = Bgetrune(finput); 1953 } 1954 if(c == '$') { 1955 Bprint(faction, "yyval"); 1956 1957 /* put out the proper tag... */ 1958 if(ntypes) { 1959 if(tok < 0) 1960 tok = fdtype(*prdptr[nprod]); 1961 Bprint(faction, ".%s", typeset[tok]); 1962 } 1963 goto loop; 1964 } 1965 if(c == '-') { 1966 s = -s; 1967 c = Bgetrune(finput); 1968 } 1969 if(isdigit(c)) { 1970 j = 0; 1971 while(isdigit(c)) { 1972 j = j*10 + (c-'0'); 1973 c = Bgetrune(finput); 1974 } 1975 Bungetrune(finput); 1976 j = j*s - offset; 1977 if(j > 0) 1978 error("Illegal use of $%d", j+offset); 1979 1980 dollar: 1981 Bprint(faction, "yypt[-%d].yyv", -j); 1982 1983 /* put out the proper tag */ 1984 if(ntypes) { 1985 if(j+offset <= 0 && tok < 0) 1986 error("must specify type of $%d", j+offset); 1987 if(tok < 0) 1988 tok = fdtype(prdptr[nprod][j+offset]); 1989 Bprint(faction, ".%s", typeset[tok]); 1990 } 1991 goto loop; 1992 } 1993 if(isupper(c) || islower(c) || c == '_' || c == '.') { 1994 int tok; /* tok used oustide for type info */ 1995 1996 /* look for $name */ 1997 Bungetrune(finput); 1998 if(gettok() != IDENTIFIER) 1999 error("$ must be followed by an identifier"); 2000 tok = chfind(2, tokname); 2001 if((c = Bgetrune(finput)) != '#') { 2002 Bungetrune(finput); 2003 fnd = -1; 2004 } else 2005 if(gettok() != NUMBER) { 2006 error("# must be followed by number"); 2007 fnd = -1; 2008 } else 2009 fnd = numbval; 2010 for(j=1; j<=offset; ++j) 2011 if(tok == prdptr[nprod][j]) { 2012 if(--fnd <= 0) { 2013 j -= offset; 2014 goto dollar; 2015 } 2016 } 2017 error("$name or $name#number not found"); 2018 } 2019 Bputc(faction, '$'); 2020 if(s < 0 ) 2021 Bputc(faction, '-'); 2022 goto swt; 2023 2024 case '}': 2025 brac--; 2026 if(brac) 2027 goto lcopy; 2028 Bputrune(faction, c); 2029 return; 2030 2031 case '/': 2032 /* look for comments */ 2033 Bputrune(faction, c); 2034 c = Bgetrune(finput); 2035 if(c != '*') 2036 goto swt; 2037 2038 /* it really is a comment */ 2039 Bputrune(faction, c); 2040 c = Bgetrune(finput); 2041 while(c >= 0) { 2042 while(c == '*') { 2043 Bputrune(faction, c); 2044 if((c=Bgetrune(finput)) == '/') 2045 goto lcopy; 2046 } 2047 Bputrune(faction, c); 2048 if(c == '\n') 2049 lineno++; 2050 c = Bgetrune(finput); 2051 } 2052 error("EOF inside comment"); 2053 2054 case '\'': 2055 /* character constant */ 2056 match = '\''; 2057 goto string; 2058 2059 case '"': 2060 /* character string */ 2061 match = '"'; 2062 2063 string: 2064 Bputrune(faction, c); 2065 while(c = Bgetrune(finput)) { 2066 if(c == '\\') { 2067 Bputrune(faction, c); 2068 c = Bgetrune(finput); 2069 if(c == '\n') 2070 lineno++; 2071 } else 2072 if(c == match) 2073 goto lcopy; 2074 if(c == '\n') 2075 error("newline in string or char. const."); 2076 Bputrune(faction, c); 2077 } 2078 error("EOF in string or character constant"); 2079 2080 case Beof: 2081 error("action does not terminate"); 2082 2083 case '\n': 2084 lineno++; 2085 goto lcopy; 2086 } 2087 2088 lcopy: 2089 Bputrune(faction, c); 2090 goto loop; 2091 } 2092 2093 void 2094 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc) 2095 { 2096 char buf[256]; 2097 2098 if(vflag) { 2099 sprint(buf, "%s.%s", stem, FILEU); 2100 foutput = Bopen(buf, OWRITE); 2101 if(foutput == 0) 2102 error("cannot open %s", buf); 2103 } 2104 if(yydebug) { 2105 sprint(buf, "%s.%s", stem, FILEDEBUG); 2106 if((fdebug = Bopen(buf, OWRITE)) == 0) 2107 error("can't open %s", buf); 2108 } 2109 if(dflag) { 2110 sprint(buf, "%s.%s", stem, FILED); 2111 fdefine = Bopen(buf, OWRITE); 2112 if(fdefine == 0) 2113 error("can't create %s", buf); 2114 } 2115 if(ytab == 0) 2116 sprint(buf, "%s.%s", stem, OFILE); 2117 else 2118 strcpy(buf, ytabc); 2119 ftable = Bopen(buf, OWRITE); 2120 if(ftable == 0) 2121 error("cannot open table file %s", buf); 2122 } 2123 2124 /* 2125 * print the output for the states 2126 */ 2127 void 2128 output(void) 2129 { 2130 int i, k, c; 2131 Wset *u, *v; 2132 2133 Bprint(ftable, "short yyexca[] =\n{"); 2134 if(fdebug) 2135 Bprint(fdebug, "char* yystates[] =\n{\n"); 2136 2137 /* output the stuff for state i */ 2138 SLOOP(i) { 2139 nolook = tystate[i]!=MUSTLOOKAHEAD; 2140 closure(i); 2141 2142 /* output actions */ 2143 nolook = 1; 2144 aryfil(temp1, ntokens+nnonter+1, 0); 2145 WSLOOP(wsets, u) { 2146 c = *(u->pitem); 2147 if(c > 1 && c < NTBASE && temp1[c] == 0) { 2148 WSLOOP(u, v) 2149 if(c == *(v->pitem)) 2150 putitem(v->pitem+1, (Lkset*)0); 2151 temp1[c] = state(c); 2152 } else 2153 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0) 2154 temp1[c+ntokens] = amem[indgo[i]+c]; 2155 } 2156 if(i == 1) 2157 temp1[1] = ACCEPTCODE; 2158 2159 /* now, we have the shifts; look at the reductions */ 2160 lastred = 0; 2161 WSLOOP(wsets, u) { 2162 c = *u->pitem; 2163 2164 /* reduction */ 2165 if(c <= 0) { 2166 lastred = -c; 2167 TLOOP(k) 2168 if(BIT(u->ws.lset, k)) { 2169 if(temp1[k] == 0) 2170 temp1[k] = c; 2171 else 2172 if(temp1[k] < 0) { /* reduce/reduce conflict */ 2173 if(foutput) 2174 Bprint(foutput, 2175 "\n%d: reduce/reduce conflict" 2176 " (red'ns %d and %d ) on %s", 2177 i, -temp1[k], lastred, 2178 symnam(k)); 2179 if(-temp1[k] > lastred) 2180 temp1[k] = -lastred; 2181 zzrrconf++; 2182 } else 2183 /* potential shift/reduce conflict */ 2184 precftn( lastred, k, i ); 2185 } 2186 } 2187 } 2188 wract(i); 2189 } 2190 2191 if(fdebug) 2192 Bprint(fdebug, "};\n"); 2193 Bprint(ftable, "};\n"); 2194 Bprint(ftable, "#define YYNPROD %d\n", nprod); 2195 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE); 2196 if(yydebug) 2197 Bprint(ftable, "#define yydebug %s\n", yydebug); 2198 } 2199 2200 /* 2201 * pack state i from temp1 into amem 2202 */ 2203 int 2204 apack(int *p, int n) 2205 { 2206 int *pp, *qq, *rr, off, *q, *r; 2207 2208 /* we don't need to worry about checking because 2209 * we will only look at entries known to be there... 2210 * eliminate leading and trailing 0's 2211 */ 2212 2213 q = p+n; 2214 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off) 2215 ; 2216 /* no actions */ 2217 if(pp > q) 2218 return 0; 2219 p = pp; 2220 2221 /* now, find a place for the elements from p to q, inclusive */ 2222 r = &amem[ACTSIZE-1]; 2223 for(rr = amem; rr <= r; rr++, off++) { 2224 for(qq = rr, pp = p; pp <= q; pp++, qq++) 2225 if(*pp != 0) 2226 if(*pp != *qq && *qq != 0) 2227 goto nextk; 2228 2229 /* we have found an acceptable k */ 2230 if(pkdebug && foutput != 0) 2231 Bprint(foutput, "off = %d, k = %d\n", off, rr-amem); 2232 for(qq = rr, pp = p; pp <= q; pp++, qq++) 2233 if(*pp) { 2234 if(qq > r) 2235 error("action table overflow"); 2236 if(qq > memp) 2237 memp = qq; 2238 *qq = *pp; 2239 } 2240 if(pkdebug && foutput != 0) 2241 for(pp = amem; pp <= memp; pp += 10) { 2242 Bprint(foutput, "\t"); 2243 for(qq = pp; qq <= pp+9; qq++) 2244 Bprint(foutput, "%d ", *qq); 2245 Bprint(foutput, "\n"); 2246 } 2247 return(off); 2248 nextk:; 2249 } 2250 error("no space in action table"); 2251 return 0; 2252 } 2253 2254 /* 2255 * output the gotos for the nontermninals 2256 */ 2257 void 2258 go2out(void) 2259 { 2260 int i, j, k, best, count, cbest, times; 2261 2262 /* mark begining of gotos */ 2263 Bprint(ftemp, "$\n"); 2264 for(i = 1; i <= nnonter; i++) { 2265 go2gen(i); 2266 2267 /* find the best one to make default */ 2268 best = -1; 2269 times = 0; 2270 2271 /* is j the most frequent */ 2272 for(j = 0; j <= nstate; j++) { 2273 if(tystate[j] == 0) 2274 continue; 2275 if(tystate[j] == best) 2276 continue; 2277 2278 /* is tystate[j] the most frequent */ 2279 count = 0; 2280 cbest = tystate[j]; 2281 for(k = j; k <= nstate; k++) 2282 if(tystate[k] == cbest) 2283 count++; 2284 if(count > times) { 2285 best = cbest; 2286 times = count; 2287 } 2288 } 2289 2290 /* best is now the default entry */ 2291 zzgobest += times-1; 2292 for(j = 0; j <= nstate; j++) 2293 if(tystate[j] != 0 && tystate[j] != best) { 2294 Bprint(ftemp, "%d,%d,", j, tystate[j]); 2295 zzgoent++; 2296 } 2297 2298 /* now, the default */ 2299 zzgoent++; 2300 Bprint(ftemp, "%d\n", best); 2301 } 2302 } 2303 2304 /* 2305 * output the gotos for nonterminal c 2306 */ 2307 void 2308 go2gen(int c) 2309 { 2310 int i, work, cc; 2311 Item *p, *q; 2312 2313 2314 /* first, find nonterminals with gotos on c */ 2315 aryfil(temp1, nnonter+1, 0); 2316 temp1[c] = 1; 2317 work = 1; 2318 while(work) { 2319 work = 0; 2320 PLOOP(0, i) 2321 2322 /* cc is a nonterminal */ 2323 if((cc=prdptr[i][1]-NTBASE) >= 0) 2324 /* cc has a goto on c */ 2325 if(temp1[cc] != 0) { 2326 2327 /* thus, the left side of production i does too */ 2328 cc = *prdptr[i]-NTBASE; 2329 if(temp1[cc] == 0) { 2330 work = 1; 2331 temp1[cc] = 1; 2332 } 2333 } 2334 } 2335 2336 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */ 2337 if(g2debug && foutput != 0) { 2338 Bprint(foutput, "%s: gotos on ", nontrst[c].name); 2339 NTLOOP(i) 2340 if(temp1[i]) 2341 Bprint(foutput, "%s ", nontrst[i].name); 2342 Bprint(foutput, "\n"); 2343 } 2344 2345 /* now, go through and put gotos into tystate */ 2346 aryfil(tystate, nstate, 0); 2347 SLOOP(i) 2348 ITMLOOP(i, p, q) 2349 if((cc = *p->pitem) >= NTBASE) 2350 /* goto on c is possible */ 2351 if(temp1[cc-NTBASE]) { 2352 tystate[i] = amem[indgo[i]+c]; 2353 break; 2354 } 2355 } 2356 2357 /* 2358 * decide a shift/reduce conflict by precedence. 2359 * r is a rule number, t a token number 2360 * the conflict is in state s 2361 * temp1[t] is changed to reflect the action 2362 */ 2363 void 2364 precftn(int r, int t, int s) 2365 { 2366 int lp, lt, action; 2367 2368 lp = levprd[r]; 2369 lt = toklev[t]; 2370 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { 2371 2372 /* conflict */ 2373 if(foutput != 0) 2374 Bprint(foutput, 2375 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s", 2376 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t)); 2377 zzsrconf++; 2378 return; 2379 } 2380 if(PLEVEL(lt) == PLEVEL(lp)) 2381 action = ASSOC(lt); 2382 else 2383 if(PLEVEL(lt) > PLEVEL(lp)) 2384 action = RASC; /* shift */ 2385 else 2386 action = LASC; /* reduce */ 2387 switch(action) { 2388 case BASC: /* error action */ 2389 temp1[t] = ERRCODE; 2390 break; 2391 case LASC: /* reduce */ 2392 temp1[t] = -r; 2393 break; 2394 } 2395 } 2396 2397 /* 2398 * output state i 2399 * temp1 has the actions, lastred the default 2400 */ 2401 void 2402 wract(int i) 2403 { 2404 int p, p0, p1, ntimes, tred, count, j, flag; 2405 2406 /* find the best choice for lastred */ 2407 lastred = 0; 2408 ntimes = 0; 2409 TLOOP(j) { 2410 if(temp1[j] >= 0) 2411 continue; 2412 if(temp1[j]+lastred == 0) 2413 continue; 2414 /* count the number of appearances of temp1[j] */ 2415 count = 0; 2416 tred = -temp1[j]; 2417 levprd[tred] |= REDFLAG; 2418 TLOOP(p) 2419 if(temp1[p]+tred == 0) 2420 count++; 2421 if(count > ntimes) { 2422 lastred = tred; 2423 ntimes = count; 2424 } 2425 } 2426 2427 /* 2428 * for error recovery, arrange that, if there is a shift on the 2429 * error recovery token, `error', that the default be the error action 2430 */ 2431 if(temp1[2] > 0) 2432 lastred = 0; 2433 2434 /* clear out entries in temp1 which equal lastred */ 2435 TLOOP(p) 2436 if(temp1[p]+lastred == 0) 2437 temp1[p] = 0; 2438 2439 wrstate(i); 2440 defact[i] = lastred; 2441 flag = 0; 2442 TLOOP(p0) 2443 if((p1=temp1[p0]) != 0) { 2444 if(p1 < 0) { 2445 p1 = -p1; 2446 goto exc; 2447 } 2448 if(p1 == ACCEPTCODE) { 2449 p1 = -1; 2450 goto exc; 2451 } 2452 if(p1 == ERRCODE) { 2453 p1 = 0; 2454 exc: 2455 if(flag++ == 0) 2456 Bprint(ftable, "-1, %d,\n", i); 2457 Bprint(ftable, "\t%d, %d,\n", p0, p1); 2458 zzexcp++; 2459 continue; 2460 } 2461 Bprint(ftemp, "%d,%d,", p0, p1); 2462 zzacent++; 2463 } 2464 if(flag) { 2465 defact[i] = -2; 2466 Bprint(ftable, "\t-2, %d,\n", lastred); 2467 } 2468 Bprint(ftemp, "\n"); 2469 } 2470 2471 /* 2472 * writes state i 2473 */ 2474 void 2475 wrstate(int i) 2476 { 2477 int j0, j1; 2478 Item *pp, *qq; 2479 Wset *u; 2480 2481 if(fdebug) { 2482 if(lastred) { 2483 Bprint(fdebug, " 0, /*%d*/\n", i); 2484 } else { 2485 Bprint(fdebug, " \""); 2486 ITMLOOP(i, pp, qq) 2487 Bprint(fdebug, "%s\\n", writem(pp->pitem)); 2488 if(tystate[i] == MUSTLOOKAHEAD) 2489 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u) 2490 if(*u->pitem < 0) 2491 Bprint(fdebug, "%s\\n", writem(u->pitem)); 2492 Bprint(fdebug, "\", /*%d*/\n", i); 2493 } 2494 } 2495 if(foutput == 0) 2496 return; 2497 Bprint(foutput, "\nstate %d\n", i); 2498 ITMLOOP(i, pp, qq) 2499 Bprint(foutput, "\t%s\n", writem(pp->pitem)); 2500 if(tystate[i] == MUSTLOOKAHEAD) 2501 /* print out empty productions in closure */ 2502 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u) 2503 if(*u->pitem < 0) 2504 Bprint(foutput, "\t%s\n", writem(u->pitem)); 2505 2506 /* check for state equal to another */ 2507 TLOOP(j0) 2508 if((j1=temp1[j0]) != 0) { 2509 Bprint(foutput, "\n\t%s ", symnam(j0)); 2510 /* shift, error, or accept */ 2511 if(j1 > 0) { 2512 if(j1 == ACCEPTCODE) 2513 Bprint(foutput, "accept"); 2514 else 2515 if(j1 == ERRCODE) 2516 Bprint(foutput, "error"); 2517 else 2518 Bprint(foutput, "shift %d", j1); 2519 } else 2520 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]); 2521 } 2522 2523 /* output the final production */ 2524 if(lastred) 2525 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n", 2526 lastred, rlines[lastred]); 2527 else 2528 Bprint(foutput, "\n\t. error\n\n"); 2529 2530 /* now, output nonterminal actions */ 2531 j1 = ntokens; 2532 for(j0 = 1; j0 <= nnonter; j0++) { 2533 j1++; 2534 if(temp1[j1]) 2535 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]); 2536 } 2537 } 2538 2539 void 2540 warray(char *s, int *v, int n) 2541 { 2542 int i; 2543 2544 Bprint(ftable, "short %s[] =\n{", s); 2545 for(i=0;;) { 2546 if(i%10 == 0) 2547 Bprint(ftable, "\n"); 2548 Bprint(ftable, "%4d", v[i]); 2549 i++; 2550 if(i >= n) { 2551 Bprint(ftable, "\n};\n"); 2552 break; 2553 } 2554 Bprint(ftable, ","); 2555 } 2556 } 2557 2558 /* 2559 * in order to free up the mem and amem arrays for the optimizer, 2560 * and still be able to output yyr1, etc., after the sizes of 2561 * the action array is known, we hide the nonterminals 2562 * derived by productions in levprd. 2563 */ 2564 2565 void 2566 hideprod(void) 2567 { 2568 int i, j; 2569 2570 j = 0; 2571 levprd[0] = 0; 2572 PLOOP(1, i) { 2573 if(!(levprd[i] & REDFLAG)) { 2574 j++; 2575 if(foutput != 0) 2576 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i])); 2577 } 2578 levprd[i] = *prdptr[i] - NTBASE; 2579 } 2580 if(j) 2581 print("%d rules never reduced\n", j); 2582 } 2583 2584 2585 void 2586 callopt(void) 2587 { 2588 int i, *p, j, k, *q; 2589 2590 /* read the arrays from tempfile and set parameters */ 2591 if((finput=Bopen(tempname, OREAD)) == 0) 2592 error("optimizer cannot open tempfile"); 2593 pgo[0] = 0; 2594 temp1[0] = 0; 2595 nstate = 0; 2596 nnonter = 0; 2597 for(;;) { 2598 switch(gtnm()) { 2599 case '\n': 2600 nstate++; 2601 pmem--; 2602 temp1[nstate] = pmem - mem0; 2603 case ',': 2604 continue; 2605 case '$': 2606 break; 2607 default: 2608 error("bad tempfile"); 2609 } 2610 break; 2611 } 2612 2613 pmem--; 2614 temp1[nstate] = yypgo[0] = pmem - mem0; 2615 for(;;) { 2616 switch(gtnm()) { 2617 case '\n': 2618 nnonter++; 2619 yypgo[nnonter] = pmem-mem0; 2620 case ',': 2621 continue; 2622 case -1: 2623 break; 2624 default: 2625 error("bad tempfile"); 2626 } 2627 break; 2628 } 2629 pmem--; 2630 yypgo[nnonter--] = pmem - mem0; 2631 for(i = 0; i < nstate; i++) { 2632 k = 32000; 2633 j = 0; 2634 q = mem0 + temp1[i+1]; 2635 for(p = mem0 + temp1[i]; p < q ; p += 2) { 2636 if(*p > j) 2637 j = *p; 2638 if(*p < k) 2639 k = *p; 2640 } 2641 /* nontrivial situation */ 2642 if(k <= j) { 2643 /* j is now the range */ 2644 /* j -= k; /* call scj */ 2645 if(k > maxoff) 2646 maxoff = k; 2647 } 2648 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j; 2649 if(j > maxspr) 2650 maxspr = j; 2651 } 2652 2653 /* initialize ggreed table */ 2654 for(i = 1; i <= nnonter; i++) { 2655 ggreed[i] = 1; 2656 j = 0; 2657 2658 /* minimum entry index is always 0 */ 2659 q = mem0 + yypgo[i+1] - 1; 2660 for(p = mem0+yypgo[i]; p < q ; p += 2) { 2661 ggreed[i] += 2; 2662 if(*p > j) 2663 j = *p; 2664 } 2665 ggreed[i] = ggreed[i] + 2*j; 2666 if(j > maxoff) 2667 maxoff = j; 2668 } 2669 2670 /* now, prepare to put the shift actions into the amem array */ 2671 for(i = 0; i < ACTSIZE; i++) 2672 amem[i] = 0; 2673 maxa = amem; 2674 for(i = 0; i < nstate; i++) { 2675 if(tystate[i] == 0 && adb > 1) 2676 Bprint(ftable, "State %d: null\n", i); 2677 indgo[i] = YYFLAG1; 2678 } 2679 while((i = nxti()) != NOMORE) 2680 if(i >= 0) 2681 stin(i); 2682 else 2683 gin(-i); 2684 2685 /* print amem array */ 2686 if(adb > 2 ) 2687 for(p = amem; p <= maxa; p += 10) { 2688 Bprint(ftable, "%4d ", p-amem); 2689 for(i = 0; i < 10; ++i) 2690 Bprint(ftable, "%4d ", p[i]); 2691 Bprint(ftable, "\n"); 2692 } 2693 2694 /* write out the output appropriate to the language */ 2695 aoutput(); 2696 osummary(); 2697 ZAPFILE(tempname); 2698 } 2699 2700 void 2701 gin(int i) 2702 { 2703 int *p, *r, *s, *q1, *q2; 2704 2705 /* enter gotos on nonterminal i into array amem */ 2706 ggreed[i] = 0; 2707 2708 q2 = mem0+ yypgo[i+1] - 1; 2709 q1 = mem0 + yypgo[i]; 2710 2711 /* now, find amem place for it */ 2712 for(p = amem; p < &amem[ACTSIZE]; p++) { 2713 if(*p) 2714 continue; 2715 for(r = q1; r < q2; r += 2) { 2716 s = p + *r + 1; 2717 if(*s) 2718 goto nextgp; 2719 if(s > maxa) 2720 if((maxa = s) > &amem[ACTSIZE]) 2721 error("a array overflow"); 2722 } 2723 /* we have found amem spot */ 2724 *p = *q2; 2725 if(p > maxa) 2726 if((maxa = p) > &amem[ACTSIZE]) 2727 error("a array overflow"); 2728 for(r = q1; r < q2; r += 2) { 2729 s = p + *r + 1; 2730 *s = r[1]; 2731 } 2732 pgo[i] = p-amem; 2733 if(adb > 1) 2734 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]); 2735 return; 2736 2737 nextgp:; 2738 } 2739 error("cannot place goto %d\n", i); 2740 } 2741 2742 void 2743 stin(int i) 2744 { 2745 int *r, *s, n, flag, j, *q1, *q2; 2746 2747 tystate[i] = 0; 2748 2749 /* enter state i into the amem array */ 2750 q2 = mem0+temp1[i+1]; 2751 q1 = mem0+temp1[i]; 2752 /* find an acceptable place */ 2753 for(n = -maxoff; n < ACTSIZE; n++) { 2754 flag = 0; 2755 for(r = q1; r < q2; r += 2) { 2756 if((s = *r + n + amem) < amem) 2757 goto nextn; 2758 if(*s == 0) 2759 flag++; 2760 else 2761 if(*s != r[1]) 2762 goto nextn; 2763 } 2764 2765 /* check that the position equals another only if the states are identical */ 2766 for(j=0; j<nstate; j++) { 2767 if(indgo[j] == n) { 2768 2769 /* we have some disagreement */ 2770 if(flag) 2771 goto nextn; 2772 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) { 2773 2774 /* states are equal */ 2775 indgo[i] = n; 2776 if(adb > 1) 2777 Bprint(ftable, 2778 "State %d: entry at %d equals state %d\n", 2779 i, n, j); 2780 return; 2781 } 2782 2783 /* we have some disagreement */ 2784 goto nextn; 2785 } 2786 } 2787 2788 for(r = q1; r < q2; r += 2) { 2789 if((s = *r+n+amem) >= &amem[ACTSIZE]) 2790 error("out of space in optimizer a array"); 2791 if(s > maxa) 2792 maxa = s; 2793 if(*s != 0 && *s != r[1]) 2794 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]); 2795 *s = r[1]; 2796 } 2797 indgo[i] = n; 2798 if(adb > 1) 2799 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]); 2800 return; 2801 nextn:; 2802 } 2803 error("Error; failure to place state %d\n", i); 2804 } 2805 2806 /* 2807 * finds the next i 2808 */ 2809 int 2810 nxti(void) 2811 { 2812 int i, max, maxi; 2813 2814 max = 0; 2815 maxi = 0; 2816 for(i = 1; i <= nnonter; i++) 2817 if(ggreed[i] >= max) { 2818 max = ggreed[i]; 2819 maxi = -i; 2820 } 2821 for(i = 0; i < nstate; ++i) 2822 if(tystate[i] >= max) { 2823 max = tystate[i]; 2824 maxi = i; 2825 } 2826 if(nxdb) 2827 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max); 2828 if(max == 0) 2829 return NOMORE; 2830 return maxi; 2831 } 2832 2833 /* 2834 * write summary 2835 */ 2836 void 2837 osummary(void) 2838 { 2839 2840 int i, *p; 2841 2842 if(foutput == 0) 2843 return; 2844 i = 0; 2845 for(p = maxa; p >= amem; p--) 2846 if(*p == 0) 2847 i++; 2848 2849 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n", 2850 pmem-mem0+1, MEMSIZE, maxa-amem+1, ACTSIZE); 2851 Bprint(foutput, "%d table entries, %d zero\n", (maxa-amem)+1, i); 2852 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff); 2853 } 2854 2855 /* 2856 * this version is for C 2857 * write out the optimized parser 2858 */ 2859 void 2860 aoutput(void) 2861 { 2862 Bprint(ftable, "#define\tYYLAST\t%d\n", maxa-amem+1); 2863 arout("yyact", amem, (maxa-amem)+1); 2864 arout("yypact", indgo, nstate); 2865 arout("yypgo", pgo, nnonter+1); 2866 } 2867 2868 void 2869 arout(char *s, int *v, int n) 2870 { 2871 int i; 2872 2873 Bprint(ftable, "short %s[] =\n{", s); 2874 for(i = 0; i < n;) { 2875 if(i%10 == 0) 2876 Bprint(ftable, "\n"); 2877 Bprint(ftable, "%4d", v[i]); 2878 i++; 2879 if(i == n) 2880 Bprint(ftable, "\n};\n"); 2881 else 2882 Bprint(ftable, ","); 2883 } 2884 } 2885 2886 /* 2887 * read and convert an integer from the standard input 2888 * return the terminating character 2889 * blanks, tabs, and newlines are ignored 2890 */ 2891 int 2892 gtnm(void) 2893 { 2894 int s, val, c; 2895 2896 s = 1; 2897 val = 0; 2898 while((c=Bgetrune(finput)) != Beof) { 2899 if(isdigit(c)) 2900 val = val*10 + c-'0'; 2901 else 2902 if(c == '-') 2903 s = -1; 2904 else 2905 break; 2906 } 2907 *pmem++ = s*val; 2908 if(pmem > &mem0[MEMSIZE]) 2909 error("out of space"); 2910 return c; 2911 } 2912