1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 6 #define Bungetrune Bungetc /* ok for now. */ 7 8 /* 9 * all these are 32 bit 10 */ 11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */ 12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037))) 13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037))) 14 #define NWORDS(n) (((n)+32)/32) 15 16 #define PARSER "/sys/lib/yaccpar" 17 #define PARSERS "/sys/lib/yaccpars" 18 #define TEMPNAME "y.tmp.XXXXXX" 19 #define ACTNAME "y.acts.XXXXXX" 20 #define OFILE "tab.c" 21 #define FILEU "output" 22 #define FILED "tab.h" 23 #define FILEDEBUG "debug" 24 25 enum 26 { 27 /* 28 * the following are adjustable 29 * according to memory size 30 */ 31 ACTSIZE = 40000, 32 MEMSIZE = 40000, 33 NSTATES = 2000, 34 NTERMS = 511, 35 NPROD = 1600, 36 NNONTERM = 600, 37 TEMPSIZE = 2000, 38 CNAMSZ = 10000, 39 LSETSIZE = 2400, 40 WSETSIZE = 350, 41 42 NAMESIZE = 50, 43 NTYPES = 63, 44 ISIZE = 400, 45 46 PRIVATE = 0xE000, /* unicode private use */ 47 48 /* relationships which must hold: 49 TBITSET ints must hold NTERMS+1 bits... 50 WSETSIZE >= NNONTERM 51 LSETSIZE >= NNONTERM 52 TEMPSIZE >= NTERMS + NNONTERM + 1 53 TEMPSIZE >= NSTATES 54 */ 55 56 NTBASE = 010000, 57 ERRCODE = 8190, 58 ACCEPTCODE = 8191, 59 60 NOASC = 0, /* no assoc. */ 61 LASC = 1, /* left assoc. */ 62 RASC = 2, /* right assoc. */ 63 BASC = 3, /* binary assoc. */ 64 65 /* flags for state generation */ 66 67 DONE = 0, 68 MUSTDO = 1, 69 MUSTLOOKAHEAD = 2, 70 71 /* flags for a rule having an action, and being reduced */ 72 73 ACTFLAG = 04, 74 REDFLAG = 010, 75 76 /* output parser flags */ 77 YYFLAG1 = -1000, 78 79 /* parse tokens */ 80 IDENTIFIER = PRIVATE, 81 MARK, 82 TERM, 83 LEFT, 84 RIGHT, 85 BINARY, 86 PREC, 87 LCURLY, 88 IDENTCOLON, 89 NUMBER, 90 START, 91 TYPEDEF, 92 TYPENAME, 93 UNION, 94 95 ENDFILE = 0, 96 97 EMPTY = 1, 98 WHOKNOWS = 0, 99 OK = 1, 100 NOMORE = -1000, 101 }; 102 103 /* macros for getting associativity and precedence levels */ 104 105 #define ASSOC(i) ((i)&03) 106 #define PLEVEL(i) (((i)>>4)&077) 107 #define TYPE(i) (((i)>>10)&077) 108 109 /* macros for setting associativity and precedence levels */ 110 111 #define SETASC(i,j) i |= j 112 #define SETPLEV(i,j) i |= (j<<4) 113 #define SETTYPE(i,j) i |= (j<<10) 114 115 /* looping macros */ 116 117 #define TLOOP(i) for(i=1; i<=ntokens; i++) 118 #define NTLOOP(i) for(i=0; i<=nnonter; i++) 119 #define PLOOP(s,i) for(i=s; i<nprod; i++) 120 #define SLOOP(i) for(i=0; i<nstate; i++) 121 #define WSBUMP(x) x++ 122 #define WSLOOP(s,j) for(j=s; j<cwp; j++) 123 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++) 124 #define SETLOOP(i) for(i=0; i<tbitset; i++) 125 126 /* command to clobber tempfiles after use */ 127 128 #define ZAPFILE(x) if(x) remove(x) 129 130 /* I/O descriptors */ 131 132 Biobuf* faction; /* file for saving actions */ 133 Biobuf* fdefine; /* file for #defines */ 134 Biobuf* fdebug; /* y.debug for strings for debugging */ 135 Biobuf* ftable; /* y.tab.c file */ 136 Biobuf* ftemp; /* tempfile to pass 2 */ 137 Biobuf* finput; /* input file */ 138 Biobuf* foutput; /* y.output file */ 139 140 /* communication variables between various I/O routines */ 141 142 char* infile; /* input file name */ 143 int numbval; /* value of an input number */ 144 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */ 145 146 /* structure declarations */ 147 148 typedef 149 struct 150 { 151 int lset[TBITSET]; 152 } Lkset; 153 154 typedef 155 struct 156 { 157 int* pitem; 158 Lkset* look; 159 } Item; 160 161 typedef 162 struct 163 { 164 char* name; 165 int value; 166 } Symb; 167 168 typedef 169 struct 170 { 171 int* pitem; 172 int flag; 173 Lkset ws; 174 } Wset; 175 176 /* storage of names */ 177 178 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */ 179 int cnamsz = CNAMSZ; /* size of cnames */ 180 char* cnamp = cnames; /* place where next name is to be put in */ 181 int ndefout = 4; /* number of defined symbols output */ 182 char* tempname; 183 char* actname; 184 char ttempname[] = TEMPNAME; 185 char tactname[] = ACTNAME; 186 char* parser = PARSER; 187 char* yydebug; 188 189 /* storage of types */ 190 int ntypes; /* number of types defined */ 191 char* typeset[NTYPES]; /* pointers to type tags */ 192 193 /* token information */ 194 195 int ntokens = 0 ; /* number of tokens */ 196 Symb tokset[NTERMS]; 197 int toklev[NTERMS]; /* vector with the precedence of the terminals */ 198 199 /* nonterminal information */ 200 201 int nnonter = -1; /* the number of nonterminals */ 202 Symb nontrst[NNONTERM]; 203 int start; /* start symbol */ 204 205 /* assigned token type values */ 206 int extval = 0; 207 208 char* ytabc = OFILE; /* name of y.tab.c */ 209 210 /* grammar rule information */ 211 212 int mem0[MEMSIZE] ; /* production storage */ 213 int* mem = mem0; 214 int nprod = 1; /* number of productions */ 215 int* prdptr[NPROD]; /* pointers to descriptions of productions */ 216 int levprd[NPROD]; /* precedence levels for the productions */ 217 int rlines[NPROD]; /* line number for this rule */ 218 219 /* state information */ 220 221 int nstate = 0; /* number of states */ 222 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */ 223 int tystate[NSTATES]; /* contains type information about the states */ 224 int defact[NSTATES]; /* the default actions of states */ 225 int tstates[NTERMS]; /* states generated by terminal gotos */ 226 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */ 227 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */ 228 int lastred; /* the number of the last reduction of a state */ 229 230 /* lookahead set information */ 231 232 Lkset lkst[LSETSIZE]; 233 int nolook; /* flag to turn off lookahead computations */ 234 int tbitset; /* size of lookahead sets */ 235 int nlset = 0; /* next lookahead set index */ 236 int nolook = 0; /* flag to suppress lookahead computations */ 237 Lkset clset; /* temporary storage for lookahead computations */ 238 239 /* working set information */ 240 241 Wset wsets[WSETSIZE]; 242 Wset* cwp; 243 244 /* storage for action table */ 245 246 int amem[ACTSIZE]; /* action table storage */ 247 int* memp = amem; /* next free action table position */ 248 int indgo[NSTATES]; /* index to the stored goto table */ 249 250 /* temporary vector, indexable by states, terms, or ntokens */ 251 252 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */ 253 int lineno = 1; /* current input line number */ 254 int fatfl = 1; /* if on, error is fatal */ 255 int nerrors = 0; /* number of errors */ 256 257 /* statistics collection variables */ 258 259 int zzgoent; 260 int zzgobest; 261 int zzacent; 262 int zzexcp; 263 int zzclose; 264 int zzrrconf; 265 int zzsrconf; 266 267 int* ggreed = lkst[0].lset; 268 int* pgo = wsets[0].ws.lset; 269 int* yypgo = &nontrst[0].value; 270 271 int maxspr = 0; /* maximum spread of any entry */ 272 int maxoff = 0; /* maximum offset into a array */ 273 int* pmem = mem0; 274 int* maxa; 275 int nxdb = 0; 276 int adb = 0; 277 278 279 /* storage for information about the nonterminals */ 280 281 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */ 282 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */ 283 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */ 284 285 /* random stuff picked out from between functions */ 286 287 int indebug = 0; 288 Wset* zzcwp = wsets; 289 int zzgoent = 0; 290 int zzgobest = 0; 291 int zzacent = 0; 292 int zzexcp = 0; 293 int zzclose = 0; 294 int zzsrconf = 0; 295 int* zzmemsz = mem0; 296 int zzrrconf = 0; 297 int pidebug = 0; /* debugging flag for putitem */ 298 int gsdebug = 0; 299 int cldebug = 0; /* debugging flag for closure */ 300 int pkdebug = 0; 301 int g2debug = 0; 302 303 struct 304 { 305 char* name; 306 long value; 307 } resrv[] = 308 { 309 "binary", BINARY, 310 "left", LEFT, 311 "nonassoc", BINARY, 312 "prec", PREC, 313 "right", RIGHT, 314 "start", START, 315 "term", TERM, 316 "token", TERM, 317 "type", TYPEDEF, 318 "union", UNION, 319 0, 320 }; 321 322 /* define functions */ 323 324 void main(int, char**); 325 void others(void); 326 char* chcopy(char*, char*); 327 char* writem(int*); 328 char* symnam(int); 329 void summary(void); 330 void error(char*, ...); 331 void aryfil(int*, int, int); 332 int setunion(int*, int*); 333 void prlook(Lkset*); 334 void cpres(void); 335 void cpfir(void); 336 int state(int); 337 void putitem(int*, Lkset*); 338 void cempty(void); 339 void stagen(void); 340 void closure(int); 341 Lkset* flset(Lkset*); 342 void cleantmp(void); 343 void intr(void); 344 void setup(int, char**); 345 void finact(void); 346 int defin(int, char*); 347 void defout(int); 348 char* cstash(char*); 349 long gettok(void); 350 int fdtype(int); 351 int chfind(int, char*); 352 void cpyunion(void); 353 void cpycode(void); 354 int skipcom(void); 355 void cpyact(int); 356 void openup(char*, int, int, int, char*); 357 void output(void); 358 int apack(int*, int); 359 void go2out(void); 360 void go2gen(int); 361 void precftn(int, int, int); 362 void wract(int); 363 void wrstate(int); 364 void warray(char*, int*, int); 365 void hideprod(void); 366 void callopt(void); 367 void gin(int); 368 void stin(int); 369 int nxti(void); 370 void osummary(void); 371 void aoutput(void); 372 void arout(char*, int*, int); 373 int gtnm(void); 374 375 void 376 main(int argc, char *argv[]) 377 { 378 379 setup(argc, argv); /* initialize and read productions */ 380 tbitset = NWORDS(ntokens); 381 cpres(); /* make table of which productions yield a given nonterminal */ 382 cempty(); /* make a table of which nonterminals can match the empty string */ 383 cpfir(); /* make a table of firsts of nonterminals */ 384 stagen(); /* generate the states */ 385 output(); /* write the states and the tables */ 386 go2out(); 387 hideprod(); 388 summary(); 389 callopt(); 390 others(); 391 exits(0); 392 } 393 394 /* 395 * put out other arrays, copy the parsers 396 */ 397 void 398 others(void) 399 { 400 int c, i, j; 401 402 finput = Bopen(parser, OREAD); 403 if(finput == 0) 404 error("cannot find parser %s", parser); 405 warray("yyr1", levprd, nprod); 406 aryfil(temp1, nprod, 0); 407 PLOOP(1, i) 408 temp1[i] = prdptr[i+1]-prdptr[i]-2; 409 warray("yyr2", temp1, nprod); 410 411 aryfil(temp1, nstate, -1000); 412 TLOOP(i) 413 for(j=tstates[i]; j!=0; j=mstates[j]) 414 temp1[j] = i; 415 NTLOOP(i) 416 for(j=ntstates[i]; j!=0; j=mstates[j]) 417 temp1[j] = -i; 418 warray("yychk", temp1, nstate); 419 warray("yydef", defact, nstate); 420 421 /* put out token translation tables */ 422 /* table 1 has 0-256 */ 423 aryfil(temp1, 256, 0); 424 c = 0; 425 TLOOP(i) { 426 j = tokset[i].value; 427 if(j >= 0 && j < 256) { 428 if(temp1[j]) { 429 print("yacc bug -- cant have 2 different Ts with same value\n"); 430 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); 431 nerrors++; 432 } 433 temp1[j] = i; 434 if(j > c) 435 c = j; 436 } 437 } 438 warray("yytok1", temp1, c+1); 439 440 /* table 2 has PRIVATE-PRIVATE+256 */ 441 aryfil(temp1, 256, 0); 442 c = 0; 443 TLOOP(i) { 444 j = tokset[i].value - PRIVATE; 445 if(j >= 0 && j < 256) { 446 if(temp1[j]) { 447 print("yacc bug -- cant have 2 different Ts with same value\n"); 448 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); 449 nerrors++; 450 } 451 temp1[j] = i; 452 if(j > c) 453 c = j; 454 } 455 } 456 warray("yytok2", temp1, c+1); 457 458 /* table 3 has everything else */ 459 Bprint(ftable, "long yytok3[] =\n{\n"); 460 c = 0; 461 TLOOP(i) { 462 j = tokset[i].value; 463 if(j >= 0 && j < 256) 464 continue; 465 if(j >= PRIVATE && j < 256+PRIVATE) 466 continue; 467 468 Bprint(ftable, "%4d,%4d,", j, i); 469 c++; 470 if(c%5 == 0) 471 Bprint(ftable, "\n"); 472 } 473 Bprint(ftable, "%4d\n};\n", 0); 474 475 /* copy parser text */ 476 while((c=Bgetrune(finput)) != Beof) { 477 if(c == '$') { 478 if((c = Bgetrune(finput)) != 'A') 479 Bputrune(ftable, '$'); 480 else { /* copy actions */ 481 faction = Bopen(actname, OREAD); 482 if(faction == 0) 483 error("cannot reopen action tempfile"); 484 while((c=Bgetrune(faction)) != Beof) 485 Bputrune(ftable, c); 486 Bterm(faction); 487 ZAPFILE(actname); 488 c = Bgetrune(finput); 489 } 490 } 491 Bputrune(ftable, c); 492 } 493 Bterm(ftable); 494 } 495 496 /* 497 * copies string q into p, returning next free char ptr 498 */ 499 char* 500 chcopy(char* p, char* q) 501 { 502 int c; 503 504 while(c = *q) { 505 if(c == '"') 506 *p++ = '\\'; 507 *p++ = c; 508 q++; 509 } 510 *p = 0; 511 return p; 512 } 513 514 /* 515 * creates output string for item pointed to by pp 516 */ 517 char* 518 writem(int *pp) 519 { 520 int i,*p; 521 static char sarr[ISIZE]; 522 char* q; 523 524 for(p=pp; *p>0; p++) 525 ; 526 p = prdptr[-*p]; 527 q = chcopy(sarr, nontrst[*p-NTBASE].name); 528 q = chcopy(q, ": "); 529 for(;;) { 530 *q = ' '; 531 p++; 532 if(p == pp) 533 *q = '.'; 534 q++; 535 *q = '\0'; 536 i = *p; 537 if(i <= 0) 538 break; 539 q = chcopy(q, symnam(i)); 540 if(q > &sarr[ISIZE-30]) 541 error("item too big"); 542 } 543 544 /* an item calling for a reduction */ 545 i = *pp; 546 if(i < 0 ) { 547 q = chcopy(q, " ("); 548 sprint(q, "%d)", -i); 549 } 550 return sarr; 551 } 552 553 /* 554 * return a pointer to the name of symbol i 555 */ 556 char* 557 symnam(int i) 558 { 559 char* cp; 560 561 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; 562 if(*cp == ' ') 563 cp++; 564 return cp; 565 } 566 567 /* 568 * output the summary on y.output 569 */ 570 void 571 summary(void) 572 { 573 574 if(foutput != 0) { 575 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n", 576 ntokens, NTERMS, nnonter, NNONTERM); 577 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n", 578 nprod, NPROD, nstate, NSTATES); 579 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", 580 zzsrconf, zzrrconf); 581 Bprint(foutput, "%d/%d working sets used\n", 582 (int)(zzcwp-wsets), WSETSIZE); 583 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n", 584 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE); 585 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE); 586 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate); 587 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp); 588 Bprint(foutput, "%d goto entries\n", zzgoent); 589 Bprint(foutput, "%d entries saved by goto default\n", zzgobest); 590 } 591 if(zzsrconf != 0 || zzrrconf != 0) { 592 print("\nconflicts: "); 593 if(zzsrconf) 594 print("%d shift/reduce", zzsrconf); 595 if(zzsrconf && zzrrconf) 596 print(", "); 597 if(zzrrconf) 598 print("%d reduce/reduce", zzrrconf); 599 print("\n"); 600 } 601 if(ftemp != 0) { 602 Bterm(ftemp); 603 ftemp = 0; 604 } 605 if(fdefine != 0) { 606 Bterm(fdefine); 607 fdefine = 0; 608 } 609 } 610 611 /* 612 * write out error comment -- NEEDS WORK 613 */ 614 void 615 error(char *s, ...) 616 { 617 618 nerrors++; 619 fprint(2, "\n fatal error:"); 620 fprint(2, s, (&s)[1]); 621 fprint(2, ", %s:%d\n", infile, lineno); 622 if(!fatfl) 623 return; 624 summary(); 625 cleantmp(); 626 exits("error"); 627 } 628 629 /* 630 * set elements 0 through n-1 to c 631 */ 632 void 633 aryfil(int *v, int n, int c) 634 { 635 int i; 636 637 for(i=0; i<n; i++) 638 v[i] = c; 639 } 640 641 /* 642 * set a to the union of a and b 643 * return 1 if b is not a subset of a, 0 otherwise 644 */ 645 int 646 setunion(int *a, int *b) 647 { 648 int i, x, sub; 649 650 sub = 0; 651 SETLOOP(i) { 652 x = *a; 653 *a |= *b; 654 if(*a != x) 655 sub = 1; 656 a++; 657 b++; 658 } 659 return sub; 660 } 661 662 void 663 prlook(Lkset* p) 664 { 665 int j, *pp; 666 667 pp = p->lset; 668 if(pp == 0) 669 Bprint(foutput, "\tNULL"); 670 else { 671 Bprint(foutput, " { "); 672 TLOOP(j) 673 if(BIT(pp,j)) 674 Bprint(foutput, "%s ", symnam(j)); 675 Bprint(foutput, "}"); 676 } 677 } 678 679 /* 680 * compute an array with the beginnings of productions yielding given nonterminals 681 * The array pres points to these lists 682 * the array pyield has the lists: the total size is only NPROD+1 683 */ 684 void 685 cpres(void) 686 { 687 int c, j, i, **pmem; 688 static int *pyield[NPROD]; 689 690 pmem = pyield; 691 NTLOOP(i) { 692 c = i+NTBASE; 693 pres[i] = pmem; 694 fatfl = 0; /* make undefined symbols nonfatal */ 695 PLOOP(0, j) 696 if(*prdptr[j] == c) 697 *pmem++ = prdptr[j]+1; 698 if(pres[i] == pmem) 699 error("nonterminal %s not defined!", nontrst[i].name); 700 } 701 pres[i] = pmem; 702 fatfl = 1; 703 if(nerrors) { 704 summary(); 705 cleantmp(); 706 exits("error"); 707 } 708 if(pmem != &pyield[nprod]) 709 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]); 710 } 711 712 /* 713 * compute an array with the first of nonterminals 714 */ 715 void 716 cpfir(void) 717 { 718 int *p, **s, i, **t, ch, changes; 719 720 zzcwp = &wsets[nnonter]; 721 NTLOOP(i) { 722 aryfil(wsets[i].ws.lset, tbitset, 0); 723 t = pres[i+1]; 724 /* initially fill the sets */ 725 for(s=pres[i]; s<t; ++s) 726 for(p = *s; (ch = *p) > 0; ++p) { 727 if(ch < NTBASE) { 728 SETBIT(wsets[i].ws.lset, ch); 729 break; 730 } 731 if(!pempty[ch-NTBASE]) 732 break; 733 } 734 } 735 736 /* now, reflect transitivity */ 737 changes = 1; 738 while(changes) { 739 changes = 0; 740 NTLOOP(i) { 741 t = pres[i+1]; 742 for(s = pres[i]; s < t; ++s) 743 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) { 744 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset); 745 if(!pempty[ch]) 746 break; 747 } 748 } 749 } 750 751 NTLOOP(i) 752 pfirst[i] = flset(&wsets[i].ws); 753 if(!indebug) 754 return; 755 if(foutput != 0) 756 NTLOOP(i) { 757 Bprint(foutput, "\n%s: ", nontrst[i].name); 758 prlook(pfirst[i]); 759 Bprint(foutput, " %d\n", pempty[i]); 760 } 761 } 762 763 /* 764 * sorts last state,and sees if it equals earlier ones. returns state number 765 */ 766 int 767 state(int c) 768 { 769 Item *p1, *p2, *k, *l, *q1, *q2; 770 int size1, size2, i; 771 772 p1 = pstate[nstate]; 773 p2 = pstate[nstate+1]; 774 if(p1 == p2) 775 return 0; /* null state */ 776 /* sort the items */ 777 for(k = p2-1; k > p1; k--) /* make k the biggest */ 778 for(l = k-1; l >= p1; --l) 779 if(l->pitem > k->pitem) { 780 int *s; 781 Lkset *ss; 782 783 s = k->pitem; 784 k->pitem = l->pitem; 785 l->pitem = s; 786 ss = k->look; 787 k->look = l->look; 788 l->look = ss; 789 } 790 size1 = p2 - p1; /* size of state */ 791 792 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) { 793 /* get ith state */ 794 q1 = pstate[i]; 795 q2 = pstate[i+1]; 796 size2 = q2 - q1; 797 if(size1 != size2) 798 continue; 799 k = p1; 800 for(l = q1; l < q2; l++) { 801 if(l->pitem != k->pitem) 802 break; 803 k++; 804 } 805 if(l != q2) 806 continue; 807 /* found it */ 808 pstate[nstate+1] = pstate[nstate]; /* delete last state */ 809 /* fix up lookaheads */ 810 if(nolook) 811 return i; 812 for(l = q1, k = p1; l < q2; ++l, ++k ) { 813 int s; 814 815 SETLOOP(s) 816 clset.lset[s] = l->look->lset[s]; 817 if(setunion(clset.lset, k->look->lset)) { 818 tystate[i] = MUSTDO; 819 /* register the new set */ 820 l->look = flset( &clset ); 821 } 822 } 823 return i; 824 } 825 /* state is new */ 826 if(nolook) 827 error("yacc state/nolook error"); 828 pstate[nstate+2] = p2; 829 if(nstate+1 >= NSTATES) 830 error("too many states"); 831 if(c >= NTBASE) { 832 mstates[nstate] = ntstates[c-NTBASE]; 833 ntstates[c-NTBASE] = nstate; 834 } else { 835 mstates[nstate] = tstates[c]; 836 tstates[c] = nstate; 837 } 838 tystate[nstate] = MUSTDO; 839 return nstate++; 840 } 841 842 void 843 putitem(int *ptr, Lkset *lptr) 844 { 845 Item *j; 846 847 if(pidebug && foutput != 0) 848 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate); 849 j = pstate[nstate+1]; 850 j->pitem = ptr; 851 if(!nolook) 852 j->look = flset(lptr); 853 pstate[nstate+1] = ++j; 854 if((int*)j > zzmemsz) { 855 zzmemsz = (int*)j; 856 if(zzmemsz >= &mem0[MEMSIZE]) 857 error("out of state space"); 858 } 859 } 860 861 /* 862 * mark nonterminals which derive the empty string 863 * also, look for nonterminals which don't derive any token strings 864 */ 865 void 866 cempty(void) 867 { 868 869 int i, *p; 870 871 /* first, use the array pempty to detect productions that can never be reduced */ 872 /* set pempty to WHONOWS */ 873 aryfil(pempty, nnonter+1, WHOKNOWS); 874 875 /* now, look at productions, marking nonterminals which derive something */ 876 more: 877 PLOOP(0, i) { 878 if(pempty[*prdptr[i] - NTBASE]) 879 continue; 880 for(p = prdptr[i]+1; *p >= 0; ++p) 881 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS) 882 break; 883 /* production can be derived */ 884 if(*p < 0) { 885 pempty[*prdptr[i]-NTBASE] = OK; 886 goto more; 887 } 888 } 889 890 /* now, look at the nonterminals, to see if they are all OK */ 891 NTLOOP(i) { 892 /* the added production rises or falls as the start symbol ... */ 893 if(i == 0) 894 continue; 895 if(pempty[i] != OK) { 896 fatfl = 0; 897 error("nonterminal %s never derives any token string", nontrst[i].name); 898 } 899 } 900 901 if(nerrors) { 902 summary(); 903 cleantmp(); 904 exits("error"); 905 } 906 907 /* now, compute the pempty array, to see which nonterminals derive the empty string */ 908 /* set pempty to WHOKNOWS */ 909 aryfil( pempty, nnonter+1, WHOKNOWS); 910 911 /* loop as long as we keep finding empty nonterminals */ 912 913 again: 914 PLOOP(1, i) { 915 /* not known to be empty */ 916 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) { 917 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p) 918 ; 919 /* we have a nontrivially empty nonterminal */ 920 if(*p < 0) { 921 pempty[*prdptr[i]-NTBASE] = EMPTY; 922 /* got one ... try for another */ 923 goto again; 924 } 925 } 926 } 927 } 928 929 /* 930 * generate the states 931 */ 932 void 933 stagen(void) 934 { 935 936 int c, i, j, more; 937 Wset *p, *q; 938 939 /* initialize */ 940 nstate = 0; 941 942 /* THIS IS FUNNY from the standpoint of portability 943 * it represents the magic moment when the mem0 array, which has 944 * been holding the productions, starts to hold item pointers, of a 945 * different type... 946 * someday, alloc should be used to allocate all this stuff... for now, we 947 * accept that if pointers don't fit in integers, there is a problem... 948 */ 949 950 pstate[0] = pstate[1] = (Item*)mem; 951 aryfil(clset.lset, tbitset, 0); 952 putitem(prdptr[0]+1, &clset); 953 tystate[0] = MUSTDO; 954 nstate = 1; 955 pstate[2] = pstate[1]; 956 957 aryfil(amem, ACTSIZE, 0); 958 959 /* now, the main state generation loop */ 960 for(more=1; more;) { 961 more = 0; 962 SLOOP(i) { 963 if(tystate[i] != MUSTDO) 964 continue; 965 tystate[i] = DONE; 966 aryfil(temp1, nnonter+1, 0); 967 /* take state i, close it, and do gotos */ 968 closure(i); 969 /* generate goto's */ 970 WSLOOP(wsets, p) { 971 if(p->flag) 972 continue; 973 p->flag = 1; 974 c = *(p->pitem); 975 if(c <= 1) { 976 if(pstate[i+1]-pstate[i] <= p-wsets) 977 tystate[i] = MUSTLOOKAHEAD; 978 continue; 979 } 980 /* do a goto on c */ 981 WSLOOP(p, q) 982 /* this item contributes to the goto */ 983 if(c == *(q->pitem)) { 984 putitem(q->pitem+1, &q->ws); 985 q->flag = 1; 986 } 987 if(c < NTBASE) 988 state(c); /* register new state */ 989 else 990 temp1[c-NTBASE] = state(c); 991 } 992 if(gsdebug && foutput != 0) { 993 Bprint(foutput, "%d: ", i); 994 NTLOOP(j) 995 if(temp1[j]) 996 Bprint(foutput, "%s %d, ", 997 nontrst[j].name, temp1[j]); 998 Bprint(foutput, "\n"); 999 } 1000 indgo[i] = apack(&temp1[1], nnonter-1) - 1; 1001 /* do some more */ 1002 more = 1; 1003 } 1004 } 1005 } 1006 1007 /* 1008 * generate the closure of state i 1009 */ 1010 void 1011 closure(int i) 1012 { 1013 1014 Wset *u, *v; 1015 Item *p, *q; 1016 int c, ch, work, k, *pi, **s, **t; 1017 1018 zzclose++; 1019 1020 /* first, copy kernel of state i to wsets */ 1021 cwp = wsets; 1022 ITMLOOP(i, p, q) { 1023 cwp->pitem = p->pitem; 1024 cwp->flag = 1; /* this item must get closed */ 1025 SETLOOP(k) 1026 cwp->ws.lset[k] = p->look->lset[k]; 1027 WSBUMP(cwp); 1028 } 1029 1030 /* now, go through the loop, closing each item */ 1031 work = 1; 1032 while(work) { 1033 work = 0; 1034 WSLOOP(wsets, u) { 1035 if(u->flag == 0) 1036 continue; 1037 /* dot is before c */ 1038 c = *(u->pitem); 1039 if(c < NTBASE) { 1040 u->flag = 0; 1041 /* only interesting case is where . is before nonterminal */ 1042 continue; 1043 } 1044 1045 /* compute the lookahead */ 1046 aryfil(clset.lset, tbitset, 0); 1047 1048 /* find items involving c */ 1049 WSLOOP(u, v) 1050 if(v->flag == 1 && *(pi=v->pitem) == c) { 1051 v->flag = 0; 1052 if(nolook) 1053 continue; 1054 while((ch = *++pi) > 0) { 1055 /* terminal symbol */ 1056 if(ch < NTBASE) { 1057 SETBIT(clset.lset, ch); 1058 break; 1059 } 1060 /* nonterminal symbol */ 1061 setunion(clset.lset, pfirst[ch-NTBASE]->lset); 1062 if(!pempty[ch-NTBASE]) 1063 break; 1064 } 1065 if(ch <= 0) 1066 setunion(clset.lset, v->ws.lset); 1067 } 1068 1069 /* 1070 * now loop over productions derived from c 1071 * c is now nonterminal number 1072 */ 1073 c -= NTBASE; 1074 t = pres[c+1]; 1075 for(s = pres[c]; s < t; ++s) { 1076 /* 1077 * put these items into the closure 1078 * is the item there 1079 */ 1080 WSLOOP(wsets, v) 1081 /* yes, it is there */ 1082 if(v->pitem == *s) { 1083 if(nolook) 1084 goto nexts; 1085 if(setunion(v->ws.lset, clset.lset)) 1086 v->flag = work = 1; 1087 goto nexts; 1088 } 1089 1090 /* not there; make a new entry */ 1091 if(cwp-wsets+1 >= WSETSIZE) 1092 error( "working set overflow"); 1093 cwp->pitem = *s; 1094 cwp->flag = 1; 1095 if(!nolook) { 1096 work = 1; 1097 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; 1098 } 1099 WSBUMP(cwp); 1100 1101 nexts:; 1102 } 1103 } 1104 } 1105 1106 /* have computed closure; flags are reset; return */ 1107 if(cwp > zzcwp) 1108 zzcwp = cwp; 1109 if(cldebug && foutput != 0) { 1110 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook); 1111 WSLOOP(wsets, u) { 1112 if(u->flag) 1113 Bprint(foutput, "flag set!\n"); 1114 u->flag = 0; 1115 Bprint(foutput, "\t%s", writem(u->pitem)); 1116 prlook(&u->ws); 1117 Bprint(foutput, "\n"); 1118 } 1119 } 1120 } 1121 1122 /* 1123 * decide if the lookahead set pointed to by p is known 1124 * return pointer to a perminent location for the set 1125 */ 1126 Lkset* 1127 flset(Lkset *p) 1128 { 1129 Lkset *q; 1130 int *u, *v, *w, j; 1131 1132 for(q = &lkst[nlset]; q-- > lkst;) { 1133 u = p->lset; 1134 v = q->lset; 1135 w = &v[tbitset]; 1136 while(v < w) 1137 if(*u++ != *v++) 1138 goto more; 1139 /* we have matched */ 1140 return q; 1141 more:; 1142 } 1143 /* add a new one */ 1144 q = &lkst[nlset++]; 1145 if(nlset >= LSETSIZE) 1146 error("too many lookahead sets"); 1147 SETLOOP(j) 1148 q->lset[j] = p->lset[j]; 1149 return q; 1150 } 1151 1152 void 1153 cleantmp(void) 1154 { 1155 ZAPFILE(actname); 1156 ZAPFILE(tempname); 1157 } 1158 1159 void 1160 intr(void) 1161 { 1162 cleantmp(); 1163 exits("interrupted"); 1164 } 1165 1166 void 1167 setup(int argc, char *argv[]) 1168 { 1169 long c, t; 1170 int i, j, lev, ty, ytab, *p; 1171 int vflag, dflag, stem; 1172 char actnm[8], *stemc, *s, dirbuf[128]; 1173 1174 ytab = 0; 1175 vflag = 0; 1176 dflag = 0; 1177 stem = 0; 1178 stemc = "y"; 1179 foutput = 0; 1180 fdefine = 0; 1181 fdebug = 0; 1182 ARGBEGIN{ 1183 case 'v': 1184 case 'V': 1185 vflag++; 1186 break; 1187 case 'D': 1188 yydebug = ARGF(); 1189 break; 1190 case 'd': 1191 dflag++; 1192 break; 1193 case 'o': 1194 ytab++; 1195 ytabc = ARGF(); 1196 break; 1197 case 's': 1198 stem++; 1199 stemc = ARGF(); 1200 break; 1201 case 'S': 1202 parser = PARSERS; 1203 break; 1204 default: 1205 error("illegal option: %c", ARGC()); 1206 }ARGEND 1207 openup(stemc, dflag, vflag, ytab, ytabc); 1208 1209 ftemp = Bopen(tempname = mktemp(ttempname), OWRITE); 1210 faction = Bopen(actname = mktemp(tactname), OWRITE); 1211 if(ftemp == 0 || faction == 0) 1212 error("cannot open temp file"); 1213 if(argc < 1) 1214 error("no input file"); 1215 infile = argv[0]; 1216 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){ 1217 i = strlen(infile)+1+strlen(dirbuf)+1+10; 1218 s = malloc(i); 1219 if(s != nil){ 1220 snprint(s, i, "%s/%s", dirbuf, infile); 1221 cleanname(s); 1222 infile = s; 1223 } 1224 } 1225 finput = Bopen(infile, OREAD); 1226 if(finput == 0) 1227 error("cannot open '%s'", argv[0]); 1228 cnamp = cnames; 1229 1230 defin(0, "$end"); 1231 extval = PRIVATE; /* tokens start in unicode 'private use' */ 1232 defin(0, "error"); 1233 defin(1, "$accept"); 1234 defin(0, "$unk"); 1235 mem = mem0; 1236 i = 0; 1237 1238 for(t = gettok(); t != MARK && t != ENDFILE;) 1239 switch(t) { 1240 case ';': 1241 t = gettok(); 1242 break; 1243 1244 case START: 1245 if(gettok() != IDENTIFIER) 1246 error("bad %%start construction"); 1247 start = chfind(1, tokname); 1248 t = gettok(); 1249 continue; 1250 1251 case TYPEDEF: 1252 if(gettok() != TYPENAME) 1253 error("bad syntax in %%type"); 1254 ty = numbval; 1255 for(;;) { 1256 t = gettok(); 1257 switch(t) { 1258 case IDENTIFIER: 1259 if((t=chfind(1, tokname)) < NTBASE) { 1260 j = TYPE(toklev[t]); 1261 if(j != 0 && j != ty) 1262 error("type redeclaration of token %s", 1263 tokset[t].name); 1264 else 1265 SETTYPE(toklev[t], ty); 1266 } else { 1267 j = nontrst[t-NTBASE].value; 1268 if(j != 0 && j != ty) 1269 error("type redeclaration of nonterminal %s", 1270 nontrst[t-NTBASE].name ); 1271 else 1272 nontrst[t-NTBASE].value = ty; 1273 } 1274 case ',': 1275 continue; 1276 case ';': 1277 t = gettok(); 1278 default: 1279 break; 1280 } 1281 break; 1282 } 1283 continue; 1284 1285 case UNION: 1286 /* copy the union declaration to the output */ 1287 cpyunion(); 1288 t = gettok(); 1289 continue; 1290 1291 case LEFT: 1292 case BINARY: 1293 case RIGHT: 1294 i++; 1295 1296 case TERM: 1297 /* nonzero means new prec. and assoc. */ 1298 lev = t-TERM; 1299 ty = 0; 1300 1301 /* get identifiers so defined */ 1302 t = gettok(); 1303 1304 /* there is a type defined */ 1305 if(t == TYPENAME) { 1306 ty = numbval; 1307 t = gettok(); 1308 } 1309 for(;;) { 1310 switch(t) { 1311 case ',': 1312 t = gettok(); 1313 continue; 1314 1315 case ';': 1316 break; 1317 1318 case IDENTIFIER: 1319 j = chfind(0, tokname); 1320 if(j >= NTBASE) 1321 error("%s defined earlier as nonterminal", tokname); 1322 if(lev) { 1323 if(ASSOC(toklev[j])) 1324 error("redeclaration of precedence of %s", tokname); 1325 SETASC(toklev[j], lev); 1326 SETPLEV(toklev[j], i); 1327 } 1328 if(ty) { 1329 if(TYPE(toklev[j])) 1330 error("redeclaration of type of %s", tokname); 1331 SETTYPE(toklev[j],ty); 1332 } 1333 t = gettok(); 1334 if(t == NUMBER) { 1335 tokset[j].value = numbval; 1336 if(j < ndefout && j > 3) 1337 error("please define type number of %s earlier", 1338 tokset[j].name); 1339 t = gettok(); 1340 } 1341 continue; 1342 } 1343 break; 1344 } 1345 continue; 1346 1347 case LCURLY: 1348 defout(0); 1349 cpycode(); 1350 t = gettok(); 1351 continue; 1352 1353 default: 1354 error("syntax error"); 1355 } 1356 if(t == ENDFILE) 1357 error("unexpected EOF before %%"); 1358 1359 /* t is MARK */ 1360 Bprint(ftable, "extern int yyerrflag;\n"); 1361 Bprint(ftable, "#ifndef YYMAXDEPTH\n"); 1362 Bprint(ftable, "#define YYMAXDEPTH 150\n"); 1363 Bprint(ftable, "#endif\n" ); 1364 if(!ntypes) { 1365 Bprint(ftable, "#ifndef YYSTYPE\n"); 1366 Bprint(ftable, "#define YYSTYPE int\n"); 1367 Bprint(ftable, "#endif\n"); 1368 } 1369 Bprint(ftable, "YYSTYPE yylval;\n"); 1370 Bprint(ftable, "YYSTYPE yyval;\n"); 1371 1372 prdptr[0] = mem; 1373 1374 /* added production */ 1375 *mem++ = NTBASE; 1376 1377 /* if start is 0, we will overwrite with the lhs of the first rule */ 1378 *mem++ = start; 1379 *mem++ = 1; 1380 *mem++ = 0; 1381 prdptr[1] = mem; 1382 while((t=gettok()) == LCURLY) 1383 cpycode(); 1384 if(t != IDENTCOLON) 1385 error("bad syntax on first rule"); 1386 1387 if(!start) 1388 prdptr[0][1] = chfind(1, tokname); 1389 1390 /* read rules */ 1391 while(t != MARK && t != ENDFILE) { 1392 /* process a rule */ 1393 rlines[nprod] = lineno; 1394 if(t == '|') 1395 *mem++ = *prdptr[nprod-1]; 1396 else 1397 if(t == IDENTCOLON) { 1398 *mem = chfind(1, tokname); 1399 if(*mem < NTBASE) 1400 error("token illegal on LHS of grammar rule"); 1401 mem++; 1402 } else 1403 error("illegal rule: missing semicolon or | ?"); 1404 /* read rule body */ 1405 t = gettok(); 1406 1407 more_rule: 1408 while(t == IDENTIFIER) { 1409 *mem = chfind(1, tokname); 1410 if(*mem < NTBASE) 1411 levprd[nprod] = toklev[*mem]; 1412 mem++; 1413 t = gettok(); 1414 } 1415 if(t == PREC) { 1416 if(gettok() != IDENTIFIER) 1417 error("illegal %%prec syntax"); 1418 j = chfind(2, tokname); 1419 if(j >= NTBASE) 1420 error("nonterminal %s illegal after %%prec", 1421 nontrst[j-NTBASE].name); 1422 levprd[nprod] = toklev[j]; 1423 t = gettok(); 1424 } 1425 if(t == '=') { 1426 levprd[nprod] |= ACTFLAG; 1427 Bprint(faction, "\ncase %d:", nprod); 1428 cpyact(mem-prdptr[nprod]-1); 1429 Bprint(faction, " break;"); 1430 if((t=gettok()) == IDENTIFIER) { 1431 1432 /* action within rule... */ 1433 sprint(actnm, "$$%d", nprod); 1434 1435 /* make it a nonterminal */ 1436 j = chfind(1, actnm); 1437 1438 /* 1439 * the current rule will become rule number nprod+1 1440 * move the contents down, and make room for the null 1441 */ 1442 for(p = mem; p >= prdptr[nprod]; --p) 1443 p[2] = *p; 1444 mem += 2; 1445 1446 /* enter null production for action */ 1447 p = prdptr[nprod]; 1448 *p++ = j; 1449 *p++ = -nprod; 1450 1451 /* update the production information */ 1452 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; 1453 levprd[nprod] = ACTFLAG; 1454 if(++nprod >= NPROD) 1455 error("more than %d rules", NPROD); 1456 prdptr[nprod] = p; 1457 1458 /* make the action appear in the original rule */ 1459 *mem++ = j; 1460 1461 /* get some more of the rule */ 1462 goto more_rule; 1463 } 1464 } 1465 1466 while(t == ';') 1467 t = gettok(); 1468 *mem++ = -nprod; 1469 1470 /* check that default action is reasonable */ 1471 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) { 1472 1473 /* no explicit action, LHS has value */ 1474 int tempty; 1475 1476 tempty = prdptr[nprod][1]; 1477 if(tempty < 0) 1478 error("must return a value, since LHS has a type"); 1479 else 1480 if(tempty >= NTBASE) 1481 tempty = nontrst[tempty-NTBASE].value; 1482 else 1483 tempty = TYPE(toklev[tempty]); 1484 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value) 1485 error("default action causes potential type clash"); 1486 } 1487 nprod++; 1488 if(nprod >= NPROD) 1489 error("more than %d rules", NPROD); 1490 prdptr[nprod] = mem; 1491 levprd[nprod] = 0; 1492 } 1493 1494 /* end of all rules */ 1495 defout(1); 1496 1497 finact(); 1498 if(t == MARK) { 1499 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); 1500 while((c=Bgetrune(finput)) != Beof) 1501 Bputrune(ftable, c); 1502 } 1503 Bterm(finput); 1504 } 1505 1506 /* 1507 * finish action routine 1508 */ 1509 void 1510 finact(void) 1511 { 1512 1513 Bterm(faction); 1514 Bprint(ftable, "#define YYEOFCODE %d\n", 1); 1515 Bprint(ftable, "#define YYERRCODE %d\n", 2); 1516 } 1517 1518 /* 1519 * define s to be a terminal if t=0 1520 * or a nonterminal if t=1 1521 */ 1522 int 1523 defin(int nt, char *s) 1524 { 1525 int val; 1526 Rune rune; 1527 1528 val = 0; 1529 if(nt) { 1530 nnonter++; 1531 if(nnonter >= NNONTERM) 1532 error("too many nonterminals, limit %d",NNONTERM); 1533 nontrst[nnonter].name = cstash(s); 1534 return NTBASE + nnonter; 1535 } 1536 1537 /* must be a token */ 1538 ntokens++; 1539 if(ntokens >= NTERMS) 1540 error("too many terminals, limit %d", NTERMS); 1541 tokset[ntokens].name = cstash(s); 1542 1543 /* establish value for token */ 1544 /* single character literal */ 1545 if(s[0] == ' ') { 1546 val = chartorune(&rune, &s[1]); 1547 if(s[val+1] == 0) { 1548 val = rune; 1549 goto out; 1550 } 1551 } 1552 1553 /* escape sequence */ 1554 if(s[0] == ' ' && s[1] == '\\') { 1555 if(s[3] == 0) { 1556 /* single character escape sequence */ 1557 switch(s[2]) { 1558 case 'n': val = '\n'; break; 1559 case 'r': val = '\r'; break; 1560 case 'b': val = '\b'; break; 1561 case 't': val = '\t'; break; 1562 case 'f': val = '\f'; break; 1563 case '\'': val = '\''; break; 1564 case '"': val = '"'; break; 1565 case '\\': val = '\\'; break; 1566 default: error("invalid escape"); 1567 } 1568 goto out; 1569 } 1570 1571 /* \nnn sequence */ 1572 if(s[2] >= '0' && s[2] <= '7') { 1573 if(s[3] < '0' || 1574 s[3] > '7' || 1575 s[4] < '0' || 1576 s[4] > '7' || 1577 s[5] != 0) 1578 error("illegal \\nnn construction"); 1579 val = 64*s[2] + 8*s[3] + s[4] - 73*'0'; 1580 if(val == 0) 1581 error("'\\000' is illegal"); 1582 goto out; 1583 } 1584 error("unknown escape"); 1585 } 1586 val = extval++; 1587 1588 out: 1589 tokset[ntokens].value = val; 1590 toklev[ntokens] = 0; 1591 return ntokens; 1592 } 1593 1594 /* 1595 * write out the defines (at the end of the declaration section) 1596 */ 1597 void 1598 defout(int last) 1599 { 1600 int i, c; 1601 char sar[NAMESIZE+10]; 1602 1603 for(i=ndefout; i<=ntokens; i++) { 1604 /* non-literals */ 1605 c = tokset[i].name[0]; 1606 if(c != ' ' && c != '$') { 1607 Bprint(ftable, "#define %s %d\n", 1608 tokset[i].name, tokset[i].value); 1609 if(fdefine) 1610 Bprint(fdefine, "#define\t%s\t%d\n", 1611 tokset[i].name, tokset[i].value); 1612 } 1613 } 1614 ndefout = ntokens+1; 1615 if(last && fdebug) { 1616 Bprint(fdebug, "char* yytoknames[] =\n{\n"); 1617 TLOOP(i) { 1618 if(tokset[i].name) { 1619 chcopy(sar, tokset[i].name); 1620 Bprint(fdebug, "\t\"%s\",\n", sar); 1621 continue; 1622 } 1623 Bprint(fdebug, "\t0,\n"); 1624 } 1625 Bprint(fdebug, "};\n"); 1626 } 1627 } 1628 1629 char* 1630 cstash(char *s) 1631 { 1632 char *temp; 1633 1634 temp = cnamp; 1635 do { 1636 if(cnamp >= &cnames[cnamsz]) 1637 error("too many characters in id's and literals"); 1638 else 1639 *cnamp++ = *s; 1640 } while(*s++); 1641 return temp; 1642 } 1643 1644 long 1645 gettok(void) 1646 { 1647 long c; 1648 Rune rune; 1649 int i, base, match, reserve; 1650 static int peekline; 1651 1652 begin: 1653 reserve = 0; 1654 lineno += peekline; 1655 peekline = 0; 1656 c = Bgetrune(finput); 1657 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') { 1658 if(c == '\n') 1659 lineno++; 1660 c = Bgetrune(finput); 1661 } 1662 1663 /* skip comment */ 1664 if(c == '/') { 1665 lineno += skipcom(); 1666 goto begin; 1667 } 1668 switch(c) { 1669 case Beof: 1670 return ENDFILE; 1671 1672 case '{': 1673 Bungetrune(finput); 1674 return '='; 1675 1676 case '<': 1677 /* get, and look up, a type name (union member name) */ 1678 i = 0; 1679 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') { 1680 rune = c; 1681 c = runetochar(&tokname[i], &rune); 1682 if(i < NAMESIZE) 1683 i += c; 1684 } 1685 if(c != '>') 1686 error("unterminated < ... > clause"); 1687 tokname[i] = 0; 1688 for(i=1; i<=ntypes; i++) 1689 if(!strcmp(typeset[i], tokname)) { 1690 numbval = i; 1691 return TYPENAME; 1692 } 1693 ntypes++; 1694 numbval = ntypes; 1695 typeset[numbval] = cstash(tokname); 1696 return TYPENAME; 1697 1698 case '"': 1699 case '\'': 1700 match = c; 1701 tokname[0] = ' '; 1702 i = 1; 1703 for(;;) { 1704 c = Bgetrune(finput); 1705 if(c == '\n' || c <= 0) 1706 error("illegal or missing ' or \"" ); 1707 if(c == '\\') { 1708 tokname[i] = '\\'; 1709 if(i < NAMESIZE) 1710 i++; 1711 c = Bgetrune(finput); 1712 } else 1713 if(c == match) 1714 break; 1715 rune = c; 1716 c = runetochar(&tokname[i], &rune); 1717 if(i < NAMESIZE) 1718 i += c; 1719 } 1720 break; 1721 1722 case '%': 1723 case '\\': 1724 switch(c = Bgetrune(finput)) { 1725 case '0': return TERM; 1726 case '<': return LEFT; 1727 case '2': return BINARY; 1728 case '>': return RIGHT; 1729 case '%': 1730 case '\\': return MARK; 1731 case '=': return PREC; 1732 case '{': return LCURLY; 1733 default: reserve = 1; 1734 } 1735 1736 default: 1737 /* number */ 1738 if(isdigit(c)) { 1739 numbval = c-'0'; 1740 base = (c=='0')? 8: 10; 1741 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput)) 1742 numbval = numbval*base + (c-'0'); 1743 Bungetrune(finput); 1744 return NUMBER; 1745 } 1746 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') { 1747 i = 0; 1748 while(islower(c) || isupper(c) || isdigit(c) || 1749 c == '-' || c=='_' || c=='.' || c=='$') { 1750 if(reserve && isupper(c)) 1751 c += 'a'-'A'; 1752 rune = c; 1753 c = runetochar(&tokname[i], &rune); 1754 if(i < NAMESIZE) 1755 i += c; 1756 c = Bgetrune(finput); 1757 } 1758 } else 1759 return c; 1760 Bungetrune(finput); 1761 } 1762 tokname[i] = 0; 1763 1764 /* find a reserved word */ 1765 if(reserve) { 1766 for(c=0; resrv[c].name; c++) 1767 if(strcmp(tokname, resrv[c].name) == 0) 1768 return resrv[c].value; 1769 error("invalid escape, or illegal reserved word: %s", tokname); 1770 } 1771 1772 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */ 1773 c = Bgetrune(finput); 1774 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') { 1775 if(c == '\n') 1776 peekline++; 1777 /* look for comments */ 1778 if(c == '/') 1779 peekline += skipcom(); 1780 c = Bgetrune(finput); 1781 } 1782 if(c == ':') 1783 return IDENTCOLON; 1784 Bungetrune(finput); 1785 return IDENTIFIER; 1786 } 1787 1788 /* 1789 * determine the type of a symbol 1790 */ 1791 int 1792 fdtype(int t) 1793 { 1794 int v; 1795 1796 if(t >= NTBASE) 1797 v = nontrst[t-NTBASE].value; 1798 else 1799 v = TYPE(toklev[t]); 1800 if(v <= 0) 1801 error("must specify type for %s", (t>=NTBASE)? 1802 nontrst[t-NTBASE].name: tokset[t].name); 1803 return v; 1804 } 1805 1806 int 1807 chfind(int t, char *s) 1808 { 1809 int i; 1810 1811 if(s[0] == ' ') 1812 t = 0; 1813 TLOOP(i) 1814 if(!strcmp(s, tokset[i].name)) 1815 return i; 1816 NTLOOP(i) 1817 if(!strcmp(s, nontrst[i].name)) 1818 return NTBASE+i; 1819 1820 /* cannot find name */ 1821 if(t > 1) 1822 error("%s should have been defined earlier", s); 1823 return defin(t, s); 1824 } 1825 1826 /* 1827 * copy the union declaration to the output, and the define file if present 1828 */ 1829 void 1830 cpyunion(void) 1831 { 1832 long c; 1833 int level; 1834 1835 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); 1836 Bprint(ftable, "typedef union "); 1837 if(fdefine != 0) 1838 Bprint(fdefine, "\ntypedef union "); 1839 1840 level = 0; 1841 for(;;) { 1842 if((c=Bgetrune(finput)) == Beof) 1843 error("EOF encountered while processing %%union"); 1844 Bputrune(ftable, c); 1845 if(fdefine != 0) 1846 Bputrune(fdefine, c); 1847 switch(c) { 1848 case '\n': 1849 lineno++; 1850 break; 1851 case '{': 1852 level++; 1853 break; 1854 case '}': 1855 level--; 1856 1857 /* we are finished copying */ 1858 if(level == 0) { 1859 Bprint(ftable, " YYSTYPE;\n"); 1860 if(fdefine != 0) 1861 Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n"); 1862 return; 1863 } 1864 } 1865 } 1866 } 1867 1868 /* 1869 * copies code between \{ and \} 1870 */ 1871 void 1872 cpycode(void) 1873 { 1874 1875 long c; 1876 1877 c = Bgetrune(finput); 1878 if(c == '\n') { 1879 c = Bgetrune(finput); 1880 lineno++; 1881 } 1882 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); 1883 while(c != Beof) { 1884 if(c == '\\') { 1885 if((c=Bgetrune(finput)) == '}') 1886 return; 1887 Bputc(ftable, '\\'); 1888 } 1889 if(c == '%') { 1890 if((c=Bgetrune(finput)) == '}') 1891 return; 1892 Bputc(ftable, '%'); 1893 } 1894 Bputrune(ftable, c); 1895 if(c == '\n') 1896 lineno++; 1897 c = Bgetrune(finput); 1898 } 1899 error("eof before %%}"); 1900 } 1901 1902 /* 1903 * skip over comments 1904 * skipcom is called after reading a '/' 1905 */ 1906 int 1907 skipcom(void) 1908 { 1909 long c; 1910 int i; 1911 1912 /* i is the number of lines skipped */ 1913 i = 0; 1914 if(Bgetrune(finput) != '*') 1915 error("illegal comment"); 1916 c = Bgetrune(finput); 1917 while(c != Beof) { 1918 while(c == '*') 1919 if((c=Bgetrune(finput)) == '/') 1920 return i; 1921 if(c == '\n') 1922 i++; 1923 c = Bgetrune(finput); 1924 } 1925 error("EOF inside comment"); 1926 return 0; 1927 } 1928 1929 /* 1930 * copy C action to the next ; or closing } 1931 */ 1932 void 1933 cpyact(int offset) 1934 { 1935 long c; 1936 int brac, match, j, s, fnd, tok; 1937 1938 Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile); 1939 brac = 0; 1940 1941 loop: 1942 c = Bgetrune(finput); 1943 swt: 1944 switch(c) { 1945 case ';': 1946 if(brac == 0) { 1947 Bputrune(faction, c); 1948 return; 1949 } 1950 goto lcopy; 1951 1952 case '{': 1953 brac++; 1954 goto lcopy; 1955 1956 case '$': 1957 s = 1; 1958 tok = -1; 1959 c = Bgetrune(finput); 1960 1961 /* type description */ 1962 if(c == '<') { 1963 Bungetrune(finput); 1964 if(gettok() != TYPENAME) 1965 error("bad syntax on $<ident> clause"); 1966 tok = numbval; 1967 c = Bgetrune(finput); 1968 } 1969 if(c == '$') { 1970 Bprint(faction, "yyval"); 1971 1972 /* put out the proper tag... */ 1973 if(ntypes) { 1974 if(tok < 0) 1975 tok = fdtype(*prdptr[nprod]); 1976 Bprint(faction, ".%s", typeset[tok]); 1977 } 1978 goto loop; 1979 } 1980 if(c == '-') { 1981 s = -s; 1982 c = Bgetrune(finput); 1983 } 1984 if(isdigit(c)) { 1985 j = 0; 1986 while(isdigit(c)) { 1987 j = j*10 + (c-'0'); 1988 c = Bgetrune(finput); 1989 } 1990 Bungetrune(finput); 1991 j = j*s - offset; 1992 if(j > 0) 1993 error("Illegal use of $%d", j+offset); 1994 1995 dollar: 1996 Bprint(faction, "yypt[-%d].yyv", -j); 1997 1998 /* put out the proper tag */ 1999 if(ntypes) { 2000 if(j+offset <= 0 && tok < 0) 2001 error("must specify type of $%d", j+offset); 2002 if(tok < 0) 2003 tok = fdtype(prdptr[nprod][j+offset]); 2004 Bprint(faction, ".%s", typeset[tok]); 2005 } 2006 goto loop; 2007 } 2008 if(isupper(c) || islower(c) || c == '_' || c == '.') { 2009 int tok; /* tok used oustide for type info */ 2010 2011 /* look for $name */ 2012 Bungetrune(finput); 2013 if(gettok() != IDENTIFIER) 2014 error("$ must be followed by an identifier"); 2015 tok = chfind(2, tokname); 2016 if((c = Bgetrune(finput)) != '#') { 2017 Bungetrune(finput); 2018 fnd = -1; 2019 } else 2020 if(gettok() != NUMBER) { 2021 error("# must be followed by number"); 2022 fnd = -1; 2023 } else 2024 fnd = numbval; 2025 for(j=1; j<=offset; ++j) 2026 if(tok == prdptr[nprod][j]) { 2027 if(--fnd <= 0) { 2028 j -= offset; 2029 goto dollar; 2030 } 2031 } 2032 error("$name or $name#number not found"); 2033 } 2034 Bputc(faction, '$'); 2035 if(s < 0 ) 2036 Bputc(faction, '-'); 2037 goto swt; 2038 2039 case '}': 2040 brac--; 2041 if(brac) 2042 goto lcopy; 2043 Bputrune(faction, c); 2044 return; 2045 2046 case '/': 2047 /* look for comments */ 2048 Bputrune(faction, c); 2049 c = Bgetrune(finput); 2050 if(c != '*') 2051 goto swt; 2052 2053 /* it really is a comment */ 2054 Bputrune(faction, c); 2055 c = Bgetrune(finput); 2056 while(c >= 0) { 2057 while(c == '*') { 2058 Bputrune(faction, c); 2059 if((c=Bgetrune(finput)) == '/') 2060 goto lcopy; 2061 } 2062 Bputrune(faction, c); 2063 if(c == '\n') 2064 lineno++; 2065 c = Bgetrune(finput); 2066 } 2067 error("EOF inside comment"); 2068 2069 case '\'': 2070 /* character constant */ 2071 match = '\''; 2072 goto string; 2073 2074 case '"': 2075 /* character string */ 2076 match = '"'; 2077 2078 string: 2079 Bputrune(faction, c); 2080 while(c = Bgetrune(finput)) { 2081 if(c == '\\') { 2082 Bputrune(faction, c); 2083 c = Bgetrune(finput); 2084 if(c == '\n') 2085 lineno++; 2086 } else 2087 if(c == match) 2088 goto lcopy; 2089 if(c == '\n') 2090 error("newline in string or char. const."); 2091 Bputrune(faction, c); 2092 } 2093 error("EOF in string or character constant"); 2094 2095 case Beof: 2096 error("action does not terminate"); 2097 2098 case '\n': 2099 lineno++; 2100 goto lcopy; 2101 } 2102 2103 lcopy: 2104 Bputrune(faction, c); 2105 goto loop; 2106 } 2107 2108 void 2109 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc) 2110 { 2111 char buf[256]; 2112 2113 if(vflag) { 2114 snprint(buf, sizeof buf, "%s.%s", stem, FILEU); 2115 foutput = Bopen(buf, OWRITE); 2116 if(foutput == 0) 2117 error("cannot open %s", buf); 2118 } 2119 if(yydebug) { 2120 snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG); 2121 if((fdebug = Bopen(buf, OWRITE)) == 0) 2122 error("can't open %s", buf); 2123 } 2124 if(dflag) { 2125 snprint(buf, sizeof buf, "%s.%s", stem, FILED); 2126 fdefine = Bopen(buf, OWRITE); 2127 if(fdefine == 0) 2128 error("can't create %s", buf); 2129 } 2130 if(ytab == 0) 2131 snprint(buf, sizeof buf, "%s.%s", stem, OFILE); 2132 else 2133 strecpy(buf, buf+sizeof buf, ytabc); 2134 ftable = Bopen(buf, OWRITE); 2135 if(ftable == 0) 2136 error("cannot open table file %s", buf); 2137 } 2138 2139 /* 2140 * print the output for the states 2141 */ 2142 void 2143 output(void) 2144 { 2145 int i, k, c; 2146 Wset *u, *v; 2147 2148 Bprint(ftable, "short yyexca[] =\n{"); 2149 if(fdebug) 2150 Bprint(fdebug, "char* yystates[] =\n{\n"); 2151 2152 /* output the stuff for state i */ 2153 SLOOP(i) { 2154 nolook = tystate[i]!=MUSTLOOKAHEAD; 2155 closure(i); 2156 2157 /* output actions */ 2158 nolook = 1; 2159 aryfil(temp1, ntokens+nnonter+1, 0); 2160 WSLOOP(wsets, u) { 2161 c = *(u->pitem); 2162 if(c > 1 && c < NTBASE && temp1[c] == 0) { 2163 WSLOOP(u, v) 2164 if(c == *(v->pitem)) 2165 putitem(v->pitem+1, (Lkset*)0); 2166 temp1[c] = state(c); 2167 } else 2168 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0) 2169 temp1[c+ntokens] = amem[indgo[i]+c]; 2170 } 2171 if(i == 1) 2172 temp1[1] = ACCEPTCODE; 2173 2174 /* now, we have the shifts; look at the reductions */ 2175 lastred = 0; 2176 WSLOOP(wsets, u) { 2177 c = *u->pitem; 2178 2179 /* reduction */ 2180 if(c <= 0) { 2181 lastred = -c; 2182 TLOOP(k) 2183 if(BIT(u->ws.lset, k)) { 2184 if(temp1[k] == 0) 2185 temp1[k] = c; 2186 else 2187 if(temp1[k] < 0) { /* reduce/reduce conflict */ 2188 if(foutput) 2189 Bprint(foutput, 2190 "\n%d: reduce/reduce conflict" 2191 " (red'ns %d and %d ) on %s", 2192 i, -temp1[k], lastred, 2193 symnam(k)); 2194 if(-temp1[k] > lastred) 2195 temp1[k] = -lastred; 2196 zzrrconf++; 2197 } else 2198 /* potential shift/reduce conflict */ 2199 precftn( lastred, k, i ); 2200 } 2201 } 2202 } 2203 wract(i); 2204 } 2205 2206 if(fdebug) 2207 Bprint(fdebug, "};\n"); 2208 Bprint(ftable, "};\n"); 2209 Bprint(ftable, "#define YYNPROD %d\n", nprod); 2210 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE); 2211 if(yydebug) 2212 Bprint(ftable, "#define yydebug %s\n", yydebug); 2213 } 2214 2215 /* 2216 * pack state i from temp1 into amem 2217 */ 2218 int 2219 apack(int *p, int n) 2220 { 2221 int *pp, *qq, *rr, off, *q, *r; 2222 2223 /* we don't need to worry about checking because 2224 * we will only look at entries known to be there... 2225 * eliminate leading and trailing 0's 2226 */ 2227 2228 q = p+n; 2229 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off) 2230 ; 2231 /* no actions */ 2232 if(pp > q) 2233 return 0; 2234 p = pp; 2235 2236 /* now, find a place for the elements from p to q, inclusive */ 2237 r = &amem[ACTSIZE-1]; 2238 for(rr = amem; rr <= r; rr++, off++) { 2239 for(qq = rr, pp = p; pp <= q; pp++, qq++) 2240 if(*pp != 0) 2241 if(*pp != *qq && *qq != 0) 2242 goto nextk; 2243 2244 /* we have found an acceptable k */ 2245 if(pkdebug && foutput != 0) 2246 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem)); 2247 for(qq = rr, pp = p; pp <= q; pp++, qq++) 2248 if(*pp) { 2249 if(qq > r) 2250 error("action table overflow"); 2251 if(qq > memp) 2252 memp = qq; 2253 *qq = *pp; 2254 } 2255 if(pkdebug && foutput != 0) 2256 for(pp = amem; pp <= memp; pp += 10) { 2257 Bprint(foutput, "\t"); 2258 for(qq = pp; qq <= pp+9; qq++) 2259 Bprint(foutput, "%d ", *qq); 2260 Bprint(foutput, "\n"); 2261 } 2262 return(off); 2263 nextk:; 2264 } 2265 error("no space in action table"); 2266 return 0; 2267 } 2268 2269 /* 2270 * output the gotos for the nontermninals 2271 */ 2272 void 2273 go2out(void) 2274 { 2275 int i, j, k, best, count, cbest, times; 2276 2277 /* mark begining of gotos */ 2278 Bprint(ftemp, "$\n"); 2279 for(i = 1; i <= nnonter; i++) { 2280 go2gen(i); 2281 2282 /* find the best one to make default */ 2283 best = -1; 2284 times = 0; 2285 2286 /* is j the most frequent */ 2287 for(j = 0; j <= nstate; j++) { 2288 if(tystate[j] == 0) 2289 continue; 2290 if(tystate[j] == best) 2291 continue; 2292 2293 /* is tystate[j] the most frequent */ 2294 count = 0; 2295 cbest = tystate[j]; 2296 for(k = j; k <= nstate; k++) 2297 if(tystate[k] == cbest) 2298 count++; 2299 if(count > times) { 2300 best = cbest; 2301 times = count; 2302 } 2303 } 2304 2305 /* best is now the default entry */ 2306 zzgobest += times-1; 2307 for(j = 0; j <= nstate; j++) 2308 if(tystate[j] != 0 && tystate[j] != best) { 2309 Bprint(ftemp, "%d,%d,", j, tystate[j]); 2310 zzgoent++; 2311 } 2312 2313 /* now, the default */ 2314 if(best == -1) 2315 best = 0; 2316 zzgoent++; 2317 Bprint(ftemp, "%d\n", best); 2318 } 2319 } 2320 2321 /* 2322 * output the gotos for nonterminal c 2323 */ 2324 void 2325 go2gen(int c) 2326 { 2327 int i, work, cc; 2328 Item *p, *q; 2329 2330 2331 /* first, find nonterminals with gotos on c */ 2332 aryfil(temp1, nnonter+1, 0); 2333 temp1[c] = 1; 2334 work = 1; 2335 while(work) { 2336 work = 0; 2337 PLOOP(0, i) 2338 2339 /* cc is a nonterminal */ 2340 if((cc=prdptr[i][1]-NTBASE) >= 0) 2341 /* cc has a goto on c */ 2342 if(temp1[cc] != 0) { 2343 2344 /* thus, the left side of production i does too */ 2345 cc = *prdptr[i]-NTBASE; 2346 if(temp1[cc] == 0) { 2347 work = 1; 2348 temp1[cc] = 1; 2349 } 2350 } 2351 } 2352 2353 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */ 2354 if(g2debug && foutput != 0) { 2355 Bprint(foutput, "%s: gotos on ", nontrst[c].name); 2356 NTLOOP(i) 2357 if(temp1[i]) 2358 Bprint(foutput, "%s ", nontrst[i].name); 2359 Bprint(foutput, "\n"); 2360 } 2361 2362 /* now, go through and put gotos into tystate */ 2363 aryfil(tystate, nstate, 0); 2364 SLOOP(i) 2365 ITMLOOP(i, p, q) 2366 if((cc = *p->pitem) >= NTBASE) 2367 /* goto on c is possible */ 2368 if(temp1[cc-NTBASE]) { 2369 tystate[i] = amem[indgo[i]+c]; 2370 break; 2371 } 2372 } 2373 2374 /* 2375 * decide a shift/reduce conflict by precedence. 2376 * r is a rule number, t a token number 2377 * the conflict is in state s 2378 * temp1[t] is changed to reflect the action 2379 */ 2380 void 2381 precftn(int r, int t, int s) 2382 { 2383 int lp, lt, action; 2384 2385 lp = levprd[r]; 2386 lt = toklev[t]; 2387 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { 2388 2389 /* conflict */ 2390 if(foutput != 0) 2391 Bprint(foutput, 2392 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s", 2393 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t)); 2394 zzsrconf++; 2395 return; 2396 } 2397 if(PLEVEL(lt) == PLEVEL(lp)) 2398 action = ASSOC(lt); 2399 else 2400 if(PLEVEL(lt) > PLEVEL(lp)) 2401 action = RASC; /* shift */ 2402 else 2403 action = LASC; /* reduce */ 2404 switch(action) { 2405 case BASC: /* error action */ 2406 temp1[t] = ERRCODE; 2407 break; 2408 case LASC: /* reduce */ 2409 temp1[t] = -r; 2410 break; 2411 } 2412 } 2413 2414 /* 2415 * output state i 2416 * temp1 has the actions, lastred the default 2417 */ 2418 void 2419 wract(int i) 2420 { 2421 int p, p0, p1, ntimes, tred, count, j, flag; 2422 2423 /* find the best choice for lastred */ 2424 lastred = 0; 2425 ntimes = 0; 2426 TLOOP(j) { 2427 if(temp1[j] >= 0) 2428 continue; 2429 if(temp1[j]+lastred == 0) 2430 continue; 2431 /* count the number of appearances of temp1[j] */ 2432 count = 0; 2433 tred = -temp1[j]; 2434 levprd[tred] |= REDFLAG; 2435 TLOOP(p) 2436 if(temp1[p]+tred == 0) 2437 count++; 2438 if(count > ntimes) { 2439 lastred = tred; 2440 ntimes = count; 2441 } 2442 } 2443 2444 /* 2445 * for error recovery, arrange that, if there is a shift on the 2446 * error recovery token, `error', that the default be the error action 2447 */ 2448 if(temp1[2] > 0) 2449 lastred = 0; 2450 2451 /* clear out entries in temp1 which equal lastred */ 2452 TLOOP(p) 2453 if(temp1[p]+lastred == 0) 2454 temp1[p] = 0; 2455 2456 wrstate(i); 2457 defact[i] = lastred; 2458 flag = 0; 2459 TLOOP(p0) 2460 if((p1=temp1[p0]) != 0) { 2461 if(p1 < 0) { 2462 p1 = -p1; 2463 goto exc; 2464 } 2465 if(p1 == ACCEPTCODE) { 2466 p1 = -1; 2467 goto exc; 2468 } 2469 if(p1 == ERRCODE) { 2470 p1 = 0; 2471 exc: 2472 if(flag++ == 0) 2473 Bprint(ftable, "-1, %d,\n", i); 2474 Bprint(ftable, "\t%d, %d,\n", p0, p1); 2475 zzexcp++; 2476 continue; 2477 } 2478 Bprint(ftemp, "%d,%d,", p0, p1); 2479 zzacent++; 2480 } 2481 if(flag) { 2482 defact[i] = -2; 2483 Bprint(ftable, "\t-2, %d,\n", lastred); 2484 } 2485 Bprint(ftemp, "\n"); 2486 } 2487 2488 /* 2489 * writes state i 2490 */ 2491 void 2492 wrstate(int i) 2493 { 2494 int j0, j1; 2495 Item *pp, *qq; 2496 Wset *u; 2497 2498 if(fdebug) { 2499 if(lastred) { 2500 Bprint(fdebug, " 0, /*%d*/\n", i); 2501 } else { 2502 Bprint(fdebug, " \""); 2503 ITMLOOP(i, pp, qq) 2504 Bprint(fdebug, "%s\\n", writem(pp->pitem)); 2505 if(tystate[i] == MUSTLOOKAHEAD) 2506 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u) 2507 if(*u->pitem < 0) 2508 Bprint(fdebug, "%s\\n", writem(u->pitem)); 2509 Bprint(fdebug, "\", /*%d*/\n", i); 2510 } 2511 } 2512 if(foutput == 0) 2513 return; 2514 Bprint(foutput, "\nstate %d\n", i); 2515 ITMLOOP(i, pp, qq) 2516 Bprint(foutput, "\t%s\n", writem(pp->pitem)); 2517 if(tystate[i] == MUSTLOOKAHEAD) 2518 /* print out empty productions in closure */ 2519 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u) 2520 if(*u->pitem < 0) 2521 Bprint(foutput, "\t%s\n", writem(u->pitem)); 2522 2523 /* check for state equal to another */ 2524 TLOOP(j0) 2525 if((j1=temp1[j0]) != 0) { 2526 Bprint(foutput, "\n\t%s ", symnam(j0)); 2527 /* shift, error, or accept */ 2528 if(j1 > 0) { 2529 if(j1 == ACCEPTCODE) 2530 Bprint(foutput, "accept"); 2531 else 2532 if(j1 == ERRCODE) 2533 Bprint(foutput, "error"); 2534 else 2535 Bprint(foutput, "shift %d", j1); 2536 } else 2537 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]); 2538 } 2539 2540 /* output the final production */ 2541 if(lastred) 2542 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n", 2543 lastred, rlines[lastred]); 2544 else 2545 Bprint(foutput, "\n\t. error\n\n"); 2546 2547 /* now, output nonterminal actions */ 2548 j1 = ntokens; 2549 for(j0 = 1; j0 <= nnonter; j0++) { 2550 j1++; 2551 if(temp1[j1]) 2552 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]); 2553 } 2554 } 2555 2556 void 2557 warray(char *s, int *v, int n) 2558 { 2559 int i; 2560 2561 Bprint(ftable, "short %s[] =\n{", s); 2562 for(i=0;;) { 2563 if(i%10 == 0) 2564 Bprint(ftable, "\n"); 2565 Bprint(ftable, "%4d", v[i]); 2566 i++; 2567 if(i >= n) { 2568 Bprint(ftable, "\n};\n"); 2569 break; 2570 } 2571 Bprint(ftable, ","); 2572 } 2573 } 2574 2575 /* 2576 * in order to free up the mem and amem arrays for the optimizer, 2577 * and still be able to output yyr1, etc., after the sizes of 2578 * the action array is known, we hide the nonterminals 2579 * derived by productions in levprd. 2580 */ 2581 2582 void 2583 hideprod(void) 2584 { 2585 int i, j; 2586 2587 j = 0; 2588 levprd[0] = 0; 2589 PLOOP(1, i) { 2590 if(!(levprd[i] & REDFLAG)) { 2591 j++; 2592 if(foutput != 0) 2593 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i])); 2594 } 2595 levprd[i] = *prdptr[i] - NTBASE; 2596 } 2597 if(j) 2598 print("%d rules never reduced\n", j); 2599 } 2600 2601 void 2602 callopt(void) 2603 { 2604 int i, *p, j, k, *q; 2605 2606 /* read the arrays from tempfile and set parameters */ 2607 finput = Bopen(tempname, OREAD); 2608 if(finput == 0) 2609 error("optimizer cannot open tempfile"); 2610 2611 pgo[0] = 0; 2612 temp1[0] = 0; 2613 nstate = 0; 2614 nnonter = 0; 2615 for(;;) { 2616 switch(gtnm()) { 2617 case '\n': 2618 nstate++; 2619 pmem--; 2620 temp1[nstate] = pmem - mem0; 2621 case ',': 2622 continue; 2623 case '$': 2624 break; 2625 default: 2626 error("bad tempfile"); 2627 } 2628 break; 2629 } 2630 2631 pmem--; 2632 temp1[nstate] = yypgo[0] = pmem - mem0; 2633 for(;;) { 2634 switch(gtnm()) { 2635 case '\n': 2636 nnonter++; 2637 yypgo[nnonter] = pmem-mem0; 2638 case ',': 2639 continue; 2640 case -1: 2641 break; 2642 default: 2643 error("bad tempfile"); 2644 } 2645 break; 2646 } 2647 pmem--; 2648 yypgo[nnonter--] = pmem - mem0; 2649 for(i = 0; i < nstate; i++) { 2650 k = 32000; 2651 j = 0; 2652 q = mem0 + temp1[i+1]; 2653 for(p = mem0 + temp1[i]; p < q ; p += 2) { 2654 if(*p > j) 2655 j = *p; 2656 if(*p < k) 2657 k = *p; 2658 } 2659 /* nontrivial situation */ 2660 if(k <= j) { 2661 /* j is now the range */ 2662 /* j -= k; /* call scj */ 2663 if(k > maxoff) 2664 maxoff = k; 2665 } 2666 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j; 2667 if(j > maxspr) 2668 maxspr = j; 2669 } 2670 2671 /* initialize ggreed table */ 2672 for(i = 1; i <= nnonter; i++) { 2673 ggreed[i] = 1; 2674 j = 0; 2675 2676 /* minimum entry index is always 0 */ 2677 q = mem0 + yypgo[i+1] - 1; 2678 for(p = mem0+yypgo[i]; p < q ; p += 2) { 2679 ggreed[i] += 2; 2680 if(*p > j) 2681 j = *p; 2682 } 2683 ggreed[i] = ggreed[i] + 2*j; 2684 if(j > maxoff) 2685 maxoff = j; 2686 } 2687 2688 /* now, prepare to put the shift actions into the amem array */ 2689 for(i = 0; i < ACTSIZE; i++) 2690 amem[i] = 0; 2691 maxa = amem; 2692 for(i = 0; i < nstate; i++) { 2693 if(tystate[i] == 0 && adb > 1) 2694 Bprint(ftable, "State %d: null\n", i); 2695 indgo[i] = YYFLAG1; 2696 } 2697 while((i = nxti()) != NOMORE) 2698 if(i >= 0) 2699 stin(i); 2700 else 2701 gin(-i); 2702 2703 /* print amem array */ 2704 if(adb > 2 ) 2705 for(p = amem; p <= maxa; p += 10) { 2706 Bprint(ftable, "%4d ", (int)(p-amem)); 2707 for(i = 0; i < 10; ++i) 2708 Bprint(ftable, "%4d ", p[i]); 2709 Bprint(ftable, "\n"); 2710 } 2711 2712 /* write out the output appropriate to the language */ 2713 aoutput(); 2714 osummary(); 2715 ZAPFILE(tempname); 2716 } 2717 2718 void 2719 gin(int i) 2720 { 2721 int *p, *r, *s, *q1, *q2; 2722 2723 /* enter gotos on nonterminal i into array amem */ 2724 ggreed[i] = 0; 2725 2726 q2 = mem0+ yypgo[i+1] - 1; 2727 q1 = mem0 + yypgo[i]; 2728 2729 /* now, find amem place for it */ 2730 for(p = amem; p < &amem[ACTSIZE]; p++) { 2731 if(*p) 2732 continue; 2733 for(r = q1; r < q2; r += 2) { 2734 s = p + *r + 1; 2735 if(*s) 2736 goto nextgp; 2737 if(s > maxa) 2738 if((maxa = s) > &amem[ACTSIZE]) 2739 error("a array overflow"); 2740 } 2741 /* we have found amem spot */ 2742 *p = *q2; 2743 if(p > maxa) 2744 if((maxa = p) > &amem[ACTSIZE]) 2745 error("a array overflow"); 2746 for(r = q1; r < q2; r += 2) { 2747 s = p + *r + 1; 2748 *s = r[1]; 2749 } 2750 pgo[i] = p-amem; 2751 if(adb > 1) 2752 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]); 2753 return; 2754 2755 nextgp:; 2756 } 2757 error("cannot place goto %d\n", i); 2758 } 2759 2760 void 2761 stin(int i) 2762 { 2763 int *r, *s, n, flag, j, *q1, *q2; 2764 2765 tystate[i] = 0; 2766 2767 /* enter state i into the amem array */ 2768 q2 = mem0+temp1[i+1]; 2769 q1 = mem0+temp1[i]; 2770 /* find an acceptable place */ 2771 for(n = -maxoff; n < ACTSIZE; n++) { 2772 flag = 0; 2773 for(r = q1; r < q2; r += 2) { 2774 if((s = *r + n + amem) < amem) 2775 goto nextn; 2776 if(*s == 0) 2777 flag++; 2778 else 2779 if(*s != r[1]) 2780 goto nextn; 2781 } 2782 2783 /* check that the position equals another only if the states are identical */ 2784 for(j=0; j<nstate; j++) { 2785 if(indgo[j] == n) { 2786 2787 /* we have some disagreement */ 2788 if(flag) 2789 goto nextn; 2790 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) { 2791 2792 /* states are equal */ 2793 indgo[i] = n; 2794 if(adb > 1) 2795 Bprint(ftable, 2796 "State %d: entry at %d equals state %d\n", 2797 i, n, j); 2798 return; 2799 } 2800 2801 /* we have some disagreement */ 2802 goto nextn; 2803 } 2804 } 2805 2806 for(r = q1; r < q2; r += 2) { 2807 if((s = *r+n+amem) >= &amem[ACTSIZE]) 2808 error("out of space in optimizer a array"); 2809 if(s > maxa) 2810 maxa = s; 2811 if(*s != 0 && *s != r[1]) 2812 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]); 2813 *s = r[1]; 2814 } 2815 indgo[i] = n; 2816 if(adb > 1) 2817 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]); 2818 return; 2819 nextn:; 2820 } 2821 error("Error; failure to place state %d\n", i); 2822 } 2823 2824 /* 2825 * finds the next i 2826 */ 2827 int 2828 nxti(void) 2829 { 2830 int i, max, maxi; 2831 2832 max = 0; 2833 maxi = 0; 2834 for(i = 1; i <= nnonter; i++) 2835 if(ggreed[i] >= max) { 2836 max = ggreed[i]; 2837 maxi = -i; 2838 } 2839 for(i = 0; i < nstate; ++i) 2840 if(tystate[i] >= max) { 2841 max = tystate[i]; 2842 maxi = i; 2843 } 2844 if(nxdb) 2845 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max); 2846 if(max == 0) 2847 return NOMORE; 2848 return maxi; 2849 } 2850 2851 /* 2852 * write summary 2853 */ 2854 void 2855 osummary(void) 2856 { 2857 2858 int i, *p; 2859 2860 if(foutput == 0) 2861 return; 2862 i = 0; 2863 for(p = maxa; p >= amem; p--) 2864 if(*p == 0) 2865 i++; 2866 2867 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n", 2868 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE); 2869 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i); 2870 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff); 2871 } 2872 2873 /* 2874 * this version is for C 2875 * write out the optimized parser 2876 */ 2877 void 2878 aoutput(void) 2879 { 2880 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1)); 2881 arout("yyact", amem, (maxa-amem)+1); 2882 arout("yypact", indgo, nstate); 2883 arout("yypgo", pgo, nnonter+1); 2884 } 2885 2886 void 2887 arout(char *s, int *v, int n) 2888 { 2889 int i; 2890 2891 Bprint(ftable, "short %s[] =\n{", s); 2892 for(i = 0; i < n;) { 2893 if(i%10 == 0) 2894 Bprint(ftable, "\n"); 2895 Bprint(ftable, "%4d", v[i]); 2896 i++; 2897 if(i == n) 2898 Bprint(ftable, "\n};\n"); 2899 else 2900 Bprint(ftable, ","); 2901 } 2902 } 2903 2904 /* 2905 * read and convert an integer from the standard input 2906 * return the terminating character 2907 * blanks, tabs, and newlines are ignored 2908 */ 2909 int 2910 gtnm(void) 2911 { 2912 int sign, val, c; 2913 2914 sign = 0; 2915 val = 0; 2916 while((c=Bgetrune(finput)) != Beof) { 2917 if(isdigit(c)) { 2918 val = val*10 + c-'0'; 2919 continue; 2920 } 2921 if(c == '-') { 2922 sign = 1; 2923 continue; 2924 } 2925 break; 2926 } 2927 if(sign) 2928 val = -val; 2929 *pmem++ = val; 2930 if(pmem >= &mem0[MEMSIZE]) 2931 error("out of space"); 2932 return c; 2933 } 2934