1implement Yacc; 2 3include "sys.m"; 4 sys: Sys; 5 print, fprint, sprint: import sys; 6 UTFmax: import Sys; 7 8include "bufio.m"; 9 bufio: Bufio; 10 Iobuf: import bufio; 11 12include "draw.m"; 13 14Yacc: module 15{ 16 init: fn(ctxt: ref Draw->Context, argv: list of string); 17}; 18 19Arg: adt 20{ 21 argv: list of string; 22 c: int; 23 opts: string; 24 25 init: fn(argv: list of string): ref Arg; 26 opt: fn(arg: self ref Arg): int; 27 arg: fn(arg: self ref Arg): string; 28}; 29 30PARSER: con "/lib/yaccpar"; 31OFILE: con "tab.b"; 32FILEU: con "output"; 33FILED: con "tab.m"; 34FILEDEBUG: con "debug"; 35 36# the following are adjustable 37# according to memory size 38ACTSIZE: con 30000; 39NSTATES: con 2000; 40TEMPSIZE: con 2000; 41 42SYMINC: con 50; # increase for non-term or term 43RULEINC: con 50; # increase for max rule length prodptr[i] 44PRODINC: con 100; # increase for productions prodptr 45WSETINC: con 50; # increase for working sets wsets 46STATEINC: con 200; # increase for states statemem 47 48NAMESIZE: con 50; 49NTYPES: con 63; 50ISIZE: con 400; 51 52PRIVATE: con 16rE000; # unicode private use 53 54# relationships which must hold: 55# TEMPSIZE >= NTERMS + NNONTERM + 1 56# TEMPSIZE >= NSTATES 57# 58 59NTBASE: con 8r10000; 60ERRCODE: con 8190; 61ACCEPTCODE: con 8191; 62YYLEXUNK: con 3; 63TOKSTART: con 4; #index of first defined token 64 65# no, left, right, binary assoc. 66NOASC, LASC, RASC, BASC: con iota; 67 68# flags for state generation 69DONE, MUSTDO, MUSTLOOKAHEAD: con iota; 70 71# flags for a rule having an action, and being reduced 72ACTFLAG: con 16r4; 73REDFLAG: con 16r8; 74 75# output parser flags 76YYFLAG1: con -1000; 77 78# parse tokens 79IDENTIFIER, MARK, TERM, LEFT, RIGHT, BINARY, PREC, LCURLY, IDENTCOLON, NUMBER, START, TYPEDEF, TYPENAME, MODULE: con PRIVATE+iota; 80 81ENDFILE: con 0; 82 83EMPTY: con 1; 84WHOKNOWS: con 0; 85OK: con 1; 86NOMORE: con -1000; 87 88# macros for getting associativity and precedence levels 89ASSOC(i: int): int 90{ 91 return i & 3; 92} 93 94PLEVEL(i: int): int 95{ 96 return (i >> 4) & 16r3f; 97} 98 99TYPE(i: int): int 100{ 101 return (i >> 10) & 16r3f; 102} 103 104# macros for setting associativity and precedence levels 105SETASC(i, j: int): int 106{ 107 return i | j; 108} 109 110SETPLEV(i, j: int): int 111{ 112 return i | (j << 4); 113} 114 115SETTYPE(i, j: int): int 116{ 117 return i | (j << 10); 118} 119 120# I/O descriptors 121stderr: ref Sys->FD; 122fdefine: ref Iobuf; # file for module definition 123fdebug: ref Iobuf; # y.debug for strings for debugging 124ftable: ref Iobuf; # y.tab.c file 125finput: ref Iobuf; # input file 126foutput: ref Iobuf; # y.output file 127 128CodeData, CodeMod, CodeAct: con iota; 129NCode: con 8192; 130 131Code: adt 132{ 133 kind: int; 134 data: array of byte; 135 ndata: int; 136 next: cyclic ref Code; 137}; 138 139codehead: ref Code; 140codetail: ref Code; 141 142modname: string; # name of module 143suppressmod: int; # suppress module definition 144stacksize := 200; 145 146# communication variables between various I/O routines 147infile: string; # input file name 148numbval: int; # value of an input number 149tokname: string; # input token name, slop for runes and 0 150 151# structure declarations 152Lkset: type array of int; 153 154Pitem: adt 155{ 156 prod: array of int; 157 off: int; # offset within the production 158 first: int; # first term or non-term in item 159 prodno: int; # production number for sorting 160}; 161 162Item: adt 163{ 164 pitem: Pitem; 165 look: Lkset; 166}; 167 168Symb: adt 169{ 170 name: string; 171 value: int; 172}; 173 174Wset: adt 175{ 176 pitem: Pitem; 177 flag: int; 178 ws: Lkset; 179}; 180 181 # storage of names 182 183parser := PARSER; 184yydebug: string; 185 186 # storage of types 187ntypes: int; # number of types defined 188typeset := array[NTYPES] of string; # pointers to type tags 189 190 # token information 191 192ntokens := 0; # number of tokens 193tokset: array of Symb; 194toklev: array of int; # vector with the precedence of the terminals 195 196 # nonterminal information 197 198nnonter := -1; # the number of nonterminals 199nontrst: array of Symb; 200start: int; # start symbol 201 202 # state information 203 204nstate := 0; # number of states 205pstate := array[NSTATES+2] of int; # index into statemem to the descriptions of the states 206statemem : array of Item; 207tystate := array[NSTATES] of int; # contains type information about the states 208tstates : array of int; # states generated by terminal gotos 209ntstates : array of int; # states generated by nonterminal gotos 210mstates := array[NSTATES] of {* => 0}; # chain of overflows of term/nonterm generation lists 211lastred: int; # number of last reduction of a state 212defact := array[NSTATES] of int; # default actions of states 213 214 # lookahead set information 215 216lkst: array of Lkset; 217nolook := 0; # flag to turn off lookahead computations 218tbitset := 0; # size of lookahead sets 219clset: Lkset; # temporary storage for lookahead computations 220 221 # working set information 222 223wsets: array of Wset; 224cwp: int; 225 226 # storage for action table 227 228amem: array of int; # action table storage 229memp: int; # next free action table position 230indgo := array[NSTATES] of int; # index to the stored goto table 231 232 # temporary vector, indexable by states, terms, or ntokens 233 234temp1 := array[TEMPSIZE] of int; # temporary storage, indexed by terms + ntokens or states 235lineno := 1; # current input line number 236fatfl := 1; # if on, error is fatal 237nerrors := 0; # number of errors 238 239 # assigned token type values 240extval := 0; 241 242ytabc := OFILE; # name of y.tab.c 243 244 # grammar rule information 245 246nprod := 1; # number of productions 247prdptr: array of array of int; # pointers to descriptions of productions 248levprd: array of int; # precedence levels for the productions 249rlines: array of int; # line number for this rule 250 251 252 # statistics collection variables 253 254zzgoent := 0; 255zzgobest := 0; 256zzacent := 0; 257zzexcp := 0; 258zzclose := 0; 259zzrrconf := 0; 260zzsrconf := 0; 261zzstate := 0; 262 263 # optimizer arrays 264yypgo: array of array of int; 265optst: array of array of int; 266ggreed: array of int; 267pgo: array of int; 268 269maxspr: int; # maximum spread of any entry 270maxoff: int; # maximum offset into a array 271maxa: int; 272 273 # storage for information about the nonterminals 274 275pres: array of array of array of int; # vector of pointers to productions yielding each nonterminal 276pfirst: array of Lkset; 277pempty: array of int; # vector of nonterminals nontrivially deriving e 278 # random stuff picked out from between functions 279 280indebug := 0; # debugging flag for cpfir 281pidebug := 0; # debugging flag for putitem 282gsdebug := 0; # debugging flag for stagen 283cldebug := 0; # debugging flag for closure 284pkdebug := 0; # debugging flag for apack 285g2debug := 0; # debugging for go2gen 286adb := 0; # debugging for callopt 287 288Resrv : adt 289{ 290 name: string; 291 value: int; 292}; 293 294resrv := array[] of { 295 Resrv("binary", BINARY), 296 Resrv("module", MODULE), 297 Resrv("left", LEFT), 298 Resrv("nonassoc", BINARY), 299 Resrv("prec", PREC), 300 Resrv("right", RIGHT), 301 Resrv("start", START), 302 Resrv("term", TERM), 303 Resrv("token", TERM), 304 Resrv("type", TYPEDEF),}; 305 306zznewstate := 0; 307 308init(nil: ref Draw->Context, argv: list of string) 309{ 310 sys = load Sys Sys->PATH; 311 bufio = load Bufio Bufio->PATH; 312 313 stderr = sys->fildes(2); 314 315 setup(argv); # initialize and read productions 316 317 tbitset = (ntokens+32)/32; 318 cpres(); # make table of which productions yield a given nonterminal 319 cempty(); # make a table of which nonterminals can match the empty string 320 cpfir(); # make a table of firsts of nonterminals 321 322 stagen(); # generate the states 323 324 yypgo = array[nnonter+1] of array of int; 325 optst = array[nstate] of array of int; 326 output(); # write the states and the tables 327 go2out(); 328 329 hideprod(); 330 summary(); 331 332 callopt(); 333 334 others(); 335 336 if(fdefine != nil) 337 fdefine.close(); 338 if(fdebug != nil) 339 fdebug.close(); 340 if(ftable != nil) 341 ftable.close(); 342 if(foutput != nil) 343 foutput.close(); 344} 345 346setup(argv: list of string) 347{ 348 j, ty: int; 349 350 ytab := 0; 351 vflag := 0; 352 dflag := 0; 353 stem := 0; 354 stemc := "y"; 355 foutput = nil; 356 fdefine = nil; 357 fdebug = nil; 358 arg := Arg.init(argv); 359 while(c := arg.opt()){ 360 case c{ 361 'v' or 'V' => 362 vflag++; 363 'D' => 364 yydebug = arg.arg(); 365 'd' => 366 dflag++; 367 'n' => 368 stacksize = int arg.arg(); 369 'o' => 370 ytab++; 371 ytabc = arg.arg(); 372 's' => 373 stem++; 374 stemc = arg.arg(); 375 'm' => 376 suppressmod++; 377 * => 378 usage(); 379 } 380 } 381 argv = arg.argv; 382 if(len argv != 1) 383 usage(); 384 if (suppressmod && dflag) { 385 sys->fprint(stderr, "yacc: -m and -d are exclusive\n"); 386 usage(); 387 } 388 if (stacksize < 1) { 389 sys->fprint(stderr, "yacc: stack size too small\n"); 390 usage(); 391 } 392 infile = hd argv; 393 finput = bufio->open(infile, Bufio->OREAD); 394 if(finput == nil) 395 error("cannot open '"+infile+"'"); 396 397 openup(stemc, dflag, vflag, ytab, ytabc); 398 399 defin(0, "$end"); 400 extval = PRIVATE; # tokens start in unicode 'private use' 401 defin(0, "error"); 402 defin(1, "$accept"); 403 defin(0, "$unk"); 404 i := 0; 405 406 for(t := gettok(); t != MARK && t != ENDFILE; ) 407 case t { 408 ';' => 409 t = gettok(); 410 411 START => 412 if(gettok() != IDENTIFIER) 413 error("bad %%start construction"); 414 start = chfind(1, tokname); 415 t = gettok(); 416 417 TYPEDEF => 418 if(gettok() != TYPENAME) 419 error("bad syntax in %%type"); 420 ty = numbval; 421 for(;;) { 422 t = gettok(); 423 case t { 424 IDENTIFIER => 425 if((t=chfind(1, tokname)) < NTBASE) { 426 j = TYPE(toklev[t]); 427 if(j != 0 && j != ty) 428 error("type redeclaration of token "+ 429 tokset[t].name); 430 else 431 toklev[t] = SETTYPE(toklev[t], ty); 432 } else { 433 j = nontrst[t-NTBASE].value; 434 if(j != 0 && j != ty) 435 error("type redeclaration of nonterminal "+ 436 nontrst[t-NTBASE].name); 437 else 438 nontrst[t-NTBASE].value = ty; 439 } 440 continue; 441 ',' => 442 continue; 443 ';' => 444 t = gettok(); 445 } 446 break; 447 } 448 449 MODULE => 450 cpymodule(); 451 t = gettok(); 452 453 LEFT or BINARY or RIGHT or TERM => 454 # nonzero means new prec. and assoc. 455 lev := t-TERM; 456 if(lev) 457 i++; 458 ty = 0; 459 460 # get identifiers so defined 461 t = gettok(); 462 463 # there is a type defined 464 if(t == TYPENAME) { 465 ty = numbval; 466 t = gettok(); 467 } 468 for(;;) { 469 case t { 470 ',' => 471 t = gettok(); 472 continue; 473 474 ';' => 475 break; 476 477 IDENTIFIER => 478 j = chfind(0, tokname); 479 if(j >= NTBASE) 480 error(tokname+" defined earlier as nonterminal"); 481 if(lev) { 482 if(ASSOC(toklev[j])) 483 error("redeclaration of precedence of "+tokname); 484 toklev[j] = SETASC(toklev[j], lev); 485 toklev[j] = SETPLEV(toklev[j], i); 486 } 487 if(ty) { 488 if(TYPE(toklev[j])) 489 error("redeclaration of type of "+tokname); 490 toklev[j] = SETTYPE(toklev[j],ty); 491 } 492 t = gettok(); 493 if(t == NUMBER) { 494 tokset[j].value = numbval; 495 t = gettok(); 496 } 497 continue; 498 } 499 break; 500 } 501 502 LCURLY => 503 cpycode(); 504 t = gettok(); 505 506 * => 507 error("syntax error"); 508 } 509 if(t == ENDFILE) 510 error("unexpected EOF before %%"); 511 if(modname == nil) 512 error("missing %module specification"); 513 514 moreprod(); 515 prdptr[0] = array[4] of { 516 NTBASE, # added production 517 start, # if start is 0, we will overwrite with the lhs of the first rule 518 1, 519 0 520 }; 521 nprod = 1; 522 curprod := array[RULEINC] of int; 523 t = gettok(); 524 if(t != IDENTCOLON) 525 error("bad syntax on first rule"); 526 527 if(!start) 528 prdptr[0][1] = chfind(1, tokname); 529 530 # read rules 531 # put into prdptr array in the format 532 # target 533 # followed by id's of terminals and non-terminals 534 # followd by -nprod 535 while(t != MARK && t != ENDFILE) { 536 mem := 0; 537 # process a rule 538 rlines[nprod] = lineno; 539 if(t == '|') 540 curprod[mem++] = prdptr[nprod-1][0]; 541 else if(t == IDENTCOLON) { 542 curprod[mem] = chfind(1, tokname); 543 if(curprod[mem] < NTBASE) 544 error("token illegal on LHS of grammar rule"); 545 mem++; 546 } else 547 error("illegal rule: missing semicolon or | ?"); 548 549 # read rule body 550 t = gettok(); 551 552 for(;;){ 553 while(t == IDENTIFIER) { 554 curprod[mem] = chfind(1, tokname); 555 if(curprod[mem] < NTBASE) 556 levprd[nprod] = toklev[curprod[mem]]; 557 mem++; 558 if(mem >= len curprod){ 559 ncurprod := array[mem+RULEINC] of int; 560 ncurprod[0:] = curprod; 561 curprod = ncurprod; 562 } 563 t = gettok(); 564 } 565 if(t == PREC) { 566 if(gettok() != IDENTIFIER) 567 error("illegal %%prec syntax"); 568 j = chfind(2, tokname); 569 if(j >= NTBASE) 570 error("nonterminal "+nontrst[j-NTBASE].name+" illegal after %%prec"); 571 levprd[nprod] = toklev[j]; 572 t = gettok(); 573 } 574 if(t != '=') 575 break; 576 levprd[nprod] |= ACTFLAG; 577 addcode(CodeAct, "\n"+string nprod+"=>"); 578 cpyact(curprod, mem); 579 580 # action within rule... 581 if((t=gettok()) == IDENTIFIER) { 582 # make it a nonterminal 583 j = chfind(1, "$$"+string nprod); 584 585 # 586 # the current rule will become rule number nprod+1 587 # enter null production for action 588 # 589 prdptr[nprod] = array[2] of {j, -nprod}; 590 591 # update the production information 592 nprod++; 593 moreprod(); 594 levprd[nprod] = levprd[nprod-1] & ~ACTFLAG; 595 levprd[nprod-1] = ACTFLAG; 596 rlines[nprod] = lineno; 597 598 # make the action appear in the original rule 599 curprod[mem++] = j; 600 if(mem >= len curprod){ 601 ncurprod := array[mem+RULEINC] of int; 602 ncurprod[0:] = curprod; 603 curprod = ncurprod; 604 } 605 } 606 } 607 608 while(t == ';') 609 t = gettok(); 610 curprod[mem++] = -nprod; 611 612 # check that default action is reasonable 613 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[curprod[0]-NTBASE].value) { 614 # no explicit action, LHS has value 615 616 tempty := curprod[1]; 617 if(tempty < 0) 618 error("must return a value, since LHS has a type"); 619 else 620 if(tempty >= NTBASE) 621 tempty = nontrst[tempty-NTBASE].value; 622 else 623 tempty = TYPE(toklev[tempty]); 624 if(tempty != nontrst[curprod[0]-NTBASE].value) 625 error("default action causes potential type clash"); 626 else{ 627 addcodec(CodeAct, '\n'); 628 addcode(CodeAct, string nprod); 629 addcode(CodeAct, "=>\nyyval."); 630 addcode(CodeAct, typeset[tempty]); 631 addcode(CodeAct, " = yys[yyp+1].yyv."); 632 addcode(CodeAct, typeset[tempty]); 633 addcodec(CodeAct, ';'); 634 } 635 } 636 moreprod(); 637 prdptr[nprod] = array[mem] of int; 638 prdptr[nprod][0:] = curprod[:mem]; 639 nprod++; 640 moreprod(); 641 levprd[nprod] = 0; 642 } 643 644 # 645 # end of all rules 646 # dump out the prefix code 647 # 648 ftable.puts("implement "); 649 ftable.puts(modname); 650 ftable.puts(";\n"); 651 652 dumpcode(CodeMod); 653 dumpmod(); 654 dumpcode(CodeAct); 655 656 ftable.puts("YYEOFCODE: con 1;\n"); 657 ftable.puts("YYERRCODE: con 2;\n"); 658 ftable.puts("YYMAXDEPTH: con " + string stacksize + ";\n"); # was 150 659 #ftable.puts("yyval: YYSTYPE;\n"); 660 661 # 662 # copy any postfix code 663 # 664 if(t == MARK) { 665 ftable.puts("\n#line\t"); 666 ftable.puts(string lineno); 667 ftable.puts("\t\""); 668 ftable.puts(infile); 669 ftable.puts("\"\n"); 670 while((c=finput.getc()) != Bufio->EOF) 671 ftable.putc(c); 672 } 673 finput.close(); 674} 675 676# 677# allocate enough room to hold another production 678# 679moreprod() 680{ 681 n := len prdptr; 682 if(nprod < n) 683 return; 684 n += PRODINC; 685 aprod := array[n] of array of int; 686 aprod[0:] = prdptr; 687 prdptr = aprod; 688 689 alevprd := array[n] of int; 690 alevprd[0:] = levprd; 691 levprd = alevprd; 692 693 arlines := array[n] of int; 694 arlines[0:] = rlines; 695 rlines = arlines; 696} 697 698# 699# define s to be a terminal if t=0 700# or a nonterminal if t=1 701# 702defin(nt: int, s: string): int 703{ 704 val := 0; 705 if(nt) { 706 nnonter++; 707 if(nnonter >= len nontrst){ 708 anontrst := array[nnonter + SYMINC] of Symb; 709 anontrst[0:] = nontrst; 710 nontrst = anontrst; 711 } 712 nontrst[nnonter] = Symb(s, 0); 713 return NTBASE + nnonter; 714 } 715 716 # must be a token 717 ntokens++; 718 if(ntokens >= len tokset){ 719 atokset := array[ntokens + SYMINC] of Symb; 720 atokset[0:] = tokset; 721 tokset = atokset; 722 723 atoklev := array[ntokens + SYMINC] of int; 724 atoklev[0:] = toklev; 725 toklev = atoklev; 726 } 727 tokset[ntokens].name = s; 728 toklev[ntokens] = 0; 729 730 # establish value for token 731 # single character literal 732 if(s[0] == ' ' && len s == 1+1){ 733 val = s[1]; 734 }else if(s[0] == ' ' && s[1] == '\\') { # escape sequence 735 if(len s == 2+1) { 736 # single character escape sequence 737 case s[2] { 738 '\'' => val = '\''; 739 '"' => val = '"'; 740 '\\' => val = '\\'; 741 'a' => val = '\a'; 742 'b' => val = '\b'; 743 'n' => val = '\n'; 744 'r' => val = '\r'; 745 't' => val = '\t'; 746 'v' => val = '\v'; 747 * => 748 error("invalid escape "+s[1:3]); 749 } 750 }else if(s[2] == 'u' && len s == 2+1+4) { # \unnnn sequence 751 val = 0; 752 s = s[3:]; 753 while(s != ""){ 754 c := s[0]; 755 if(c >= '0' && c <= '9') 756 c -= '0'; 757 else if(c >= 'a' && c <= 'f') 758 c -= 'a' - 10; 759 else if(c >= 'A' && c <= 'F') 760 c -= 'A' - 10; 761 else 762 error("illegal \\unnnn construction"); 763 val = val * 16 + c; 764 s = s[1:]; 765 } 766 if(val == 0) 767 error("'\\u0000' is illegal"); 768 }else 769 error("unknown escape"); 770 }else 771 val = extval++; 772 773 tokset[ntokens].value = val; 774 return ntokens; 775} 776 777peekline := 0; 778gettok(): int 779{ 780 i, match, c: int; 781 782 tokname = ""; 783 for(;;){ 784 lineno += peekline; 785 peekline = 0; 786 c = finput.getc(); 787 while(c == ' ' || c == '\n' || c == '\t' || c == '\v' || c == '\r') { 788 if(c == '\n') 789 lineno++; 790 c = finput.getc(); 791 } 792 793 # skip comment 794 if(c != '#') 795 break; 796 lineno += skipcom(); 797 } 798 case c { 799 Bufio->EOF => 800 return ENDFILE; 801 802 '{' => 803 finput.ungetc(); 804 return '='; 805 806 '<' => 807 # get, and look up, a type name (union member name) 808 i = 0; 809 while((c=finput.getc()) != '>' && c != Bufio->EOF && c != '\n') 810 tokname[i++] = c; 811 if(c != '>') 812 error("unterminated < ... > clause"); 813 for(i=1; i<=ntypes; i++) 814 if(typeset[i] == tokname) { 815 numbval = i; 816 return TYPENAME; 817 } 818 ntypes++; 819 numbval = ntypes; 820 typeset[numbval] = tokname; 821 return TYPENAME; 822 823 '"' or '\'' => 824 match = c; 825 tokname[0] = ' '; 826 i = 1; 827 for(;;) { 828 c = finput.getc(); 829 if(c == '\n' || c == Bufio->EOF) 830 error("illegal or missing ' or \"" ); 831 if(c == '\\') { 832 tokname[i++] = '\\'; 833 c = finput.getc(); 834 } else if(c == match) 835 return IDENTIFIER; 836 tokname[i++] = c; 837 } 838 839 '%' => 840 case c = finput.getc(){ 841 '%' => return MARK; 842 '=' => return PREC; 843 '{' => return LCURLY; 844 } 845 846 getword(c); 847 # find a reserved word 848 for(c=0; c < len resrv; c++) 849 if(tokname == resrv[c].name) 850 return resrv[c].value; 851 error("invalid escape, or illegal reserved word: "+tokname); 852 853 '0' to '9' => 854 numbval = c - '0'; 855 while(isdigit(c = finput.getc())) 856 numbval = numbval*10 + c-'0'; 857 finput.ungetc(); 858 return NUMBER; 859 860 * => 861 if(isword(c) || c=='.' || c=='$') 862 getword(c); 863 else 864 return c; 865 } 866 867 # look ahead to distinguish IDENTIFIER from IDENTCOLON 868 c = finput.getc(); 869 while(c == ' ' || c == '\t'|| c == '\n' || c == '\v' || c == '\r' || c == '#') { 870 if(c == '\n') 871 peekline++; 872 # look for comments 873 if(c == '#') 874 peekline += skipcom(); 875 c = finput.getc(); 876 } 877 if(c == ':') 878 return IDENTCOLON; 879 finput.ungetc(); 880 return IDENTIFIER; 881} 882 883getword(c: int) 884{ 885 i := 0; 886 while(isword(c) || isdigit(c) || c == '_' || c=='.' || c=='$') { 887 tokname[i++] = c; 888 c = finput.getc(); 889 } 890 finput.ungetc(); 891} 892 893# 894# determine the type of a symbol 895# 896fdtype(t: int): int 897{ 898 v : int; 899 s: string; 900 901 if(t >= NTBASE) { 902 v = nontrst[t-NTBASE].value; 903 s = nontrst[t-NTBASE].name; 904 } else { 905 v = TYPE(toklev[t]); 906 s = tokset[t].name; 907 } 908 if(v <= 0) 909 error("must specify type for "+s); 910 return v; 911} 912 913chfind(t: int, s: string): int 914{ 915 if(s[0] == ' ') 916 t = 0; 917 for(i:=0; i<=ntokens; i++) 918 if(s == tokset[i].name) 919 return i; 920 for(i=0; i<=nnonter; i++) 921 if(s == nontrst[i].name) 922 return NTBASE+i; 923 924 # cannot find name 925 if(t > 1) 926 error(s+" should have been defined earlier"); 927 return defin(t, s); 928} 929 930# 931# saves module definition in Code 932# 933cpymodule() 934{ 935 if(gettok() != IDENTIFIER) 936 error("bad %%module construction"); 937 if(modname != nil) 938 error("duplicate %%module construction"); 939 modname = tokname; 940 941 level := 0; 942 for(;;) { 943 if((c:=finput.getc()) == Bufio->EOF) 944 error("EOF encountered while processing %%module"); 945 case c { 946 '\n' => 947 lineno++; 948 '{' => 949 level++; 950 if(level == 1) 951 continue; 952 '}' => 953 level--; 954 955 # we are finished copying 956 if(level == 0) 957 return; 958 } 959 addcodec(CodeMod, c); 960 } 961 if(codehead == nil || codetail.kind != CodeMod) 962 addcodec(CodeMod, '\n'); # ensure we add something 963} 964 965# 966# saves code between %{ and %} 967# 968cpycode() 969{ 970 c := finput.getc(); 971 if(c == '\n') { 972 c = finput.getc(); 973 lineno++; 974 } 975 addcode(CodeData, "\n#line\t" + string lineno + "\t\"" + infile + "\"\n"); 976 while(c != Bufio->EOF) { 977 if(c == '%') { 978 if((c=finput.getc()) == '}') 979 return; 980 addcodec(CodeData, '%'); 981 } 982 addcodec(CodeData, c); 983 if(c == '\n') 984 lineno++; 985 c = finput.getc(); 986 } 987 error("eof before %%}"); 988} 989 990addcode(k: int, s: string) 991{ 992 for(i := 0; i < len s; i++) 993 addcodec(k, s[i]); 994} 995 996addcodec(k, c: int) 997{ 998 if(codehead == nil 999 || k != codetail.kind 1000 || codetail.ndata >= NCode){ 1001 cd := ref Code(k, array[NCode+UTFmax] of byte, 0, nil); 1002 if(codehead == nil) 1003 codehead = cd; 1004 else 1005 codetail.next = cd; 1006 codetail = cd; 1007 } 1008 1009 codetail.ndata += sys->char2byte(c, codetail.data, codetail.ndata); 1010} 1011 1012dumpcode(til: int) 1013{ 1014 for(; codehead != nil; codehead = codehead.next){ 1015 if(codehead.kind == til) 1016 return; 1017 if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata) 1018 error("can't write output file"); 1019 } 1020} 1021 1022# 1023# write out the module declaration and any token info 1024# 1025dumpmod() 1026{ 1027 if(fdefine != nil) { 1028 fdefine.puts(modname); 1029 fdefine.puts(": module {\n"); 1030 } 1031 if (!suppressmod) { 1032 ftable.puts(modname); 1033 ftable.puts(": module {\n"); 1034 } 1035 1036 for(; codehead != nil; codehead = codehead.next){ 1037 if(codehead.kind != CodeMod) 1038 break; 1039 if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata) 1040 error("can't write output file"); 1041 if(fdefine != nil && fdefine.write(codehead.data, codehead.ndata) != codehead.ndata) 1042 error("can't write define file"); 1043 } 1044 1045 for(i:=TOKSTART; i<=ntokens; i++) { 1046 # non-literals 1047 c := tokset[i].name[0]; 1048 if(c != ' ' && c != '$') { 1049 s := tokset[i].name+": con "+string tokset[i].value+";\n"; 1050 ftable.puts(s); 1051 if(fdefine != nil) 1052 fdefine.puts(s); 1053 } 1054 } 1055 1056 if(fdefine != nil) 1057 fdefine.puts("};\n"); 1058 if (!suppressmod) 1059 ftable.puts("\n};\n"); 1060 1061 if(fdebug != nil) { 1062 fdebug.puts("yytoknames = array[] of {\n"); 1063 for(i=1; i<=ntokens; i++) { 1064 if(tokset[i].name != nil) 1065 fdebug.puts("\t\""+chcopy(tokset[i].name)+"\",\n"); 1066 else 1067 fdebug.puts("\t\"\",\n"); 1068 } 1069 fdebug.puts("};\n"); 1070 } 1071} 1072 1073# 1074# skip over comments 1075# skipcom is called after reading a '#' 1076# 1077skipcom(): int 1078{ 1079 c := finput.getc(); 1080 while(c != Bufio->EOF) { 1081 if(c == '\n') 1082 return 1; 1083 c = finput.getc(); 1084 } 1085 error("EOF inside comment"); 1086 return 0; 1087} 1088 1089# 1090# copy limbo action to the next ; or closing } 1091# 1092cpyact(curprod: array of int, max: int) 1093{ 1094 addcode(CodeAct, "\n#line\t"); 1095 addcode(CodeAct, string lineno); 1096 addcode(CodeAct, "\t\""); 1097 addcode(CodeAct, infile); 1098 addcode(CodeAct, "\"\n"); 1099 1100 brac := 0; 1101 1102loop: for(;;){ 1103 c := finput.getc(); 1104 swt: case c { 1105 ';' => 1106 if(brac == 0) { 1107 addcodec(CodeAct, c); 1108 return; 1109 } 1110 1111 '{' => 1112 brac++; 1113 1114 '$' => 1115 s := 1; 1116 tok := -1; 1117 c = finput.getc(); 1118 1119 # type description 1120 if(c == '<') { 1121 finput.ungetc(); 1122 if(gettok() != TYPENAME) 1123 error("bad syntax on $<ident> clause"); 1124 tok = numbval; 1125 c = finput.getc(); 1126 } 1127 if(c == '$') { 1128 addcode(CodeAct, "yyval"); 1129 1130 # put out the proper tag... 1131 if(ntypes) { 1132 if(tok < 0) 1133 tok = fdtype(curprod[0]); 1134 addcode(CodeAct, "."+typeset[tok]); 1135 } 1136 continue loop; 1137 } 1138 if(c == '-') { 1139 s = -s; 1140 c = finput.getc(); 1141 } 1142 j := 0; 1143 if(isdigit(c)) { 1144 while(isdigit(c)) { 1145 j = j*10 + c-'0'; 1146 c = finput.getc(); 1147 } 1148 finput.ungetc(); 1149 j = j*s; 1150 if(j >= max) 1151 error("Illegal use of $" + string j); 1152 }else if(isword(c) || c == '_' || c == '.') { 1153 # look for $name 1154 finput.ungetc(); 1155 if(gettok() != IDENTIFIER) 1156 error("$ must be followed by an identifier"); 1157 tokn := chfind(2, tokname); 1158 fnd := -1; 1159 if((c = finput.getc()) != '@') 1160 finput.ungetc(); 1161 else if(gettok() != NUMBER) 1162 error("@ must be followed by number"); 1163 else 1164 fnd = numbval; 1165 for(j=1; j<max; j++){ 1166 if(tokn == curprod[j]) { 1167 fnd--; 1168 if(fnd <= 0) 1169 break; 1170 } 1171 } 1172 if(j >= max) 1173 error("$name or $name@number not found"); 1174 }else{ 1175 addcodec(CodeAct, '$'); 1176 if(s < 0) 1177 addcodec(CodeAct, '-'); 1178 finput.ungetc(); 1179 continue loop; 1180 } 1181 addcode(CodeAct, "yys[yypt-" + string(max-j-1) + "].yyv"); 1182 1183 # put out the proper tag 1184 if(ntypes) { 1185 if(j <= 0 && tok < 0) 1186 error("must specify type of $" + string j); 1187 if(tok < 0) 1188 tok = fdtype(curprod[j]); 1189 addcodec(CodeAct, '.'); 1190 addcode(CodeAct, typeset[tok]); 1191 } 1192 continue loop; 1193 1194 '}' => 1195 brac--; 1196 if(brac) 1197 break; 1198 addcodec(CodeAct, c); 1199 return; 1200 1201 '#' => 1202 # a comment 1203 addcodec(CodeAct, c); 1204 c = finput.getc(); 1205 while(c != Bufio->EOF) { 1206 if(c == '\n') { 1207 lineno++; 1208 break swt; 1209 } 1210 addcodec(CodeAct, c); 1211 c = finput.getc(); 1212 } 1213 error("EOF inside comment"); 1214 1215 '\''or '"' => 1216 # character string or constant 1217 match := c; 1218 addcodec(CodeAct, c); 1219 while(c = finput.getc()) { 1220 if(c == '\\') { 1221 addcodec(CodeAct, c); 1222 c = finput.getc(); 1223 if(c == '\n') 1224 lineno++; 1225 } else if(c == match) 1226 break swt; 1227 if(c == '\n') 1228 error("newline in string or char const."); 1229 addcodec(CodeAct, c); 1230 } 1231 error("EOF in string or character constant"); 1232 1233 Bufio->EOF => 1234 error("action does not terminate"); 1235 1236 '\n' => 1237 lineno++; 1238 } 1239 1240 addcodec(CodeAct, c); 1241 } 1242} 1243 1244openup(stem: string, dflag, vflag, ytab: int, ytabc: string) 1245{ 1246 buf: string; 1247 if(vflag) { 1248 buf = stem + "." + FILEU; 1249 foutput = bufio->create(buf, Bufio->OWRITE, 8r666); 1250 if(foutput == nil) 1251 error("can't create " + buf); 1252 } 1253 if(yydebug != nil) { 1254 buf = stem + "." + FILEDEBUG; 1255 fdebug = bufio->create(buf, Bufio->OWRITE, 8r666); 1256 if(fdebug == nil) 1257 error("can't create " + buf); 1258 } 1259 if(dflag) { 1260 buf = stem + "." + FILED; 1261 fdefine = bufio->create(buf, Bufio->OWRITE, 8r666); 1262 if(fdefine == nil) 1263 error("can't create " + buf); 1264 } 1265 if(ytab == 0) 1266 buf = stem + "." + OFILE; 1267 else 1268 buf = ytabc; 1269 ftable = bufio->create(buf, Bufio->OWRITE, 8r666); 1270 if(ftable == nil) 1271 error("can't create file " + buf); 1272} 1273 1274# 1275# return a pointer to the name of symbol i 1276# 1277symnam(i: int): string 1278{ 1279 s: string; 1280 if(i >= NTBASE) 1281 s = nontrst[i-NTBASE].name; 1282 else 1283 s = tokset[i].name; 1284 if(s[0] == ' ') 1285 s = s[1:]; 1286 return s; 1287} 1288 1289# 1290# write out error comment 1291# 1292error(s: string) 1293{ 1294 nerrors++; 1295 fprint(stderr, "yacc: fatal error: %s, %s:%d\n", s, infile, lineno); 1296 if(!fatfl) 1297 return; 1298 summary(); 1299 raise "fail:error"; 1300} 1301 1302# 1303# set elements 0 through n-1 to c 1304# 1305aryfil(v: array of int, n, c: int) 1306{ 1307 for(i:=0; i<n; i++) 1308 v[i] = c; 1309} 1310 1311# 1312# compute an array with the beginnings of productions yielding given nonterminals 1313# The array pres points to these lists 1314# the array pyield has the lists: the total size is only NPROD+1 1315# 1316cpres() 1317{ 1318 pres = array[nnonter+1] of array of array of int; 1319 curres := array[nprod] of array of int; 1320 for(i:=0; i<=nnonter; i++) { 1321 n := 0; 1322 c := i+NTBASE; 1323 fatfl = 0; # make undefined symbols nonfatal 1324 for(j:=0; j<nprod; j++) 1325 if(prdptr[j][0] == c) 1326 curres[n++] = prdptr[j][1:]; 1327 if(n == 0) 1328 error("nonterminal " + nontrst[i].name + " not defined!"); 1329 else{ 1330 pres[i] = array[n] of array of int; 1331 pres[i][0:] = curres[:n]; 1332 } 1333 } 1334 fatfl = 1; 1335 if(nerrors) { 1336 summary(); 1337 raise "fail:error"; 1338 } 1339} 1340 1341dumppres() 1342{ 1343 for(i := 0; i <= nnonter; i++){ 1344 print("nonterm %d\n", i); 1345 curres := pres[i]; 1346 for(j := 0; j < len curres; j++){ 1347 print("\tproduction %d:", j); 1348 prd := curres[j]; 1349 for(k := 0; k < len prd; k++) 1350 print(" %d", prd[k]); 1351 print("\n"); 1352 } 1353 } 1354} 1355 1356# 1357# mark nonterminals which derive the empty string 1358# also, look for nonterminals which don't derive any token strings 1359# 1360cempty() 1361{ 1362 i, p, np: int; 1363 prd: array of int; 1364 1365 pempty = array[nnonter+1] of int; 1366 1367 # first, use the array pempty to detect productions that can never be reduced 1368 # set pempty to WHONOWS 1369 aryfil(pempty, nnonter+1, WHOKNOWS); 1370 1371 # now, look at productions, marking nonterminals which derive something 1372more: for(;;){ 1373 for(i=0; i<nprod; i++) { 1374 prd = prdptr[i]; 1375 if(pempty[prd[0] - NTBASE]) 1376 continue; 1377 np = len prd - 1; 1378 for(p = 1; p < np; p++) 1379 if(prd[p] >= NTBASE && pempty[prd[p]-NTBASE] == WHOKNOWS) 1380 break; 1381 # production can be derived 1382 if(p == np) { 1383 pempty[prd[0]-NTBASE] = OK; 1384 continue more; 1385 } 1386 } 1387 break; 1388 } 1389 1390 # now, look at the nonterminals, to see if they are all OK 1391 for(i=0; i<=nnonter; i++) { 1392 # the added production rises or falls as the start symbol ... 1393 if(i == 0) 1394 continue; 1395 if(pempty[i] != OK) { 1396 fatfl = 0; 1397 error("nonterminal " + nontrst[i].name + " never derives any token string"); 1398 } 1399 } 1400 1401 if(nerrors) { 1402 summary(); 1403 raise "fail:error"; 1404 } 1405 1406 # now, compute the pempty array, to see which nonterminals derive the empty string 1407 # set pempty to WHOKNOWS 1408 aryfil(pempty, nnonter+1, WHOKNOWS); 1409 1410 # loop as long as we keep finding empty nonterminals 1411 1412again: for(;;){ 1413 next: for(i=1; i<nprod; i++) { 1414 # not known to be empty 1415 prd = prdptr[i]; 1416 if(pempty[prd[0]-NTBASE] != WHOKNOWS) 1417 continue; 1418 np = len prd - 1; 1419 for(p = 1; p < np; p++) 1420 if(prd[p] < NTBASE || pempty[prd[p]-NTBASE] != EMPTY) 1421 continue next; 1422 1423 # we have a nontrivially empty nonterminal 1424 pempty[prd[0]-NTBASE] = EMPTY; 1425 # got one ... try for another 1426 continue again; 1427 } 1428 return; 1429 } 1430} 1431 1432dumpempty() 1433{ 1434 for(i := 0; i <= nnonter; i++) 1435 if(pempty[i] == EMPTY) 1436 print("non-term %d %s matches empty\n", i, symnam(i+NTBASE)); 1437} 1438 1439# 1440# compute an array with the first of nonterminals 1441# 1442cpfir() 1443{ 1444 s, n, p, np, ch: int; 1445 curres: array of array of int; 1446 prd: array of int; 1447 1448 wsets = array[nnonter+WSETINC] of Wset; 1449 pfirst = array[nnonter+1] of Lkset; 1450 for(i:=0; i<=nnonter; i++) { 1451 wsets[i].ws = mkset(); 1452 pfirst[i] = mkset(); 1453 curres = pres[i]; 1454 n = len curres; 1455 # initially fill the sets 1456 for(s = 0; s < n; s++) { 1457 prd = curres[s]; 1458 np = len prd - 1; 1459 for(p = 0; p < np; p++) { 1460 ch = prd[p]; 1461 if(ch < NTBASE) { 1462 setbit(pfirst[i], ch); 1463 break; 1464 } 1465 if(!pempty[ch-NTBASE]) 1466 break; 1467 } 1468 } 1469 } 1470 1471 # now, reflect transitivity 1472 changes := 1; 1473 while(changes) { 1474 changes = 0; 1475 for(i=0; i<=nnonter; i++) { 1476 curres = pres[i]; 1477 n = len curres; 1478 for(s = 0; s < n; s++) { 1479 prd = curres[s]; 1480 np = len prd - 1; 1481 for(p = 0; p < np; p++) { 1482 ch = prd[p] - NTBASE; 1483 if(ch < 0) 1484 break; 1485 changes |= setunion(pfirst[i], pfirst[ch]); 1486 if(!pempty[ch]) 1487 break; 1488 } 1489 } 1490 } 1491 } 1492 1493 if(!indebug) 1494 return; 1495 if(foutput != nil){ 1496 for(i=0; i<=nnonter; i++) { 1497 foutput.putc('\n'); 1498 foutput.puts(nontrst[i].name); 1499 foutput.puts(": "); 1500 prlook(pfirst[i]); 1501 foutput.putc(' '); 1502 foutput.puts(string pempty[i]); 1503 foutput.putc('\n'); 1504 } 1505 } 1506} 1507 1508# 1509# generate the states 1510# 1511stagen() 1512{ 1513 # initialize 1514 nstate = 0; 1515 tstates = array[ntokens+1] of {* => 0}; # states generated by terminal gotos 1516 ntstates = array[nnonter+1] of {* => 0};# states generated by nonterminal gotos 1517 amem = array[ACTSIZE] of {* => 0}; 1518 memp = 0; 1519 1520 clset = mkset(); 1521 pstate[0] = pstate[1] = 0; 1522 aryfil(clset, tbitset, 0); 1523 putitem(Pitem(prdptr[0], 0, 0, 0), clset); 1524 tystate[0] = MUSTDO; 1525 nstate = 1; 1526 pstate[2] = pstate[1]; 1527 1528 # 1529 # now, the main state generation loop 1530 # first pass generates all of the states 1531 # later passes fix up lookahead 1532 # could be sped up a lot by remembering 1533 # results of the first pass rather than recomputing 1534 # 1535 first := 1; 1536 for(more := 1; more; first = 0){ 1537 more = 0; 1538 for(i:=0; i<nstate; i++) { 1539 if(tystate[i] != MUSTDO) 1540 continue; 1541 1542 tystate[i] = DONE; 1543 aryfil(temp1, nnonter+1, 0); 1544 1545 # take state i, close it, and do gotos 1546 closure(i); 1547 1548 # generate goto's 1549 for(p:=0; p<cwp; p++) { 1550 pi := wsets[p]; 1551 if(pi.flag) 1552 continue; 1553 wsets[p].flag = 1; 1554 c := pi.pitem.first; 1555 if(c <= 1) { 1556 if(pstate[i+1]-pstate[i] <= p) 1557 tystate[i] = MUSTLOOKAHEAD; 1558 continue; 1559 } 1560 # do a goto on c 1561 putitem(wsets[p].pitem, wsets[p].ws); 1562 for(q:=p+1; q<cwp; q++) { 1563 # this item contributes to the goto 1564 if(c == wsets[q].pitem.first) { 1565 putitem(wsets[q].pitem, wsets[q].ws); 1566 wsets[q].flag = 1; 1567 } 1568 } 1569 1570 if(c < NTBASE) 1571 state(c); # register new state 1572 else 1573 temp1[c-NTBASE] = state(c); 1574 } 1575 1576 if(gsdebug && foutput != nil) { 1577 foutput.puts(string i + ": "); 1578 for(j:=0; j<=nnonter; j++) 1579 if(temp1[j]) 1580 foutput.puts(nontrst[j].name + " " + string temp1[j] + ", "); 1581 foutput.putc('\n'); 1582 } 1583 1584 if(first) 1585 indgo[i] = apack(temp1[1:], nnonter-1) - 1; 1586 1587 more++; 1588 } 1589 } 1590} 1591 1592# 1593# generate the closure of state i 1594# 1595closure(i: int) 1596{ 1597 zzclose++; 1598 1599 # first, copy kernel of state i to wsets 1600 cwp = 0; 1601 q := pstate[i+1]; 1602 for(p:=pstate[i]; p<q; p++) { 1603 wsets[cwp].pitem = statemem[p].pitem; 1604 wsets[cwp].flag = 1; # this item must get closed 1605 wsets[cwp].ws[0:] = statemem[p].look; 1606 cwp++; 1607 } 1608 1609 # now, go through the loop, closing each item 1610 work := 1; 1611 while(work) { 1612 work = 0; 1613 for(u:=0; u<cwp; u++) { 1614 if(wsets[u].flag == 0) 1615 continue; 1616 # dot is before c 1617 c := wsets[u].pitem.first; 1618 if(c < NTBASE) { 1619 wsets[u].flag = 0; 1620 # only interesting case is where . is before nonterminal 1621 continue; 1622 } 1623 1624 # compute the lookahead 1625 aryfil(clset, tbitset, 0); 1626 1627 # find items involving c 1628 for(v:=u; v<cwp; v++) { 1629 if(wsets[v].flag != 1 1630 || wsets[v].pitem.first != c) 1631 continue; 1632 pi := wsets[v].pitem.prod; 1633 ipi := wsets[v].pitem.off + 1; 1634 1635 wsets[v].flag = 0; 1636 if(nolook) 1637 continue; 1638 while((ch := pi[ipi++]) > 0) { 1639 # terminal symbol 1640 if(ch < NTBASE) { 1641 setbit(clset, ch); 1642 break; 1643 } 1644 # nonterminal symbol 1645 setunion(clset, pfirst[ch-NTBASE]); 1646 if(!pempty[ch-NTBASE]) 1647 break; 1648 } 1649 if(ch <= 0) 1650 setunion(clset, wsets[v].ws); 1651 } 1652 1653 # 1654 # now loop over productions derived from c 1655 # 1656 curres := pres[c - NTBASE]; 1657 n := len curres; 1658 # initially fill the sets 1659 nexts: for(s := 0; s < n; s++) { 1660 prd := curres[s]; 1661 # 1662 # put these items into the closure 1663 # is the item there 1664 # 1665 for(v=0; v<cwp; v++) { 1666 # yes, it is there 1667 if(wsets[v].pitem.off == 0 1668 && wsets[v].pitem.prod == prd) { 1669 if(!nolook && setunion(wsets[v].ws, clset)) 1670 wsets[v].flag = work = 1; 1671 continue nexts; 1672 } 1673 } 1674 1675 # not there; make a new entry 1676 if(cwp >= len wsets){ 1677 awsets := array[cwp + WSETINC] of Wset; 1678 awsets[0:] = wsets; 1679 wsets = awsets; 1680 } 1681 wsets[cwp].pitem = Pitem(prd, 0, prd[0], -prd[len prd-1]); 1682 wsets[cwp].flag = 1; 1683 wsets[cwp].ws = mkset(); 1684 if(!nolook) { 1685 work = 1; 1686 wsets[cwp].ws[0:] = clset; 1687 } 1688 cwp++; 1689 } 1690 } 1691 } 1692 1693 # have computed closure; flags are reset; return 1694 if(cldebug && foutput != nil) { 1695 foutput.puts("\nState " + string i + ", nolook = " + string nolook + "\n"); 1696 for(u:=0; u<cwp; u++) { 1697 if(wsets[u].flag) 1698 foutput.puts("flag set!\n"); 1699 wsets[u].flag = 0; 1700 foutput.putc('\t'); 1701 foutput.puts(writem(wsets[u].pitem)); 1702 prlook(wsets[u].ws); 1703 foutput.putc('\n'); 1704 } 1705 } 1706} 1707 1708# 1709# sorts last state,and sees if it equals earlier ones. returns state number 1710# 1711state(c: int): int 1712{ 1713 zzstate++; 1714 p1 := pstate[nstate]; 1715 p2 := pstate[nstate+1]; 1716 if(p1 == p2) 1717 return 0; # null state 1718 # sort the items 1719 k, l: int; 1720 for(k = p1+1; k < p2; k++) { # make k the biggest 1721 for(l = k; l > p1; l--) { 1722 if(statemem[l].pitem.prodno < statemem[l-1].pitem.prodno 1723 || statemem[l].pitem.prodno == statemem[l-1].pitem.prodno 1724 && statemem[l].pitem.off < statemem[l-1].pitem.off) { 1725 s := statemem[l]; 1726 statemem[l] = statemem[l-1]; 1727 statemem[l-1] = s; 1728 }else 1729 break; 1730 } 1731 } 1732 1733 size1 := p2 - p1; # size of state 1734 1735 if(c >= NTBASE) 1736 i := ntstates[c-NTBASE]; 1737 else 1738 i = tstates[c]; 1739 1740look: for(; i != 0; i = mstates[i]) { 1741 # get ith state 1742 q1 := pstate[i]; 1743 q2 := pstate[i+1]; 1744 size2 := q2 - q1; 1745 if(size1 != size2) 1746 continue; 1747 k = p1; 1748 for(l = q1; l < q2; l++) { 1749 if(statemem[l].pitem.prod != statemem[k].pitem.prod 1750 || statemem[l].pitem.off != statemem[k].pitem.off) 1751 continue look; 1752 k++; 1753 } 1754 1755 # found it 1756 pstate[nstate+1] = pstate[nstate]; # delete last state 1757 # fix up lookaheads 1758 if(nolook) 1759 return i; 1760 k = p1; 1761 for(l = q1; l < q2; l++) { 1762 if(setunion(statemem[l].look, statemem[k].look)) 1763 tystate[i] = MUSTDO; 1764 k++; 1765 } 1766 return i; 1767 } 1768 # state is new 1769 zznewstate++; 1770 if(nolook) 1771 error("yacc state/nolook error"); 1772 pstate[nstate+2] = p2; 1773 if(nstate+1 >= NSTATES) 1774 error("too many states"); 1775 if(c >= NTBASE) { 1776 mstates[nstate] = ntstates[c-NTBASE]; 1777 ntstates[c-NTBASE] = nstate; 1778 } else { 1779 mstates[nstate] = tstates[c]; 1780 tstates[c] = nstate; 1781 } 1782 tystate[nstate] = MUSTDO; 1783 return nstate++; 1784} 1785 1786putitem(p: Pitem, set: Lkset) 1787{ 1788 p.off++; 1789 p.first = p.prod[p.off]; 1790 1791 if(pidebug && foutput != nil) 1792 foutput.puts("putitem(" + writem(p) + "), state " + string nstate + "\n"); 1793 j := pstate[nstate+1]; 1794 if(j >= len statemem){ 1795 asm := array[j + STATEINC] of Item; 1796 asm[0:] = statemem; 1797 statemem = asm; 1798 } 1799 statemem[j].pitem = p; 1800 if(!nolook){ 1801 s := mkset(); 1802 s[0:] = set; 1803 statemem[j].look = s; 1804 } 1805 j++; 1806 pstate[nstate+1] = j; 1807} 1808 1809# 1810# creates output string for item pointed to by pp 1811# 1812writem(pp: Pitem): string 1813{ 1814 i: int; 1815 p := pp.prod; 1816 q := chcopy(nontrst[prdptr[pp.prodno][0]-NTBASE].name) + ": "; 1817 npi := pp.off; 1818 pi := p == prdptr[pp.prodno]; 1819 for(;;){ 1820 c := ' '; 1821 if(pi == npi) 1822 c = '.'; 1823 q[len q] = c; 1824 i = p[pi++]; 1825 if(i <= 0) 1826 break; 1827 q += chcopy(symnam(i)); 1828 } 1829 1830 # an item calling for a reduction 1831 i = p[npi]; 1832 if(i < 0) 1833 q += " (" + string -i + ")"; 1834 return q; 1835} 1836 1837# 1838# pack state i from temp1 into amem 1839# 1840apack(p: array of int, n: int): int 1841{ 1842 # 1843 # we don't need to worry about checking because 1844 # we will only look at entries known to be there... 1845 # eliminate leading and trailing 0's 1846 # 1847 off := 0; 1848 for(pp := 0; pp <= n && p[pp] == 0; pp++) 1849 off--; 1850 # no actions 1851 if(pp > n) 1852 return 0; 1853 for(; n > pp && p[n] == 0; n--) 1854 ; 1855 p = p[pp:n+1]; 1856 1857 # now, find a place for the elements from p to q, inclusive 1858 r := len amem - len p; 1859nextk: for(rr := 0; rr <= r; rr++) { 1860 qq := rr; 1861 for(pp = 0; pp < len p; pp++) { 1862 if(p[pp] != 0) 1863 if(p[pp] != amem[qq] && amem[qq] != 0) 1864 continue nextk; 1865 qq++; 1866 } 1867 1868 # we have found an acceptable k 1869 if(pkdebug && foutput != nil) 1870 foutput.puts("off = " + string(off+rr) + ", k = " + string rr + "\n"); 1871 qq = rr; 1872 for(pp = 0; pp < len p; pp++) { 1873 if(p[pp]) { 1874 if(qq > memp) 1875 memp = qq; 1876 amem[qq] = p[pp]; 1877 } 1878 qq++; 1879 } 1880 if(pkdebug && foutput != nil) { 1881 for(pp = 0; pp <= memp; pp += 10) { 1882 foutput.putc('\t'); 1883 for(qq = pp; qq <= pp+9; qq++) 1884 foutput.puts(string amem[qq] + " "); 1885 foutput.putc('\n'); 1886 } 1887 } 1888 return off + rr; 1889 } 1890 error("no space in action table"); 1891 return 0; 1892} 1893 1894# 1895# print the output for the states 1896# 1897output() 1898{ 1899 c, u, v: int; 1900 1901 ftable.puts("yyexca := array[] of {"); 1902 if(fdebug != nil) 1903 fdebug.puts("yystates = array [] of {\n"); 1904 1905 noset := mkset(); 1906 1907 # output the stuff for state i 1908 for(i:=0; i<nstate; i++) { 1909 nolook = tystate[i]!=MUSTLOOKAHEAD; 1910 closure(i); 1911 1912 # output actions 1913 nolook = 1; 1914 aryfil(temp1, ntokens+nnonter+1, 0); 1915 for(u=0; u<cwp; u++) { 1916 c = wsets[u].pitem.first; 1917 if(c > 1 && c < NTBASE && temp1[c] == 0) { 1918 for(v=u; v<cwp; v++) 1919 if(c == wsets[v].pitem.first) 1920 putitem(wsets[v].pitem, noset); 1921 temp1[c] = state(c); 1922 } else 1923 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0) 1924 temp1[c+ntokens] = amem[indgo[i]+c]; 1925 } 1926 if(i == 1) 1927 temp1[1] = ACCEPTCODE; 1928 1929 # now, we have the shifts; look at the reductions 1930 lastred = 0; 1931 for(u=0; u<cwp; u++) { 1932 c = wsets[u].pitem.first; 1933 1934 # reduction 1935 if(c > 0) 1936 continue; 1937 lastred = -c; 1938 us := wsets[u].ws; 1939 for(k:=0; k<=ntokens; k++) { 1940 if(!bitset(us, k)) 1941 continue; 1942 if(temp1[k] == 0) 1943 temp1[k] = c; 1944 else 1945 if(temp1[k] < 0) { # reduce/reduce conflict 1946 if(foutput != nil) 1947 foutput.puts( 1948 "\n" + string i + ": reduce/reduce conflict (red'ns " 1949 + string -temp1[k] + " and " + string lastred + " ) on " + symnam(k)); 1950 if(-temp1[k] > lastred) 1951 temp1[k] = -lastred; 1952 zzrrconf++; 1953 } else 1954 # potential shift/reduce conflict 1955 precftn(lastred, k, i); 1956 } 1957 } 1958 wract(i); 1959 } 1960 1961 if(fdebug != nil) 1962 fdebug.puts("};\n"); 1963 ftable.puts("};\n"); 1964 ftable.puts("YYNPROD: con " + string nprod + ";\n"); 1965 ftable.puts("YYPRIVATE: con " + string PRIVATE + ";\n"); 1966 ftable.puts("yytoknames: array of string;\n"); 1967 ftable.puts("yystates: array of string;\n"); 1968 if(yydebug != nil){ 1969 ftable.puts("include \"y.debug\";\n"); 1970 ftable.puts("yydebug: con " + yydebug + ";\n"); 1971 }else{ 1972 ftable.puts("yydebug: con 0;\n"); 1973 } 1974} 1975 1976# 1977# decide a shift/reduce conflict by precedence. 1978# r is a rule number, t a token number 1979# the conflict is in state s 1980# temp1[t] is changed to reflect the action 1981# 1982precftn(r, t, s: int) 1983{ 1984 action: int; 1985 1986 lp := levprd[r]; 1987 lt := toklev[t]; 1988 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { 1989 1990 # conflict 1991 if(foutput != nil) 1992 foutput.puts( 1993 "\n" + string s + ": shift/reduce conflict (shift " 1994 + string temp1[t] + "(" + string PLEVEL(lt) + "), red'n " 1995 + string r + "(" + string PLEVEL(lp) + ")) on " + symnam(t)); 1996 zzsrconf++; 1997 return; 1998 } 1999 if(PLEVEL(lt) == PLEVEL(lp)) 2000 action = ASSOC(lt); 2001 else if(PLEVEL(lt) > PLEVEL(lp)) 2002 action = RASC; # shift 2003 else 2004 action = LASC; # reduce 2005 case action{ 2006 BASC => # error action 2007 temp1[t] = ERRCODE; 2008 LASC => # reduce 2009 temp1[t] = -r; 2010 } 2011} 2012 2013# 2014# output state i 2015# temp1 has the actions, lastred the default 2016# 2017wract(i: int) 2018{ 2019 p, p1: int; 2020 2021 # find the best choice for lastred 2022 lastred = 0; 2023 ntimes := 0; 2024 for(j:=0; j<=ntokens; j++) { 2025 if(temp1[j] >= 0) 2026 continue; 2027 if(temp1[j]+lastred == 0) 2028 continue; 2029 # count the number of appearances of temp1[j] 2030 count := 0; 2031 tred := -temp1[j]; 2032 levprd[tred] |= REDFLAG; 2033 for(p=0; p<=ntokens; p++) 2034 if(temp1[p]+tred == 0) 2035 count++; 2036 if(count > ntimes) { 2037 lastred = tred; 2038 ntimes = count; 2039 } 2040 } 2041 2042 # 2043 # for error recovery, arrange that, if there is a shift on the 2044 # error recovery token, `error', that the default be the error action 2045 # 2046 if(temp1[2] > 0) 2047 lastred = 0; 2048 2049 # clear out entries in temp1 which equal lastred 2050 # count entries in optst table 2051 n := 0; 2052 for(p=0; p<=ntokens; p++) { 2053 p1 = temp1[p]; 2054 if(p1+lastred == 0) 2055 temp1[p] = p1 = 0; 2056 if(p1 > 0 && p1 != ACCEPTCODE && p1 != ERRCODE) 2057 n++; 2058 } 2059 2060 wrstate(i); 2061 defact[i] = lastred; 2062 flag := 0; 2063 os := array[n*2] of int; 2064 n = 0; 2065 for(p=0; p<=ntokens; p++) { 2066 if((p1=temp1[p]) != 0) { 2067 if(p1 < 0) { 2068 p1 = -p1; 2069 } else if(p1 == ACCEPTCODE) { 2070 p1 = -1; 2071 } else if(p1 == ERRCODE) { 2072 p1 = 0; 2073 } else { 2074 os[n++] = p; 2075 os[n++] = p1; 2076 zzacent++; 2077 continue; 2078 } 2079 if(flag++ == 0) 2080 ftable.puts("-1, " + string i + ",\n"); 2081 ftable.puts("\t" + string p + ", " + string p1 + ",\n"); 2082 zzexcp++; 2083 } 2084 } 2085 if(flag) { 2086 defact[i] = -2; 2087 ftable.puts("\t-2, " + string lastred + ",\n"); 2088 } 2089 optst[i] = os; 2090} 2091 2092# 2093# writes state i 2094# 2095wrstate(i: int) 2096{ 2097 j0, j1, u: int; 2098 pp, qq: int; 2099 2100 if(fdebug != nil) { 2101 if(lastred) { 2102 fdebug.puts(" nil, #" + string i + "\n"); 2103 } else { 2104 fdebug.puts(" \""); 2105 qq = pstate[i+1]; 2106 for(pp=pstate[i]; pp<qq; pp++){ 2107 fdebug.puts(writem(statemem[pp].pitem)); 2108 fdebug.puts("\\n"); 2109 } 2110 if(tystate[i] == MUSTLOOKAHEAD) 2111 for(u = pstate[i+1] - pstate[i]; u < cwp; u++) 2112 if(wsets[u].pitem.first < 0){ 2113 fdebug.puts(writem(wsets[u].pitem)); 2114 fdebug.puts("\\n"); 2115 } 2116 fdebug.puts("\", #" + string i + "/\n"); 2117 } 2118 } 2119 if(foutput == nil) 2120 return; 2121 foutput.puts("\nstate " + string i + "\n"); 2122 qq = pstate[i+1]; 2123 for(pp=pstate[i]; pp<qq; pp++){ 2124 foutput.putc('\t'); 2125 foutput.puts(writem(statemem[pp].pitem)); 2126 foutput.putc('\n'); 2127 } 2128 if(tystate[i] == MUSTLOOKAHEAD) { 2129 # print out empty productions in closure 2130 for(u = pstate[i+1] - pstate[i]; u < cwp; u++) { 2131 if(wsets[u].pitem.first < 0) { 2132 foutput.putc('\t'); 2133 foutput.puts(writem(wsets[u].pitem)); 2134 foutput.putc('\n'); 2135 } 2136 } 2137 } 2138 2139 # check for state equal to another 2140 for(j0=0; j0<=ntokens; j0++) 2141 if((j1=temp1[j0]) != 0) { 2142 foutput.puts("\n\t" + symnam(j0) + " "); 2143 # shift, error, or accept 2144 if(j1 > 0) { 2145 if(j1 == ACCEPTCODE) 2146 foutput.puts("accept"); 2147 else if(j1 == ERRCODE) 2148 foutput.puts("error"); 2149 else 2150 foutput.puts("shift "+string j1); 2151 } else 2152 foutput.puts("reduce " + string -j1 + " (src line " + string rlines[-j1] + ")"); 2153 } 2154 2155 # output the final production 2156 if(lastred) 2157 foutput.puts("\n\t. reduce " + string lastred + " (src line " + string rlines[lastred] + ")\n\n"); 2158 else 2159 foutput.puts("\n\t. error\n\n"); 2160 2161 # now, output nonterminal actions 2162 j1 = ntokens; 2163 for(j0 = 1; j0 <= nnonter; j0++) { 2164 j1++; 2165 if(temp1[j1]) 2166 foutput.puts("\t" + symnam(j0+NTBASE) + " goto " + string temp1[j1] + "\n"); 2167 } 2168} 2169 2170# 2171# output the gotos for the nontermninals 2172# 2173go2out() 2174{ 2175 for(i := 1; i <= nnonter; i++) { 2176 go2gen(i); 2177 2178 # find the best one to make default 2179 best := -1; 2180 times := 0; 2181 2182 # is j the most frequent 2183 for(j := 0; j < nstate; j++) { 2184 if(tystate[j] == 0) 2185 continue; 2186 if(tystate[j] == best) 2187 continue; 2188 2189 # is tystate[j] the most frequent 2190 count := 0; 2191 cbest := tystate[j]; 2192 for(k := j; k < nstate; k++) 2193 if(tystate[k] == cbest) 2194 count++; 2195 if(count > times) { 2196 best = cbest; 2197 times = count; 2198 } 2199 } 2200 2201 # best is now the default entry 2202 zzgobest += times-1; 2203 n := 0; 2204 for(j = 0; j < nstate; j++) 2205 if(tystate[j] != 0 && tystate[j] != best) 2206 n++; 2207 goent := array[2*n+1] of int; 2208 n = 0; 2209 for(j = 0; j < nstate; j++) 2210 if(tystate[j] != 0 && tystate[j] != best) { 2211 goent[n++] = j; 2212 goent[n++] = tystate[j]; 2213 zzgoent++; 2214 } 2215 2216 # now, the default 2217 if(best == -1) 2218 best = 0; 2219 zzgoent++; 2220 goent[n] = best; 2221 yypgo[i] = goent; 2222 } 2223} 2224 2225# 2226# output the gotos for nonterminal c 2227# 2228go2gen(c: int) 2229{ 2230 i, cc, p, q: int; 2231 2232 # first, find nonterminals with gotos on c 2233 aryfil(temp1, nnonter+1, 0); 2234 temp1[c] = 1; 2235 work := 1; 2236 while(work) { 2237 work = 0; 2238 for(i=0; i<nprod; i++) { 2239 # cc is a nonterminal with a goto on c 2240 cc = prdptr[i][1]-NTBASE; 2241 if(cc >= 0 && temp1[cc] != 0) { 2242 # thus, the left side of production i does too 2243 cc = prdptr[i][0]-NTBASE; 2244 if(temp1[cc] == 0) { 2245 work = 1; 2246 temp1[cc] = 1; 2247 } 2248 } 2249 } 2250 } 2251 2252 # now, we have temp1[c] = 1 if a goto on c in closure of cc 2253 if(g2debug && foutput != nil) { 2254 foutput.puts(nontrst[c].name); 2255 foutput.puts(": gotos on "); 2256 for(i=0; i<=nnonter; i++) 2257 if(temp1[i]){ 2258 foutput.puts(nontrst[i].name); 2259 foutput.putc(' '); 2260 } 2261 foutput.putc('\n'); 2262 } 2263 2264 # now, go through and put gotos into tystate 2265 aryfil(tystate, nstate, 0); 2266 for(i=0; i<nstate; i++) { 2267 q = pstate[i+1]; 2268 for(p=pstate[i]; p<q; p++) { 2269 if((cc = statemem[p].pitem.first) >= NTBASE) { 2270 # goto on c is possible 2271 if(temp1[cc-NTBASE]) { 2272 tystate[i] = amem[indgo[i]+c]; 2273 break; 2274 } 2275 } 2276 } 2277 } 2278} 2279 2280# 2281# in order to free up the mem and amem arrays for the optimizer, 2282# and still be able to output yyr1, etc., after the sizes of 2283# the action array is known, we hide the nonterminals 2284# derived by productions in levprd. 2285# 2286hideprod() 2287{ 2288 j := 0; 2289 levprd[0] = 0; 2290 for(i:=1; i<nprod; i++) { 2291 if(!(levprd[i] & REDFLAG)) { 2292 j++; 2293 if(foutput != nil) { 2294 foutput.puts("Rule not reduced: "); 2295 foutput.puts(writem(Pitem(prdptr[i], 0, 0, i))); 2296 foutput.putc('\n'); 2297 } 2298 } 2299 levprd[i] = prdptr[i][0] - NTBASE; 2300 } 2301 if(j) 2302 print("%d rules never reduced\n", j); 2303} 2304 2305callopt() 2306{ 2307 j, k, p, q: int; 2308 v: array of int; 2309 2310 pgo = array[nnonter+1] of int; 2311 pgo[0] = 0; 2312 maxoff = 0; 2313 maxspr = 0; 2314 for(i := 0; i < nstate; i++) { 2315 k = 32000; 2316 j = 0; 2317 v = optst[i]; 2318 q = len v; 2319 for(p = 0; p < q; p += 2) { 2320 if(v[p] > j) 2321 j = v[p]; 2322 if(v[p] < k) 2323 k = v[p]; 2324 } 2325 # nontrivial situation 2326 if(k <= j) { 2327 # j is now the range 2328# j -= k; # call scj 2329 if(k > maxoff) 2330 maxoff = k; 2331 } 2332 tystate[i] = q + 2*j; 2333 if(j > maxspr) 2334 maxspr = j; 2335 } 2336 2337 # initialize ggreed table 2338 ggreed = array[nnonter+1] of int; 2339 for(i = 1; i <= nnonter; i++) { 2340 ggreed[i] = 1; 2341 j = 0; 2342 2343 # minimum entry index is always 0 2344 v = yypgo[i]; 2345 q = len v - 1; 2346 for(p = 0; p < q ; p += 2) { 2347 ggreed[i] += 2; 2348 if(v[p] > j) 2349 j = v[p]; 2350 } 2351 ggreed[i] = ggreed[i] + 2*j; 2352 if(j > maxoff) 2353 maxoff = j; 2354 } 2355 2356 # now, prepare to put the shift actions into the amem array 2357 for(i = 0; i < ACTSIZE; i++) 2358 amem[i] = 0; 2359 maxa = 0; 2360 for(i = 0; i < nstate; i++) { 2361 if(tystate[i] == 0 && adb > 1) 2362 ftable.puts("State " + string i + ": null\n"); 2363 indgo[i] = YYFLAG1; 2364 } 2365 while((i = nxti()) != NOMORE) 2366 if(i >= 0) 2367 stin(i); 2368 else 2369 gin(-i); 2370 2371 # print amem array 2372 if(adb > 2) 2373 for(p = 0; p <= maxa; p += 10) { 2374 ftable.puts(string p + " "); 2375 for(i = 0; i < 10; i++) 2376 ftable.puts(string amem[p+i] + " "); 2377 ftable.putc('\n'); 2378 } 2379 2380 aoutput(); 2381 osummary(); 2382} 2383 2384# 2385# finds the next i 2386# 2387nxti(): int 2388{ 2389 max := 0; 2390 maxi := 0; 2391 for(i := 1; i <= nnonter; i++) 2392 if(ggreed[i] >= max) { 2393 max = ggreed[i]; 2394 maxi = -i; 2395 } 2396 for(i = 0; i < nstate; i++) 2397 if(tystate[i] >= max) { 2398 max = tystate[i]; 2399 maxi = i; 2400 } 2401 if(max == 0) 2402 return NOMORE; 2403 return maxi; 2404} 2405 2406gin(i: int) 2407{ 2408 s: int; 2409 2410 # enter gotos on nonterminal i into array amem 2411 ggreed[i] = 0; 2412 2413 q := yypgo[i]; 2414 nq := len q - 1; 2415 # now, find amem place for it 2416nextgp: for(p := 0; p < ACTSIZE; p++) { 2417 if(amem[p]) 2418 continue; 2419 for(r := 0; r < nq; r += 2) { 2420 s = p + q[r] + 1; 2421 if(s > maxa){ 2422 maxa = s; 2423 if(maxa >= ACTSIZE) 2424 error("a array overflow"); 2425 } 2426 if(amem[s]) 2427 continue nextgp; 2428 } 2429 # we have found amem spot 2430 amem[p] = q[nq]; 2431 if(p > maxa) 2432 maxa = p; 2433 for(r = 0; r < nq; r += 2) { 2434 s = p + q[r] + 1; 2435 amem[s] = q[r+1]; 2436 } 2437 pgo[i] = p; 2438 if(adb > 1) 2439 ftable.puts("Nonterminal " + string i + ", entry at " + string pgo[i] + "\n"); 2440 return; 2441 } 2442 error("cannot place goto " + string i + "\n"); 2443} 2444 2445stin(i: int) 2446{ 2447 s: int; 2448 2449 tystate[i] = 0; 2450 2451 # enter state i into the amem array 2452 q := optst[i]; 2453 nq := len q; 2454 # find an acceptable place 2455nextn: for(n := -maxoff; n < ACTSIZE; n++) { 2456 flag := 0; 2457 for(r := 0; r < nq; r += 2) { 2458 s = q[r] + n; 2459 if(s < 0 || s > ACTSIZE) 2460 continue nextn; 2461 if(amem[s] == 0) 2462 flag++; 2463 else if(amem[s] != q[r+1]) 2464 continue nextn; 2465 } 2466 2467 # check the position equals another only if the states are identical 2468 for(j:=0; j<nstate; j++) { 2469 if(indgo[j] == n) { 2470 2471 # we have some disagreement 2472 if(flag) 2473 continue nextn; 2474 if(nq == len optst[j]) { 2475 2476 # states are equal 2477 indgo[i] = n; 2478 if(adb > 1) 2479 ftable.puts("State " + string i + ": entry at " 2480 + string n + " equals state " + string j + "\n"); 2481 return; 2482 } 2483 2484 # we have some disagreement 2485 continue nextn; 2486 } 2487 } 2488 2489 for(r = 0; r < nq; r += 2) { 2490 s = q[r] + n; 2491 if(s > maxa) 2492 maxa = s; 2493 if(amem[s] != 0 && amem[s] != q[r+1]) 2494 error("clobber of a array, pos'n " + string s + ", by " + string q[r+1] + ""); 2495 amem[s] = q[r+1]; 2496 } 2497 indgo[i] = n; 2498 if(adb > 1) 2499 ftable.puts("State " + string i + ": entry at " + string indgo[i] + "\n"); 2500 return; 2501 } 2502 error("Error; failure to place state " + string i + "\n"); 2503} 2504 2505# 2506# this version is for limbo 2507# write out the optimized parser 2508# 2509aoutput() 2510{ 2511 ftable.puts("YYLAST:\tcon "+string (maxa+1)+";\n"); 2512 arout("yyact", amem, maxa+1); 2513 arout("yypact", indgo, nstate); 2514 arout("yypgo", pgo, nnonter+1); 2515} 2516 2517# 2518# put out other arrays, copy the parsers 2519# 2520others() 2521{ 2522 finput = bufio->open(parser, Bufio->OREAD); 2523 if(finput == nil) 2524 error("cannot find parser " + parser); 2525 arout("yyr1", levprd, nprod); 2526 aryfil(temp1, nprod, 0); 2527 2528 # 2529 #yyr2 is the number of rules for each production 2530 # 2531 for(i:=1; i<nprod; i++) 2532 temp1[i] = len prdptr[i] - 2; 2533 arout("yyr2", temp1, nprod); 2534 2535 aryfil(temp1, nstate, -1000); 2536 for(i=0; i<=ntokens; i++) 2537 for(j:=tstates[i]; j!=0; j=mstates[j]) 2538 temp1[j] = i; 2539 for(i=0; i<=nnonter; i++) 2540 for(j=ntstates[i]; j!=0; j=mstates[j]) 2541 temp1[j] = -i; 2542 arout("yychk", temp1, nstate); 2543 arout("yydef", defact, nstate); 2544 2545 # put out token translation tables 2546 # table 1 has 0-256 2547 aryfil(temp1, 256, 0); 2548 c := 0; 2549 for(i=1; i<=ntokens; i++) { 2550 j = tokset[i].value; 2551 if(j >= 0 && j < 256) { 2552 if(temp1[j]) { 2553 print("yacc bug -- cant have 2 different Ts with same value\n"); 2554 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); 2555 nerrors++; 2556 } 2557 temp1[j] = i; 2558 if(j > c) 2559 c = j; 2560 } 2561 } 2562 for(i = 0; i <= c; i++) 2563 if(temp1[i] == 0) 2564 temp1[i] = YYLEXUNK; 2565 arout("yytok1", temp1, c+1); 2566 2567 # table 2 has PRIVATE-PRIVATE+256 2568 aryfil(temp1, 256, 0); 2569 c = 0; 2570 for(i=1; i<=ntokens; i++) { 2571 j = tokset[i].value - PRIVATE; 2572 if(j >= 0 && j < 256) { 2573 if(temp1[j]) { 2574 print("yacc bug -- cant have 2 different Ts with same value\n"); 2575 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); 2576 nerrors++; 2577 } 2578 temp1[j] = i; 2579 if(j > c) 2580 c = j; 2581 } 2582 } 2583 arout("yytok2", temp1, c+1); 2584 2585 # table 3 has everything else 2586 ftable.puts("yytok3 := array[] of {\n"); 2587 c = 0; 2588 for(i=1; i<=ntokens; i++) { 2589 j = tokset[i].value; 2590 if(j >= 0 && j < 256) 2591 continue; 2592 if(j >= PRIVATE && j < 256+PRIVATE) 2593 continue; 2594 2595 ftable.puts(sprint("%4d,%4d,", j, i)); 2596 c++; 2597 if(c%5 == 0) 2598 ftable.putc('\n'); 2599 } 2600 ftable.puts(sprint("%4d\n};\n", 0)); 2601 2602 # copy parser text 2603 while((c=finput.getc()) != Bufio->EOF) { 2604 if(c == '$') { 2605 if((c = finput.getc()) != 'A') 2606 ftable.putc('$'); 2607 else { # copy actions 2608 if(codehead == nil) 2609 ftable.puts("* => ;"); 2610 else 2611 dumpcode(-1); 2612 c = finput.getc(); 2613 } 2614 } 2615 ftable.putc(c); 2616 } 2617 ftable.close(); 2618} 2619 2620arout(s: string, v: array of int, n: int) 2621{ 2622 ftable.puts(s+" := array[] of {"); 2623 for(i := 0; i < n; i++) { 2624 if(i%10 == 0) 2625 ftable.putc('\n'); 2626 ftable.puts(sprint("%4d", v[i])); 2627 ftable.putc(','); 2628 } 2629 ftable.puts("\n};\n"); 2630} 2631 2632# 2633# output the summary on y.output 2634# 2635summary() 2636{ 2637 if(foutput != nil) { 2638 foutput.puts("\n" + string ntokens + " terminals, " + string(nnonter + 1) + " nonterminals\n"); 2639 foutput.puts("" + string nprod + " grammar rules, " + string nstate + "/" + string NSTATES + " states\n"); 2640 foutput.puts("" + string zzsrconf + " shift/reduce, " + string zzrrconf + " reduce/reduce conflicts reported\n"); 2641 foutput.puts("" + string len wsets + " working sets used\n"); 2642 foutput.puts("memory: parser " + string memp + "/" + string ACTSIZE + "\n"); 2643 foutput.puts(string (zzclose - 2*nstate) + " extra closures\n"); 2644 foutput.puts(string zzacent + " shift entries, " + string zzexcp + " exceptions\n"); 2645 foutput.puts(string zzgoent + " goto entries\n"); 2646 foutput.puts(string zzgobest + " entries saved by goto default\n"); 2647 } 2648 if(zzsrconf != 0 || zzrrconf != 0) { 2649 print("\nconflicts: "); 2650 if(zzsrconf) 2651 print("%d shift/reduce", zzsrconf); 2652 if(zzsrconf && zzrrconf) 2653 print(", "); 2654 if(zzrrconf) 2655 print("%d reduce/reduce", zzrrconf); 2656 print("\n"); 2657 } 2658 if(fdefine != nil) 2659 fdefine.close(); 2660} 2661 2662# 2663# write optimizer summary 2664# 2665osummary() 2666{ 2667 if(foutput == nil) 2668 return; 2669 i := 0; 2670 for(p := maxa; p >= 0; p--) 2671 if(amem[p] == 0) 2672 i++; 2673 2674 foutput.puts("Optimizer space used: output " + string (maxa+1) + "/" + string ACTSIZE + "\n"); 2675 foutput.puts(string(maxa+1) + " table entries, " + string i + " zero\n"); 2676 foutput.puts("maximum spread: " + string maxspr + ", maximum offset: " + string maxoff + "\n"); 2677} 2678 2679# 2680# copies and protects "'s in q 2681# 2682chcopy(q: string): string 2683{ 2684 s := ""; 2685 j := 0; 2686 for(i := 0; i < len q; i++) { 2687 if(q[i] == '"') { 2688 s += q[j:i] + "\\"; 2689 j = i; 2690 } 2691 } 2692 return s + q[j:i]; 2693} 2694 2695usage() 2696{ 2697 fprint(stderr, "usage: yacc [-vdm] [-Dn] [-o output] [-s stem] file\n"); 2698 raise "fail:usage"; 2699} 2700 2701bitset(set: Lkset, bit: int): int 2702{ 2703 return set[bit>>5] & (1<<(bit&31)); 2704} 2705 2706setbit(set: Lkset, bit: int): int 2707{ 2708 return set[bit>>5] |= (1<<(bit&31)); 2709} 2710 2711mkset(): Lkset 2712{ 2713 return array[tbitset] of {* => 0}; 2714} 2715 2716# 2717# set a to the union of a and b 2718# return 1 if b is not a subset of a, 0 otherwise 2719# 2720setunion(a, b: array of int): int 2721{ 2722 sub := 0; 2723 for(i:=0; i<tbitset; i++) { 2724 x := a[i]; 2725 y := x | b[i]; 2726 a[i] = y; 2727 if(y != x) 2728 sub = 1; 2729 } 2730 return sub; 2731} 2732 2733prlook(p: Lkset) 2734{ 2735 if(p == nil){ 2736 foutput.puts("\tNULL"); 2737 return; 2738 } 2739 foutput.puts(" { "); 2740 for(j:=0; j<=ntokens; j++){ 2741 if(bitset(p, j)){ 2742 foutput.puts(symnam(j)); 2743 foutput.putc(' '); 2744 } 2745 } 2746 foutput.putc('}'); 2747} 2748 2749# 2750# utility routines 2751# 2752isdigit(c: int): int 2753{ 2754 return c >= '0' && c <= '9'; 2755} 2756 2757isword(c: int): int 2758{ 2759 return c >= 16ra0 || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; 2760} 2761 2762mktemp(t: string): string 2763{ 2764 return t; 2765} 2766 2767# 2768# arg processing 2769# 2770Arg.init(argv: list of string): ref Arg 2771{ 2772 if(argv != nil) 2773 argv = tl argv; 2774 return ref Arg(argv, 0, ""); 2775} 2776 2777Arg.opt(arg: self ref Arg): int 2778{ 2779 opts := arg.opts; 2780 if(opts != ""){ 2781 arg.c = opts[0]; 2782 arg.opts = opts[1:]; 2783 return arg.c; 2784 } 2785 argv := arg.argv; 2786 if(argv == nil) 2787 return arg.c = 0; 2788 opts = hd argv; 2789 if(len opts < 2 || opts[0] != '-') 2790 return arg.c = 0; 2791 arg.argv = tl argv; 2792 if(opts == "--") 2793 return arg.c = 0; 2794 arg.opts = opts[2:]; 2795 return arg.c = opts[1]; 2796} 2797 2798Arg.arg(arg: self ref Arg): string 2799{ 2800 s := arg.opts; 2801 arg.opts = ""; 2802 if(s != "") 2803 return s; 2804 argv := arg.argv; 2805 if(argv == nil) 2806 return ""; 2807 arg.argv = tl argv; 2808 return hd argv; 2809} 2810