1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5
6 #define Bungetrune Bungetc /* ok for now. */
7
8 /*
9 * all these are 32 bit
10 */
11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n) (((n)+32)/32)
15
16 #define PARSER "/sys/lib/yaccpar"
17 #define PARSERS "/sys/lib/yaccpars"
18 #define TEMPNAME "y.tmp.XXXXXX"
19 #define ACTNAME "y.acts.XXXXXX"
20 #define OFILE "tab.c"
21 #define FILEU "output"
22 #define FILED "tab.h"
23 #define FILEDEBUG "debug"
24
25 enum
26 {
27 /*
28 * the following are adjustable
29 * according to memory size
30 */
31 ACTSIZE = 40000,
32 MEMSIZE = 40000,
33 NSTATES = 2000,
34 NTERMS = 511,
35 NPROD = 1600,
36 NNONTERM = 600,
37 TEMPSIZE = 2000,
38 CNAMSZ = 10000,
39 LSETSIZE = 2400,
40 WSETSIZE = 350,
41
42 NAMESIZE = 50,
43 NTYPES = 63,
44 ISIZE = 400,
45
46 PRIVATE = 0xE000, /* unicode private use */
47
48 /* relationships which must hold:
49 TBITSET ints must hold NTERMS+1 bits...
50 WSETSIZE >= NNONTERM
51 LSETSIZE >= NNONTERM
52 TEMPSIZE >= NTERMS + NNONTERM + 1
53 TEMPSIZE >= NSTATES
54 */
55
56 NTBASE = 010000,
57 ERRCODE = 8190,
58 ACCEPTCODE = 8191,
59
60 NOASC = 0, /* no assoc. */
61 LASC = 1, /* left assoc. */
62 RASC = 2, /* right assoc. */
63 BASC = 3, /* binary assoc. */
64
65 /* flags for state generation */
66
67 DONE = 0,
68 MUSTDO = 1,
69 MUSTLOOKAHEAD = 2,
70
71 /* flags for a rule having an action, and being reduced */
72
73 ACTFLAG = 04,
74 REDFLAG = 010,
75
76 /* output parser flags */
77 YYFLAG1 = -1000,
78
79 /* parse tokens */
80 IDENTIFIER = PRIVATE,
81 MARK,
82 TERM,
83 LEFT,
84 RIGHT,
85 BINARY,
86 PREC,
87 LCURLY,
88 IDENTCOLON,
89 NUMBER,
90 START,
91 TYPEDEF,
92 TYPENAME,
93 UNION,
94
95 ENDFILE = 0,
96
97 EMPTY = 1,
98 WHOKNOWS = 0,
99 OK = 1,
100 NOMORE = -1000,
101 };
102
103 /* macros for getting associativity and precedence levels */
104
105 #define ASSOC(i) ((i)&03)
106 #define PLEVEL(i) (((i)>>4)&077)
107 #define TYPE(i) (((i)>>10)&077)
108
109 /* macros for setting associativity and precedence levels */
110
111 #define SETASC(i,j) i |= j
112 #define SETPLEV(i,j) i |= (j<<4)
113 #define SETTYPE(i,j) i |= (j<<10)
114
115 /* looping macros */
116
117 #define TLOOP(i) for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i) for(i=s; i<nprod; i++)
120 #define SLOOP(i) for(i=0; i<nstate; i++)
121 #define WSBUMP(x) x++
122 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i) for(i=0; i<tbitset; i++)
125
126 /* command to clobber tempfiles after use */
127
128 #define ZAPFILE(x) if(x) remove(x)
129
130 /* I/O descriptors */
131
132 Biobuf* faction; /* file for saving actions */
133 Biobuf* fdefine; /* file for #defines */
134 Biobuf* fdebug; /* y.debug for strings for debugging */
135 Biobuf* ftable; /* y.tab.c file */
136 Biobuf* ftemp; /* tempfile to pass 2 */
137 Biobuf* finput; /* input file */
138 Biobuf* foutput; /* y.output file */
139
140 /* communication variables between various I/O routines */
141
142 char* infile; /* input file name */
143 int numbval; /* value of an input number */
144 char tokname[NAMESIZE+UTFmax+1]; /* input token name, slop for runes and 0 */
145
146 /* structure declarations */
147
148 typedef
149 struct
150 {
151 int lset[TBITSET];
152 } Lkset;
153
154 typedef
155 struct
156 {
157 int* pitem;
158 Lkset* look;
159 } Item;
160
161 typedef
162 struct
163 {
164 char* name;
165 int value;
166 } Symb;
167
168 typedef
169 struct
170 {
171 int* pitem;
172 int flag;
173 Lkset ws;
174 } Wset;
175
176 /* storage of names */
177
178 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
179 int cnamsz = CNAMSZ; /* size of cnames */
180 char* cnamp = cnames; /* place where next name is to be put in */
181 int ndefout = 4; /* number of defined symbols output */
182 char* tempname;
183 char* actname;
184 char ttempname[] = TEMPNAME;
185 char tactname[] = ACTNAME;
186 char* parser = PARSER;
187 char* yydebug;
188
189 /* storage of types */
190 int ntypes; /* number of types defined */
191 char* typeset[NTYPES]; /* pointers to type tags */
192
193 /* token information */
194
195 int ntokens = 0 ; /* number of tokens */
196 Symb tokset[NTERMS];
197 int toklev[NTERMS]; /* vector with the precedence of the terminals */
198
199 /* nonterminal information */
200
201 int nnonter = -1; /* the number of nonterminals */
202 Symb nontrst[NNONTERM];
203 int start; /* start symbol */
204
205 /* assigned token type values */
206 int extval = 0;
207
208 char* ytabc = OFILE; /* name of y.tab.c */
209
210 /* grammar rule information */
211
212 int mem0[MEMSIZE] ; /* production storage */
213 int* mem = mem0;
214 int nprod = 1; /* number of productions */
215 int* prdptr[NPROD]; /* pointers to descriptions of productions */
216 int levprd[NPROD]; /* precedence levels for the productions */
217 int rlines[NPROD]; /* line number for this rule */
218
219 /* state information */
220
221 int nstate = 0; /* number of states */
222 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
223 int tystate[NSTATES]; /* contains type information about the states */
224 int defact[NSTATES]; /* the default actions of states */
225 int tstates[NTERMS]; /* states generated by terminal gotos */
226 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
227 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
228 int lastred; /* the number of the last reduction of a state */
229
230 /* lookahead set information */
231
232 Lkset lkst[LSETSIZE];
233 int nolook; /* flag to turn off lookahead computations */
234 int tbitset; /* size of lookahead sets */
235 int nlset = 0; /* next lookahead set index */
236 int nolook = 0; /* flag to suppress lookahead computations */
237 Lkset clset; /* temporary storage for lookahead computations */
238
239 /* working set information */
240
241 Wset wsets[WSETSIZE];
242 Wset* cwp;
243
244 /* storage for action table */
245
246 int amem[ACTSIZE]; /* action table storage */
247 int* memp = amem; /* next free action table position */
248 int indgo[NSTATES]; /* index to the stored goto table */
249
250 /* temporary vector, indexable by states, terms, or ntokens */
251
252 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
253 int lineno = 1; /* current input line number */
254 int fatfl = 1; /* if on, error is fatal */
255 int nerrors = 0; /* number of errors */
256
257 /* statistics collection variables */
258
259 int zzgoent;
260 int zzgobest;
261 int zzacent;
262 int zzexcp;
263 int zzclose;
264 int zzrrconf;
265 int zzsrconf;
266
267 int* ggreed = lkst[0].lset;
268 int* pgo = wsets[0].ws.lset;
269 int* yypgo = &nontrst[0].value;
270
271 int maxspr = 0; /* maximum spread of any entry */
272 int maxoff = 0; /* maximum offset into a array */
273 int* pmem = mem0;
274 int* maxa;
275 int nxdb = 0;
276 int adb = 0;
277
278
279 /* storage for information about the nonterminals */
280
281 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
282 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
283 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
284
285 /* random stuff picked out from between functions */
286
287 int indebug = 0;
288 Wset* zzcwp = wsets;
289 int zzgoent = 0;
290 int zzgobest = 0;
291 int zzacent = 0;
292 int zzexcp = 0;
293 int zzclose = 0;
294 int zzsrconf = 0;
295 int* zzmemsz = mem0;
296 int zzrrconf = 0;
297 int pidebug = 0; /* debugging flag for putitem */
298 int gsdebug = 0;
299 int cldebug = 0; /* debugging flag for closure */
300 int pkdebug = 0;
301 int g2debug = 0;
302
303 struct
304 {
305 char* name;
306 long value;
307 } resrv[] =
308 {
309 "binary", BINARY,
310 "left", LEFT,
311 "nonassoc", BINARY,
312 "prec", PREC,
313 "right", RIGHT,
314 "start", START,
315 "term", TERM,
316 "token", TERM,
317 "type", TYPEDEF,
318 "union", UNION,
319 0,
320 };
321
322 /* define functions */
323
324 void main(int, char**);
325 void others(void);
326 char* chcopy(char*, char*);
327 char* writem(int*);
328 char* symnam(int);
329 void summary(void);
330 void error(char*, ...);
331 void aryfil(int*, int, int);
332 int setunion(int*, int*);
333 void prlook(Lkset*);
334 void cpres(void);
335 void cpfir(void);
336 int state(int);
337 void putitem(int*, Lkset*);
338 void cempty(void);
339 void stagen(void);
340 void closure(int);
341 Lkset* flset(Lkset*);
342 void cleantmp(void);
343 void intr(void);
344 void setup(int, char**);
345 void finact(void);
346 int defin(int, char*);
347 void defout(int);
348 char* cstash(char*);
349 long gettok(void);
350 int fdtype(int);
351 int chfind(int, char*);
352 void cpyunion(void);
353 void cpycode(void);
354 int skipcom(void);
355 void cpyact(int);
356 void openup(char*, int, int, int, char*);
357 void output(void);
358 int apack(int*, int);
359 void go2out(void);
360 void go2gen(int);
361 void precftn(int, int, int);
362 void wract(int);
363 void wrstate(int);
364 void warray(char*, int*, int);
365 void hideprod(void);
366 void callopt(void);
367 void gin(int);
368 void stin(int);
369 int nxti(void);
370 void osummary(void);
371 void aoutput(void);
372 void arout(char*, int*, int);
373 int gtnm(void);
374
375 void
main(int argc,char * argv[])376 main(int argc, char *argv[])
377 {
378
379 setup(argc, argv); /* initialize and read productions */
380 tbitset = NWORDS(ntokens);
381 cpres(); /* make table of which productions yield a given nonterminal */
382 cempty(); /* make a table of which nonterminals can match the empty string */
383 cpfir(); /* make a table of firsts of nonterminals */
384 stagen(); /* generate the states */
385 output(); /* write the states and the tables */
386 go2out();
387 hideprod();
388 summary();
389 callopt();
390 others();
391 exits(0);
392 }
393
394 /*
395 * put out other arrays, copy the parsers
396 */
397 void
others(void)398 others(void)
399 {
400 int c, i, j;
401
402 finput = Bopen(parser, OREAD);
403 if(finput == 0)
404 error("cannot find parser %s", parser);
405 warray("yyr1", levprd, nprod);
406 aryfil(temp1, nprod, 0);
407 PLOOP(1, i)
408 temp1[i] = prdptr[i+1]-prdptr[i]-2;
409 warray("yyr2", temp1, nprod);
410
411 aryfil(temp1, nstate, -1000);
412 TLOOP(i)
413 for(j=tstates[i]; j!=0; j=mstates[j])
414 temp1[j] = i;
415 NTLOOP(i)
416 for(j=ntstates[i]; j!=0; j=mstates[j])
417 temp1[j] = -i;
418 warray("yychk", temp1, nstate);
419 warray("yydef", defact, nstate);
420
421 /* put out token translation tables */
422 /* table 1 has 0-256 */
423 aryfil(temp1, 256, 0);
424 c = 0;
425 TLOOP(i) {
426 j = tokset[i].value;
427 if(j >= 0 && j < 256) {
428 if(temp1[j]) {
429 print("yacc bug -- cant have 2 different Ts with same value\n");
430 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431 nerrors++;
432 }
433 temp1[j] = i;
434 if(j > c)
435 c = j;
436 }
437 }
438 warray("yytok1", temp1, c+1);
439
440 /* table 2 has PRIVATE-PRIVATE+256 */
441 aryfil(temp1, 256, 0);
442 c = 0;
443 TLOOP(i) {
444 j = tokset[i].value - PRIVATE;
445 if(j >= 0 && j < 256) {
446 if(temp1[j]) {
447 print("yacc bug -- cant have 2 different Ts with same value\n");
448 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449 nerrors++;
450 }
451 temp1[j] = i;
452 if(j > c)
453 c = j;
454 }
455 }
456 warray("yytok2", temp1, c+1);
457
458 /* table 3 has everything else */
459 Bprint(ftable, "long yytok3[] =\n{\n");
460 c = 0;
461 TLOOP(i) {
462 j = tokset[i].value;
463 if(j >= 0 && j < 256)
464 continue;
465 if(j >= PRIVATE && j < 256+PRIVATE)
466 continue;
467
468 Bprint(ftable, "%4d,%4d,", j, i);
469 c++;
470 if(c%5 == 0)
471 Bprint(ftable, "\n");
472 }
473 Bprint(ftable, "%4d\n};\n", 0);
474
475 /* copy parser text */
476 while((c=Bgetrune(finput)) != Beof) {
477 if(c == '$') {
478 if((c = Bgetrune(finput)) != 'A')
479 Bputrune(ftable, '$');
480 else { /* copy actions */
481 faction = Bopen(actname, OREAD);
482 if(faction == 0)
483 error("cannot reopen action tempfile");
484 while((c=Bgetrune(faction)) != Beof)
485 Bputrune(ftable, c);
486 Bterm(faction);
487 ZAPFILE(actname);
488 c = Bgetrune(finput);
489 }
490 }
491 Bputrune(ftable, c);
492 }
493 Bterm(ftable);
494 }
495
496 /*
497 * copies string q into p, returning next free char ptr
498 */
499 char*
chcopy(char * p,char * q)500 chcopy(char* p, char* q)
501 {
502 int c;
503
504 while(c = *q) {
505 if(c == '"')
506 *p++ = '\\';
507 *p++ = c;
508 q++;
509 }
510 *p = 0;
511 return p;
512 }
513
514 /*
515 * creates output string for item pointed to by pp
516 */
517 char*
writem(int * pp)518 writem(int *pp)
519 {
520 int i,*p;
521 static char sarr[ISIZE];
522 char* q;
523
524 for(p=pp; *p>0; p++)
525 ;
526 p = prdptr[-*p];
527 q = chcopy(sarr, nontrst[*p-NTBASE].name);
528 q = chcopy(q, ": ");
529 for(;;) {
530 *q = ' ';
531 p++;
532 if(p == pp)
533 *q = '.';
534 q++;
535 *q = '\0';
536 i = *p;
537 if(i <= 0)
538 break;
539 q = chcopy(q, symnam(i));
540 if(q > &sarr[ISIZE-30])
541 error("item too big");
542 }
543
544 /* an item calling for a reduction */
545 i = *pp;
546 if(i < 0 ) {
547 q = chcopy(q, " (");
548 sprint(q, "%d)", -i);
549 }
550 return sarr;
551 }
552
553 /*
554 * return a pointer to the name of symbol i
555 */
556 char*
symnam(int i)557 symnam(int i)
558 {
559 char* cp;
560
561 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562 if(*cp == ' ')
563 cp++;
564 return cp;
565 }
566
567 /*
568 * output the summary on y.output
569 */
570 void
summary(void)571 summary(void)
572 {
573
574 if(foutput != 0) {
575 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576 ntokens, NTERMS, nnonter, NNONTERM);
577 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578 nprod, NPROD, nstate, NSTATES);
579 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580 zzsrconf, zzrrconf);
581 Bprint(foutput, "%d/%d working sets used\n",
582 (int)(zzcwp-wsets), WSETSIZE);
583 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
585 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588 Bprint(foutput, "%d goto entries\n", zzgoent);
589 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
590 }
591 if(zzsrconf != 0 || zzrrconf != 0) {
592 print("\nconflicts: ");
593 if(zzsrconf)
594 print("%d shift/reduce", zzsrconf);
595 if(zzsrconf && zzrrconf)
596 print(", ");
597 if(zzrrconf)
598 print("%d reduce/reduce", zzrrconf);
599 print("\n");
600 }
601 if(ftemp != 0) {
602 Bterm(ftemp);
603 ftemp = 0;
604 }
605 if(fdefine != 0) {
606 Bterm(fdefine);
607 fdefine = 0;
608 }
609 }
610
611 /*
612 * write out error comment -- NEEDS WORK
613 */
614 void
error(char * s,...)615 error(char *s, ...)
616 {
617
618 nerrors++;
619 fprint(2, "\n fatal error:");
620 fprint(2, s, (&s)[1]);
621 fprint(2, ", %s:%d\n", infile, lineno);
622 if(!fatfl)
623 return;
624 summary();
625 cleantmp();
626 exits("error");
627 }
628
629 /*
630 * set elements 0 through n-1 to c
631 */
632 void
aryfil(int * v,int n,int c)633 aryfil(int *v, int n, int c)
634 {
635 int i;
636
637 for(i=0; i<n; i++)
638 v[i] = c;
639 }
640
641 /*
642 * set a to the union of a and b
643 * return 1 if b is not a subset of a, 0 otherwise
644 */
645 int
setunion(int * a,int * b)646 setunion(int *a, int *b)
647 {
648 int i, x, sub;
649
650 sub = 0;
651 SETLOOP(i) {
652 x = *a;
653 *a |= *b;
654 if(*a != x)
655 sub = 1;
656 a++;
657 b++;
658 }
659 return sub;
660 }
661
662 void
prlook(Lkset * p)663 prlook(Lkset* p)
664 {
665 int j, *pp;
666
667 pp = p->lset;
668 if(pp == 0)
669 Bprint(foutput, "\tNULL");
670 else {
671 Bprint(foutput, " { ");
672 TLOOP(j)
673 if(BIT(pp,j))
674 Bprint(foutput, "%s ", symnam(j));
675 Bprint(foutput, "}");
676 }
677 }
678
679 /*
680 * compute an array with the beginnings of productions yielding given nonterminals
681 * The array pres points to these lists
682 * the array pyield has the lists: the total size is only NPROD+1
683 */
684 void
cpres(void)685 cpres(void)
686 {
687 int c, j, i, **pmem;
688 static int *pyield[NPROD];
689
690 pmem = pyield;
691 NTLOOP(i) {
692 c = i+NTBASE;
693 pres[i] = pmem;
694 fatfl = 0; /* make undefined symbols nonfatal */
695 PLOOP(0, j)
696 if(*prdptr[j] == c)
697 *pmem++ = prdptr[j]+1;
698 if(pres[i] == pmem)
699 error("nonterminal %s not defined!", nontrst[i].name);
700 }
701 pres[i] = pmem;
702 fatfl = 1;
703 if(nerrors) {
704 summary();
705 cleantmp();
706 exits("error");
707 }
708 if(pmem != &pyield[nprod])
709 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
710 }
711
712 /*
713 * compute an array with the first of nonterminals
714 */
715 void
cpfir(void)716 cpfir(void)
717 {
718 int *p, **s, i, **t, ch, changes;
719
720 zzcwp = &wsets[nnonter];
721 NTLOOP(i) {
722 aryfil(wsets[i].ws.lset, tbitset, 0);
723 t = pres[i+1];
724 /* initially fill the sets */
725 for(s=pres[i]; s<t; ++s)
726 for(p = *s; (ch = *p) > 0; ++p) {
727 if(ch < NTBASE) {
728 SETBIT(wsets[i].ws.lset, ch);
729 break;
730 }
731 if(!pempty[ch-NTBASE])
732 break;
733 }
734 }
735
736 /* now, reflect transitivity */
737 changes = 1;
738 while(changes) {
739 changes = 0;
740 NTLOOP(i) {
741 t = pres[i+1];
742 for(s = pres[i]; s < t; ++s)
743 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
744 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
745 if(!pempty[ch])
746 break;
747 }
748 }
749 }
750
751 NTLOOP(i)
752 pfirst[i] = flset(&wsets[i].ws);
753 if(!indebug)
754 return;
755 if(foutput != 0)
756 NTLOOP(i) {
757 Bprint(foutput, "\n%s: ", nontrst[i].name);
758 prlook(pfirst[i]);
759 Bprint(foutput, " %d\n", pempty[i]);
760 }
761 }
762
763 /*
764 * sorts last state,and sees if it equals earlier ones. returns state number
765 */
766 int
state(int c)767 state(int c)
768 {
769 Item *p1, *p2, *k, *l, *q1, *q2;
770 int size1, size2, i;
771
772 p1 = pstate[nstate];
773 p2 = pstate[nstate+1];
774 if(p1 == p2)
775 return 0; /* null state */
776 /* sort the items */
777 for(k = p2-1; k > p1; k--) /* make k the biggest */
778 for(l = k-1; l >= p1; --l)
779 if(l->pitem > k->pitem) {
780 int *s;
781 Lkset *ss;
782
783 s = k->pitem;
784 k->pitem = l->pitem;
785 l->pitem = s;
786 ss = k->look;
787 k->look = l->look;
788 l->look = ss;
789 }
790 size1 = p2 - p1; /* size of state */
791
792 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
793 /* get ith state */
794 q1 = pstate[i];
795 q2 = pstate[i+1];
796 size2 = q2 - q1;
797 if(size1 != size2)
798 continue;
799 k = p1;
800 for(l = q1; l < q2; l++) {
801 if(l->pitem != k->pitem)
802 break;
803 k++;
804 }
805 if(l != q2)
806 continue;
807 /* found it */
808 pstate[nstate+1] = pstate[nstate]; /* delete last state */
809 /* fix up lookaheads */
810 if(nolook)
811 return i;
812 for(l = q1, k = p1; l < q2; ++l, ++k ) {
813 int s;
814
815 SETLOOP(s)
816 clset.lset[s] = l->look->lset[s];
817 if(setunion(clset.lset, k->look->lset)) {
818 tystate[i] = MUSTDO;
819 /* register the new set */
820 l->look = flset( &clset );
821 }
822 }
823 return i;
824 }
825 /* state is new */
826 if(nolook)
827 error("yacc state/nolook error");
828 pstate[nstate+2] = p2;
829 if(nstate+1 >= NSTATES)
830 error("too many states");
831 if(c >= NTBASE) {
832 mstates[nstate] = ntstates[c-NTBASE];
833 ntstates[c-NTBASE] = nstate;
834 } else {
835 mstates[nstate] = tstates[c];
836 tstates[c] = nstate;
837 }
838 tystate[nstate] = MUSTDO;
839 return nstate++;
840 }
841
842 void
putitem(int * ptr,Lkset * lptr)843 putitem(int *ptr, Lkset *lptr)
844 {
845 Item *j;
846
847 if(pidebug && foutput != 0)
848 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
849 j = pstate[nstate+1];
850 j->pitem = ptr;
851 if(!nolook)
852 j->look = flset(lptr);
853 pstate[nstate+1] = ++j;
854 if((int*)j > zzmemsz) {
855 zzmemsz = (int*)j;
856 if(zzmemsz >= &mem0[MEMSIZE])
857 error("out of state space");
858 }
859 }
860
861 /*
862 * mark nonterminals which derive the empty string
863 * also, look for nonterminals which don't derive any token strings
864 */
865 void
cempty(void)866 cempty(void)
867 {
868
869 int i, *p;
870
871 /* first, use the array pempty to detect productions that can never be reduced */
872 /* set pempty to WHONOWS */
873 aryfil(pempty, nnonter+1, WHOKNOWS);
874
875 /* now, look at productions, marking nonterminals which derive something */
876 more:
877 PLOOP(0, i) {
878 if(pempty[*prdptr[i] - NTBASE])
879 continue;
880 for(p = prdptr[i]+1; *p >= 0; ++p)
881 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
882 break;
883 /* production can be derived */
884 if(*p < 0) {
885 pempty[*prdptr[i]-NTBASE] = OK;
886 goto more;
887 }
888 }
889
890 /* now, look at the nonterminals, to see if they are all OK */
891 NTLOOP(i) {
892 /* the added production rises or falls as the start symbol ... */
893 if(i == 0)
894 continue;
895 if(pempty[i] != OK) {
896 fatfl = 0;
897 error("nonterminal %s never derives any token string", nontrst[i].name);
898 }
899 }
900
901 if(nerrors) {
902 summary();
903 cleantmp();
904 exits("error");
905 }
906
907 /* now, compute the pempty array, to see which nonterminals derive the empty string */
908 /* set pempty to WHOKNOWS */
909 aryfil( pempty, nnonter+1, WHOKNOWS);
910
911 /* loop as long as we keep finding empty nonterminals */
912
913 again:
914 PLOOP(1, i) {
915 /* not known to be empty */
916 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
917 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
918 ;
919 /* we have a nontrivially empty nonterminal */
920 if(*p < 0) {
921 pempty[*prdptr[i]-NTBASE] = EMPTY;
922 /* got one ... try for another */
923 goto again;
924 }
925 }
926 }
927 }
928
929 /*
930 * generate the states
931 */
932 void
stagen(void)933 stagen(void)
934 {
935
936 int c, i, j, more;
937 Wset *p, *q;
938
939 /* initialize */
940 nstate = 0;
941
942 /* THIS IS FUNNY from the standpoint of portability
943 * it represents the magic moment when the mem0 array, which has
944 * been holding the productions, starts to hold item pointers, of a
945 * different type...
946 * someday, alloc should be used to allocate all this stuff... for now, we
947 * accept that if pointers don't fit in integers, there is a problem...
948 */
949
950 pstate[0] = pstate[1] = (Item*)mem;
951 aryfil(clset.lset, tbitset, 0);
952 putitem(prdptr[0]+1, &clset);
953 tystate[0] = MUSTDO;
954 nstate = 1;
955 pstate[2] = pstate[1];
956
957 aryfil(amem, ACTSIZE, 0);
958
959 /* now, the main state generation loop */
960 for(more=1; more;) {
961 more = 0;
962 SLOOP(i) {
963 if(tystate[i] != MUSTDO)
964 continue;
965 tystate[i] = DONE;
966 aryfil(temp1, nnonter+1, 0);
967 /* take state i, close it, and do gotos */
968 closure(i);
969 /* generate goto's */
970 WSLOOP(wsets, p) {
971 if(p->flag)
972 continue;
973 p->flag = 1;
974 c = *(p->pitem);
975 if(c <= 1) {
976 if(pstate[i+1]-pstate[i] <= p-wsets)
977 tystate[i] = MUSTLOOKAHEAD;
978 continue;
979 }
980 /* do a goto on c */
981 WSLOOP(p, q)
982 /* this item contributes to the goto */
983 if(c == *(q->pitem)) {
984 putitem(q->pitem+1, &q->ws);
985 q->flag = 1;
986 }
987 if(c < NTBASE)
988 state(c); /* register new state */
989 else
990 temp1[c-NTBASE] = state(c);
991 }
992 if(gsdebug && foutput != 0) {
993 Bprint(foutput, "%d: ", i);
994 NTLOOP(j)
995 if(temp1[j])
996 Bprint(foutput, "%s %d, ",
997 nontrst[j].name, temp1[j]);
998 Bprint(foutput, "\n");
999 }
1000 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1001 /* do some more */
1002 more = 1;
1003 }
1004 }
1005 }
1006
1007 /*
1008 * generate the closure of state i
1009 */
1010 void
closure(int i)1011 closure(int i)
1012 {
1013
1014 Wset *u, *v;
1015 Item *p, *q;
1016 int c, ch, work, k, *pi, **s, **t;
1017
1018 zzclose++;
1019
1020 /* first, copy kernel of state i to wsets */
1021 cwp = wsets;
1022 ITMLOOP(i, p, q) {
1023 cwp->pitem = p->pitem;
1024 cwp->flag = 1; /* this item must get closed */
1025 SETLOOP(k)
1026 cwp->ws.lset[k] = p->look->lset[k];
1027 WSBUMP(cwp);
1028 }
1029
1030 /* now, go through the loop, closing each item */
1031 work = 1;
1032 while(work) {
1033 work = 0;
1034 WSLOOP(wsets, u) {
1035 if(u->flag == 0)
1036 continue;
1037 /* dot is before c */
1038 c = *(u->pitem);
1039 if(c < NTBASE) {
1040 u->flag = 0;
1041 /* only interesting case is where . is before nonterminal */
1042 continue;
1043 }
1044
1045 /* compute the lookahead */
1046 aryfil(clset.lset, tbitset, 0);
1047
1048 /* find items involving c */
1049 WSLOOP(u, v)
1050 if(v->flag == 1 && *(pi=v->pitem) == c) {
1051 v->flag = 0;
1052 if(nolook)
1053 continue;
1054 while((ch = *++pi) > 0) {
1055 /* terminal symbol */
1056 if(ch < NTBASE) {
1057 SETBIT(clset.lset, ch);
1058 break;
1059 }
1060 /* nonterminal symbol */
1061 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1062 if(!pempty[ch-NTBASE])
1063 break;
1064 }
1065 if(ch <= 0)
1066 setunion(clset.lset, v->ws.lset);
1067 }
1068
1069 /*
1070 * now loop over productions derived from c
1071 * c is now nonterminal number
1072 */
1073 c -= NTBASE;
1074 t = pres[c+1];
1075 for(s = pres[c]; s < t; ++s) {
1076 /*
1077 * put these items into the closure
1078 * is the item there
1079 */
1080 WSLOOP(wsets, v)
1081 /* yes, it is there */
1082 if(v->pitem == *s) {
1083 if(nolook)
1084 goto nexts;
1085 if(setunion(v->ws.lset, clset.lset))
1086 v->flag = work = 1;
1087 goto nexts;
1088 }
1089
1090 /* not there; make a new entry */
1091 if(cwp-wsets+1 >= WSETSIZE)
1092 error( "working set overflow");
1093 cwp->pitem = *s;
1094 cwp->flag = 1;
1095 if(!nolook) {
1096 work = 1;
1097 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1098 }
1099 WSBUMP(cwp);
1100
1101 nexts:;
1102 }
1103 }
1104 }
1105
1106 /* have computed closure; flags are reset; return */
1107 if(cwp > zzcwp)
1108 zzcwp = cwp;
1109 if(cldebug && foutput != 0) {
1110 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1111 WSLOOP(wsets, u) {
1112 if(u->flag)
1113 Bprint(foutput, "flag set!\n");
1114 u->flag = 0;
1115 Bprint(foutput, "\t%s", writem(u->pitem));
1116 prlook(&u->ws);
1117 Bprint(foutput, "\n");
1118 }
1119 }
1120 }
1121
1122 /*
1123 * decide if the lookahead set pointed to by p is known
1124 * return pointer to a perminent location for the set
1125 */
1126 Lkset*
flset(Lkset * p)1127 flset(Lkset *p)
1128 {
1129 Lkset *q;
1130 int *u, *v, *w, j;
1131
1132 for(q = &lkst[nlset]; q-- > lkst;) {
1133 u = p->lset;
1134 v = q->lset;
1135 w = &v[tbitset];
1136 while(v < w)
1137 if(*u++ != *v++)
1138 goto more;
1139 /* we have matched */
1140 return q;
1141 more:;
1142 }
1143 /* add a new one */
1144 q = &lkst[nlset++];
1145 if(nlset >= LSETSIZE)
1146 error("too many lookahead sets");
1147 SETLOOP(j)
1148 q->lset[j] = p->lset[j];
1149 return q;
1150 }
1151
1152 void
cleantmp(void)1153 cleantmp(void)
1154 {
1155 ZAPFILE(actname);
1156 ZAPFILE(tempname);
1157 }
1158
1159 void
intr(void)1160 intr(void)
1161 {
1162 cleantmp();
1163 exits("interrupted");
1164 }
1165
1166 void
usage(void)1167 usage(void)
1168 {
1169 fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1170 exits("usage");
1171 }
1172
1173 void
setup(int argc,char * argv[])1174 setup(int argc, char *argv[])
1175 {
1176 long c, t;
1177 int i, j, lev, ty, ytab, *p;
1178 int vflag, dflag, stem;
1179 char actnm[8], *stemc, *s, dirbuf[128];
1180
1181 ytab = 0;
1182 vflag = 0;
1183 dflag = 0;
1184 stem = 0;
1185 stemc = "y";
1186 foutput = 0;
1187 fdefine = 0;
1188 fdebug = 0;
1189 ARGBEGIN{
1190 case 'v':
1191 case 'V':
1192 vflag++;
1193 break;
1194 case 'D':
1195 yydebug = EARGF(usage());
1196 break;
1197 case 'd':
1198 dflag++;
1199 break;
1200 case 'o':
1201 ytab++;
1202 ytabc = EARGF(usage());
1203 break;
1204 case 's':
1205 stem++;
1206 stemc = ARGF();
1207 break;
1208 case 'S':
1209 parser = PARSERS;
1210 break;
1211 default:
1212 error("illegal option: %c", ARGC());
1213 }ARGEND
1214 openup(stemc, dflag, vflag, ytab, ytabc);
1215
1216 ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1217 faction = Bopen(actname = mktemp(tactname), OWRITE);
1218 if(ftemp == 0 || faction == 0)
1219 error("cannot open temp file");
1220 if(argc < 1)
1221 error("no input file");
1222 infile = argv[0];
1223 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1224 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1225 s = malloc(i);
1226 if(s != nil){
1227 snprint(s, i, "%s/%s", dirbuf, infile);
1228 cleanname(s);
1229 infile = s;
1230 }
1231 }
1232 finput = Bopen(infile, OREAD);
1233 if(finput == 0)
1234 error("cannot open '%s'", argv[0]);
1235 cnamp = cnames;
1236
1237 defin(0, "$end");
1238 extval = PRIVATE; /* tokens start in unicode 'private use' */
1239 defin(0, "error");
1240 defin(1, "$accept");
1241 defin(0, "$unk");
1242 mem = mem0;
1243 i = 0;
1244
1245 for(t = gettok(); t != MARK && t != ENDFILE;)
1246 switch(t) {
1247 case ';':
1248 t = gettok();
1249 break;
1250
1251 case START:
1252 if(gettok() != IDENTIFIER)
1253 error("bad %%start construction");
1254 start = chfind(1, tokname);
1255 t = gettok();
1256 continue;
1257
1258 case TYPEDEF:
1259 if(gettok() != TYPENAME)
1260 error("bad syntax in %%type");
1261 ty = numbval;
1262 for(;;) {
1263 t = gettok();
1264 switch(t) {
1265 case IDENTIFIER:
1266 if((t=chfind(1, tokname)) < NTBASE) {
1267 j = TYPE(toklev[t]);
1268 if(j != 0 && j != ty)
1269 error("type redeclaration of token %s",
1270 tokset[t].name);
1271 else
1272 SETTYPE(toklev[t], ty);
1273 } else {
1274 j = nontrst[t-NTBASE].value;
1275 if(j != 0 && j != ty)
1276 error("type redeclaration of nonterminal %s",
1277 nontrst[t-NTBASE].name );
1278 else
1279 nontrst[t-NTBASE].value = ty;
1280 }
1281 case ',':
1282 continue;
1283 case ';':
1284 t = gettok();
1285 default:
1286 break;
1287 }
1288 break;
1289 }
1290 continue;
1291
1292 case UNION:
1293 /* copy the union declaration to the output */
1294 cpyunion();
1295 t = gettok();
1296 continue;
1297
1298 case LEFT:
1299 case BINARY:
1300 case RIGHT:
1301 i++;
1302
1303 case TERM:
1304 /* nonzero means new prec. and assoc. */
1305 lev = t-TERM;
1306 ty = 0;
1307
1308 /* get identifiers so defined */
1309 t = gettok();
1310
1311 /* there is a type defined */
1312 if(t == TYPENAME) {
1313 ty = numbval;
1314 t = gettok();
1315 }
1316 for(;;) {
1317 switch(t) {
1318 case ',':
1319 t = gettok();
1320 continue;
1321
1322 case ';':
1323 break;
1324
1325 case IDENTIFIER:
1326 j = chfind(0, tokname);
1327 if(j >= NTBASE)
1328 error("%s defined earlier as nonterminal", tokname);
1329 if(lev) {
1330 if(ASSOC(toklev[j]))
1331 error("redeclaration of precedence of %s", tokname);
1332 SETASC(toklev[j], lev);
1333 SETPLEV(toklev[j], i);
1334 }
1335 if(ty) {
1336 if(TYPE(toklev[j]))
1337 error("redeclaration of type of %s", tokname);
1338 SETTYPE(toklev[j],ty);
1339 }
1340 t = gettok();
1341 if(t == NUMBER) {
1342 tokset[j].value = numbval;
1343 if(j < ndefout && j > 3)
1344 error("please define type number of %s earlier",
1345 tokset[j].name);
1346 t = gettok();
1347 }
1348 continue;
1349 }
1350 break;
1351 }
1352 continue;
1353
1354 case LCURLY:
1355 defout(0);
1356 cpycode();
1357 t = gettok();
1358 continue;
1359
1360 default:
1361 error("syntax error");
1362 }
1363 if(t == ENDFILE)
1364 error("unexpected EOF before %%");
1365
1366 /* t is MARK */
1367 Bprint(ftable, "extern int yyerrflag;\n");
1368 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1369 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1370 Bprint(ftable, "#endif\n" );
1371 if(!ntypes) {
1372 Bprint(ftable, "#ifndef YYSTYPE\n");
1373 Bprint(ftable, "#define YYSTYPE int\n");
1374 Bprint(ftable, "#endif\n");
1375 }
1376 Bprint(ftable, "YYSTYPE yylval;\n");
1377 Bprint(ftable, "YYSTYPE yyval;\n");
1378
1379 prdptr[0] = mem;
1380
1381 /* added production */
1382 *mem++ = NTBASE;
1383
1384 /* if start is 0, we will overwrite with the lhs of the first rule */
1385 *mem++ = start;
1386 *mem++ = 1;
1387 *mem++ = 0;
1388 prdptr[1] = mem;
1389 while((t=gettok()) == LCURLY)
1390 cpycode();
1391 if(t != IDENTCOLON)
1392 error("bad syntax on first rule");
1393
1394 if(!start)
1395 prdptr[0][1] = chfind(1, tokname);
1396
1397 /* read rules */
1398 while(t != MARK && t != ENDFILE) {
1399 /* process a rule */
1400 rlines[nprod] = lineno;
1401 if(t == '|')
1402 *mem++ = *prdptr[nprod-1];
1403 else
1404 if(t == IDENTCOLON) {
1405 *mem = chfind(1, tokname);
1406 if(*mem < NTBASE)
1407 error("token illegal on LHS of grammar rule");
1408 mem++;
1409 } else
1410 error("illegal rule: missing semicolon or | ?");
1411 /* read rule body */
1412 t = gettok();
1413
1414 more_rule:
1415 while(t == IDENTIFIER) {
1416 *mem = chfind(1, tokname);
1417 if(*mem < NTBASE)
1418 levprd[nprod] = toklev[*mem];
1419 mem++;
1420 t = gettok();
1421 }
1422 if(t == PREC) {
1423 if(gettok() != IDENTIFIER)
1424 error("illegal %%prec syntax");
1425 j = chfind(2, tokname);
1426 if(j >= NTBASE)
1427 error("nonterminal %s illegal after %%prec",
1428 nontrst[j-NTBASE].name);
1429 levprd[nprod] = toklev[j];
1430 t = gettok();
1431 }
1432 if(t == '=') {
1433 levprd[nprod] |= ACTFLAG;
1434 Bprint(faction, "\ncase %d:", nprod);
1435 cpyact(mem-prdptr[nprod]-1);
1436 Bprint(faction, " break;");
1437 if((t=gettok()) == IDENTIFIER) {
1438
1439 /* action within rule... */
1440 sprint(actnm, "$$%d", nprod);
1441
1442 /* make it a nonterminal */
1443 j = chfind(1, actnm);
1444
1445 /*
1446 * the current rule will become rule number nprod+1
1447 * move the contents down, and make room for the null
1448 */
1449 for(p = mem; p >= prdptr[nprod]; --p)
1450 p[2] = *p;
1451 mem += 2;
1452
1453 /* enter null production for action */
1454 p = prdptr[nprod];
1455 *p++ = j;
1456 *p++ = -nprod;
1457
1458 /* update the production information */
1459 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1460 levprd[nprod] = ACTFLAG;
1461 if(++nprod >= NPROD)
1462 error("more than %d rules", NPROD);
1463 prdptr[nprod] = p;
1464
1465 /* make the action appear in the original rule */
1466 *mem++ = j;
1467
1468 /* get some more of the rule */
1469 goto more_rule;
1470 }
1471 }
1472
1473 while(t == ';')
1474 t = gettok();
1475 *mem++ = -nprod;
1476
1477 /* check that default action is reasonable */
1478 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1479
1480 /* no explicit action, LHS has value */
1481 int tempty;
1482
1483 tempty = prdptr[nprod][1];
1484 if(tempty < 0)
1485 error("must return a value, since LHS has a type");
1486 else
1487 if(tempty >= NTBASE)
1488 tempty = nontrst[tempty-NTBASE].value;
1489 else
1490 tempty = TYPE(toklev[tempty]);
1491 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1492 error("default action causes potential type clash");
1493 }
1494 nprod++;
1495 if(nprod >= NPROD)
1496 error("more than %d rules", NPROD);
1497 prdptr[nprod] = mem;
1498 levprd[nprod] = 0;
1499 }
1500
1501 /* end of all rules */
1502 defout(1);
1503
1504 finact();
1505 if(t == MARK) {
1506 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1507 while((c=Bgetrune(finput)) != Beof)
1508 Bputrune(ftable, c);
1509 }
1510 Bterm(finput);
1511 }
1512
1513 /*
1514 * finish action routine
1515 */
1516 void
finact(void)1517 finact(void)
1518 {
1519
1520 Bterm(faction);
1521 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1522 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1523 }
1524
1525 /*
1526 * define s to be a terminal if t=0
1527 * or a nonterminal if t=1
1528 */
1529 int
defin(int nt,char * s)1530 defin(int nt, char *s)
1531 {
1532 int val;
1533 Rune rune;
1534
1535 val = 0;
1536 if(nt) {
1537 nnonter++;
1538 if(nnonter >= NNONTERM)
1539 error("too many nonterminals, limit %d",NNONTERM);
1540 nontrst[nnonter].name = cstash(s);
1541 return NTBASE + nnonter;
1542 }
1543
1544 /* must be a token */
1545 ntokens++;
1546 if(ntokens >= NTERMS)
1547 error("too many terminals, limit %d", NTERMS);
1548 tokset[ntokens].name = cstash(s);
1549
1550 /* establish value for token */
1551 /* single character literal */
1552 if(s[0] == ' ') {
1553 val = chartorune(&rune, &s[1]);
1554 if(s[val+1] == 0) {
1555 val = rune;
1556 goto out;
1557 }
1558 }
1559
1560 /* escape sequence */
1561 if(s[0] == ' ' && s[1] == '\\') {
1562 if(s[3] == 0) {
1563 /* single character escape sequence */
1564 switch(s[2]) {
1565 case 'n': val = '\n'; break;
1566 case 'r': val = '\r'; break;
1567 case 'b': val = '\b'; break;
1568 case 't': val = '\t'; break;
1569 case 'f': val = '\f'; break;
1570 case '\'': val = '\''; break;
1571 case '"': val = '"'; break;
1572 case '\\': val = '\\'; break;
1573 default: error("invalid escape");
1574 }
1575 goto out;
1576 }
1577
1578 /* \nnn sequence */
1579 if(s[2] >= '0' && s[2] <= '7') {
1580 if(s[3] < '0' ||
1581 s[3] > '7' ||
1582 s[4] < '0' ||
1583 s[4] > '7' ||
1584 s[5] != 0)
1585 error("illegal \\nnn construction");
1586 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1587 if(val == 0)
1588 error("'\\000' is illegal");
1589 goto out;
1590 }
1591 error("unknown escape");
1592 }
1593 val = extval++;
1594
1595 out:
1596 tokset[ntokens].value = val;
1597 toklev[ntokens] = 0;
1598 return ntokens;
1599 }
1600
1601 /*
1602 * write out the defines (at the end of the declaration section)
1603 */
1604 void
defout(int last)1605 defout(int last)
1606 {
1607 int i, c;
1608 char sar[NAMESIZE+10];
1609
1610 for(i=ndefout; i<=ntokens; i++) {
1611 /* non-literals */
1612 c = tokset[i].name[0];
1613 if(c != ' ' && c != '$') {
1614 Bprint(ftable, "#define %s %d\n",
1615 tokset[i].name, tokset[i].value);
1616 if(fdefine)
1617 Bprint(fdefine, "#define\t%s\t%d\n",
1618 tokset[i].name, tokset[i].value);
1619 }
1620 }
1621 ndefout = ntokens+1;
1622 if(last && fdebug) {
1623 Bprint(fdebug, "char* yytoknames[] =\n{\n");
1624 TLOOP(i) {
1625 if(tokset[i].name) {
1626 chcopy(sar, tokset[i].name);
1627 Bprint(fdebug, "\t\"%s\",\n", sar);
1628 continue;
1629 }
1630 Bprint(fdebug, "\t0,\n");
1631 }
1632 Bprint(fdebug, "};\n");
1633 }
1634 }
1635
1636 char*
cstash(char * s)1637 cstash(char *s)
1638 {
1639 char *temp;
1640
1641 temp = cnamp;
1642 do {
1643 if(cnamp >= &cnames[cnamsz])
1644 error("too many characters in id's and literals");
1645 else
1646 *cnamp++ = *s;
1647 } while(*s++);
1648 return temp;
1649 }
1650
1651 long
gettok(void)1652 gettok(void)
1653 {
1654 long c;
1655 Rune rune;
1656 int i, base, match, reserve;
1657 static int peekline;
1658
1659 begin:
1660 reserve = 0;
1661 lineno += peekline;
1662 peekline = 0;
1663 c = Bgetrune(finput);
1664 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1665 if(c == '\n')
1666 lineno++;
1667 c = Bgetrune(finput);
1668 }
1669
1670 /* skip comment */
1671 if(c == '/') {
1672 lineno += skipcom();
1673 goto begin;
1674 }
1675 switch(c) {
1676 case Beof:
1677 return ENDFILE;
1678
1679 case '{':
1680 Bungetrune(finput);
1681 return '=';
1682
1683 case '<':
1684 /* get, and look up, a type name (union member name) */
1685 i = 0;
1686 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1687 rune = c;
1688 c = runetochar(&tokname[i], &rune);
1689 if(i < NAMESIZE)
1690 i += c;
1691 }
1692 if(c != '>')
1693 error("unterminated < ... > clause");
1694 tokname[i] = 0;
1695 for(i=1; i<=ntypes; i++)
1696 if(!strcmp(typeset[i], tokname)) {
1697 numbval = i;
1698 return TYPENAME;
1699 }
1700 ntypes++;
1701 numbval = ntypes;
1702 typeset[numbval] = cstash(tokname);
1703 return TYPENAME;
1704
1705 case '"':
1706 case '\'':
1707 match = c;
1708 tokname[0] = ' ';
1709 i = 1;
1710 for(;;) {
1711 c = Bgetrune(finput);
1712 if(c == '\n' || c <= 0)
1713 error("illegal or missing ' or \"" );
1714 if(c == '\\') {
1715 tokname[i] = '\\';
1716 if(i < NAMESIZE)
1717 i++;
1718 c = Bgetrune(finput);
1719 } else
1720 if(c == match)
1721 break;
1722 rune = c;
1723 c = runetochar(&tokname[i], &rune);
1724 if(i < NAMESIZE)
1725 i += c;
1726 }
1727 break;
1728
1729 case '%':
1730 case '\\':
1731 switch(c = Bgetrune(finput)) {
1732 case '0': return TERM;
1733 case '<': return LEFT;
1734 case '2': return BINARY;
1735 case '>': return RIGHT;
1736 case '%':
1737 case '\\': return MARK;
1738 case '=': return PREC;
1739 case '{': return LCURLY;
1740 default: reserve = 1;
1741 }
1742
1743 default:
1744 /* number */
1745 if(isdigit(c)) {
1746 numbval = c-'0';
1747 base = (c=='0')? 8: 10;
1748 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1749 numbval = numbval*base + (c-'0');
1750 Bungetrune(finput);
1751 return NUMBER;
1752 }
1753 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1754 i = 0;
1755 while(islower(c) || isupper(c) || isdigit(c) ||
1756 c == '-' || c=='_' || c=='.' || c=='$') {
1757 if(reserve && isupper(c))
1758 c += 'a'-'A';
1759 rune = c;
1760 c = runetochar(&tokname[i], &rune);
1761 if(i < NAMESIZE)
1762 i += c;
1763 c = Bgetrune(finput);
1764 }
1765 } else
1766 return c;
1767 Bungetrune(finput);
1768 }
1769 tokname[i] = 0;
1770
1771 /* find a reserved word */
1772 if(reserve) {
1773 for(c=0; resrv[c].name; c++)
1774 if(strcmp(tokname, resrv[c].name) == 0)
1775 return resrv[c].value;
1776 error("invalid escape, or illegal reserved word: %s", tokname);
1777 }
1778
1779 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1780 c = Bgetrune(finput);
1781 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1782 if(c == '\n')
1783 peekline++;
1784 /* look for comments */
1785 if(c == '/')
1786 peekline += skipcom();
1787 c = Bgetrune(finput);
1788 }
1789 if(c == ':')
1790 return IDENTCOLON;
1791 Bungetrune(finput);
1792 return IDENTIFIER;
1793 }
1794
1795 /*
1796 * determine the type of a symbol
1797 */
1798 int
fdtype(int t)1799 fdtype(int t)
1800 {
1801 int v;
1802
1803 if(t >= NTBASE)
1804 v = nontrst[t-NTBASE].value;
1805 else
1806 v = TYPE(toklev[t]);
1807 if(v <= 0)
1808 error("must specify type for %s", (t>=NTBASE)?
1809 nontrst[t-NTBASE].name: tokset[t].name);
1810 return v;
1811 }
1812
1813 int
chfind(int t,char * s)1814 chfind(int t, char *s)
1815 {
1816 int i;
1817
1818 if(s[0] == ' ')
1819 t = 0;
1820 TLOOP(i)
1821 if(!strcmp(s, tokset[i].name))
1822 return i;
1823 NTLOOP(i)
1824 if(!strcmp(s, nontrst[i].name))
1825 return NTBASE+i;
1826
1827 /* cannot find name */
1828 if(t > 1)
1829 error("%s should have been defined earlier", s);
1830 return defin(t, s);
1831 }
1832
1833 /*
1834 * copy the union declaration to the output, and the define file if present
1835 */
1836 void
cpyunion(void)1837 cpyunion(void)
1838 {
1839 long c;
1840 int level;
1841
1842 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1843 Bprint(ftable, "typedef union ");
1844 if(fdefine != 0)
1845 Bprint(fdefine, "\ntypedef union ");
1846
1847 level = 0;
1848 for(;;) {
1849 if((c=Bgetrune(finput)) == Beof)
1850 error("EOF encountered while processing %%union");
1851 Bputrune(ftable, c);
1852 if(fdefine != 0)
1853 Bputrune(fdefine, c);
1854 switch(c) {
1855 case '\n':
1856 lineno++;
1857 break;
1858 case '{':
1859 level++;
1860 break;
1861 case '}':
1862 level--;
1863
1864 /* we are finished copying */
1865 if(level == 0) {
1866 Bprint(ftable, " YYSTYPE;\n");
1867 if(fdefine != 0)
1868 Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1869 return;
1870 }
1871 }
1872 }
1873 }
1874
1875 /*
1876 * copies code between \{ and \}
1877 */
1878 void
cpycode(void)1879 cpycode(void)
1880 {
1881
1882 long c;
1883
1884 c = Bgetrune(finput);
1885 if(c == '\n') {
1886 c = Bgetrune(finput);
1887 lineno++;
1888 }
1889 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1890 while(c != Beof) {
1891 if(c == '\\') {
1892 if((c=Bgetrune(finput)) == '}')
1893 return;
1894 Bputc(ftable, '\\');
1895 }
1896 if(c == '%') {
1897 if((c=Bgetrune(finput)) == '}')
1898 return;
1899 Bputc(ftable, '%');
1900 }
1901 Bputrune(ftable, c);
1902 if(c == '\n')
1903 lineno++;
1904 c = Bgetrune(finput);
1905 }
1906 error("eof before %%}");
1907 }
1908
1909 /*
1910 * skip over comments
1911 * skipcom is called after reading a '/'
1912 */
1913 int
skipcom(void)1914 skipcom(void)
1915 {
1916 long c;
1917 int i;
1918
1919 /* i is the number of lines skipped */
1920 i = 0;
1921 c = Bgetrune(finput);
1922 if(c == '/'){ /* C++ //: skip to end of line */
1923 while((c = Bgetrune(finput)) != Beof)
1924 if(c == '\n')
1925 return 1;
1926 }else if(c == '*'){ /* normal C comment */
1927 while((c = Bgetrune(finput)) != Beof) {
1928 while(c == '*')
1929 if((c = Bgetrune(finput)) == '/')
1930 return i;
1931 if(c == '\n')
1932 i++;
1933 }
1934 }else
1935 error("illegal comment");
1936
1937 error("EOF inside comment");
1938 return 0;
1939 }
1940
1941 /*
1942 * copy C action to the next ; or closing }
1943 */
1944 void
cpyact(int offset)1945 cpyact(int offset)
1946 {
1947 long c;
1948 int brac, match, j, s, fnd, tok;
1949
1950 Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1951 brac = 0;
1952
1953 loop:
1954 c = Bgetrune(finput);
1955 swt:
1956 switch(c) {
1957 case ';':
1958 if(brac == 0) {
1959 Bputrune(faction, c);
1960 return;
1961 }
1962 goto lcopy;
1963
1964 case '{':
1965 brac++;
1966 goto lcopy;
1967
1968 case '$':
1969 s = 1;
1970 tok = -1;
1971 c = Bgetrune(finput);
1972
1973 /* type description */
1974 if(c == '<') {
1975 Bungetrune(finput);
1976 if(gettok() != TYPENAME)
1977 error("bad syntax on $<ident> clause");
1978 tok = numbval;
1979 c = Bgetrune(finput);
1980 }
1981 if(c == '$') {
1982 Bprint(faction, "yyval");
1983
1984 /* put out the proper tag... */
1985 if(ntypes) {
1986 if(tok < 0)
1987 tok = fdtype(*prdptr[nprod]);
1988 Bprint(faction, ".%s", typeset[tok]);
1989 }
1990 goto loop;
1991 }
1992 if(c == '-') {
1993 s = -s;
1994 c = Bgetrune(finput);
1995 }
1996 if(isdigit(c)) {
1997 j = 0;
1998 while(isdigit(c)) {
1999 j = j*10 + (c-'0');
2000 c = Bgetrune(finput);
2001 }
2002 Bungetrune(finput);
2003 j = j*s - offset;
2004 if(j > 0)
2005 error("Illegal use of $%d", j+offset);
2006
2007 dollar:
2008 Bprint(faction, "yypt[-%d].yyv", -j);
2009
2010 /* put out the proper tag */
2011 if(ntypes) {
2012 if(j+offset <= 0 && tok < 0)
2013 error("must specify type of $%d", j+offset);
2014 if(tok < 0)
2015 tok = fdtype(prdptr[nprod][j+offset]);
2016 Bprint(faction, ".%s", typeset[tok]);
2017 }
2018 goto loop;
2019 }
2020 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2021 int tok; /* tok used oustide for type info */
2022
2023 /* look for $name */
2024 Bungetrune(finput);
2025 if(gettok() != IDENTIFIER)
2026 error("$ must be followed by an identifier");
2027 tok = chfind(2, tokname);
2028 if((c = Bgetrune(finput)) != '#') {
2029 Bungetrune(finput);
2030 fnd = -1;
2031 } else
2032 if(gettok() != NUMBER) {
2033 error("# must be followed by number");
2034 fnd = -1;
2035 } else
2036 fnd = numbval;
2037 for(j=1; j<=offset; ++j)
2038 if(tok == prdptr[nprod][j]) {
2039 if(--fnd <= 0) {
2040 j -= offset;
2041 goto dollar;
2042 }
2043 }
2044 error("$name or $name#number not found");
2045 }
2046 Bputc(faction, '$');
2047 if(s < 0 )
2048 Bputc(faction, '-');
2049 goto swt;
2050
2051 case '}':
2052 brac--;
2053 if(brac)
2054 goto lcopy;
2055 Bputrune(faction, c);
2056 return;
2057
2058 case '/':
2059 /* look for comments */
2060 Bputrune(faction, c);
2061 c = Bgetrune(finput);
2062 if(c != '*')
2063 goto swt;
2064
2065 /* it really is a comment; copy it */
2066 Bputrune(faction, c);
2067 c = Bgetrune(finput);
2068 while(c >= 0) {
2069 while(c == '*') {
2070 Bputrune(faction, c);
2071 if((c=Bgetrune(finput)) == '/')
2072 goto lcopy;
2073 }
2074 Bputrune(faction, c);
2075 if(c == '\n')
2076 lineno++;
2077 c = Bgetrune(finput);
2078 }
2079 error("EOF inside comment");
2080
2081 case '\'':
2082 /* character constant */
2083 match = '\'';
2084 goto string;
2085
2086 case '"':
2087 /* character string */
2088 match = '"';
2089
2090 string:
2091 Bputrune(faction, c);
2092 while(c = Bgetrune(finput)) {
2093 if(c == '\\') {
2094 Bputrune(faction, c);
2095 c = Bgetrune(finput);
2096 if(c == '\n')
2097 lineno++;
2098 } else
2099 if(c == match)
2100 goto lcopy;
2101 if(c == '\n')
2102 error("newline in string or char. const.");
2103 Bputrune(faction, c);
2104 }
2105 error("EOF in string or character constant");
2106
2107 case Beof:
2108 error("action does not terminate");
2109
2110 case '\n':
2111 lineno++;
2112 goto lcopy;
2113 }
2114
2115 lcopy:
2116 Bputrune(faction, c);
2117 goto loop;
2118 }
2119
2120 void
openup(char * stem,int dflag,int vflag,int ytab,char * ytabc)2121 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2122 {
2123 char buf[256];
2124
2125 if(vflag) {
2126 snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2127 foutput = Bopen(buf, OWRITE);
2128 if(foutput == 0)
2129 error("cannot open %s", buf);
2130 }
2131 if(yydebug) {
2132 snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2133 if((fdebug = Bopen(buf, OWRITE)) == 0)
2134 error("can't open %s", buf);
2135 }
2136 if(dflag) {
2137 snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2138 fdefine = Bopen(buf, OWRITE);
2139 if(fdefine == 0)
2140 error("can't create %s", buf);
2141 }
2142 if(ytab == 0)
2143 snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2144 else
2145 strecpy(buf, buf+sizeof buf, ytabc);
2146 ftable = Bopen(buf, OWRITE);
2147 if(ftable == 0)
2148 error("cannot open table file %s", buf);
2149 }
2150
2151 /*
2152 * print the output for the states
2153 */
2154 void
output(void)2155 output(void)
2156 {
2157 int i, k, c;
2158 Wset *u, *v;
2159
2160 Bprint(ftable, "short yyexca[] =\n{");
2161 if(fdebug)
2162 Bprint(fdebug, "char* yystates[] =\n{\n");
2163
2164 /* output the stuff for state i */
2165 SLOOP(i) {
2166 nolook = tystate[i]!=MUSTLOOKAHEAD;
2167 closure(i);
2168
2169 /* output actions */
2170 nolook = 1;
2171 aryfil(temp1, ntokens+nnonter+1, 0);
2172 WSLOOP(wsets, u) {
2173 c = *(u->pitem);
2174 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2175 WSLOOP(u, v)
2176 if(c == *(v->pitem))
2177 putitem(v->pitem+1, (Lkset*)0);
2178 temp1[c] = state(c);
2179 } else
2180 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2181 temp1[c+ntokens] = amem[indgo[i]+c];
2182 }
2183 if(i == 1)
2184 temp1[1] = ACCEPTCODE;
2185
2186 /* now, we have the shifts; look at the reductions */
2187 lastred = 0;
2188 WSLOOP(wsets, u) {
2189 c = *u->pitem;
2190
2191 /* reduction */
2192 if(c <= 0) {
2193 lastred = -c;
2194 TLOOP(k)
2195 if(BIT(u->ws.lset, k)) {
2196 if(temp1[k] == 0)
2197 temp1[k] = c;
2198 else
2199 if(temp1[k] < 0) { /* reduce/reduce conflict */
2200 if(foutput)
2201 Bprint(foutput,
2202 "\n%d: reduce/reduce conflict"
2203 " (red'ns %d and %d ) on %s",
2204 i, -temp1[k], lastred,
2205 symnam(k));
2206 if(-temp1[k] > lastred)
2207 temp1[k] = -lastred;
2208 zzrrconf++;
2209 } else
2210 /* potential shift/reduce conflict */
2211 precftn( lastred, k, i );
2212 }
2213 }
2214 }
2215 wract(i);
2216 }
2217
2218 if(fdebug)
2219 Bprint(fdebug, "};\n");
2220 Bprint(ftable, "};\n");
2221 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2222 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2223 if(yydebug)
2224 Bprint(ftable, "#define yydebug %s\n", yydebug);
2225 }
2226
2227 /*
2228 * pack state i from temp1 into amem
2229 */
2230 int
apack(int * p,int n)2231 apack(int *p, int n)
2232 {
2233 int *pp, *qq, *rr, off, *q, *r;
2234
2235 /* we don't need to worry about checking because
2236 * we will only look at entries known to be there...
2237 * eliminate leading and trailing 0's
2238 */
2239
2240 q = p+n;
2241 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2242 ;
2243 /* no actions */
2244 if(pp > q)
2245 return 0;
2246 p = pp;
2247
2248 /* now, find a place for the elements from p to q, inclusive */
2249 r = &amem[ACTSIZE-1];
2250 for(rr = amem; rr <= r; rr++, off++) {
2251 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2252 if(*pp != 0)
2253 if(*pp != *qq && *qq != 0)
2254 goto nextk;
2255
2256 /* we have found an acceptable k */
2257 if(pkdebug && foutput != 0)
2258 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2259 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2260 if(*pp) {
2261 if(qq > r)
2262 error("action table overflow");
2263 if(qq > memp)
2264 memp = qq;
2265 *qq = *pp;
2266 }
2267 if(pkdebug && foutput != 0)
2268 for(pp = amem; pp <= memp; pp += 10) {
2269 Bprint(foutput, "\t");
2270 for(qq = pp; qq <= pp+9; qq++)
2271 Bprint(foutput, "%d ", *qq);
2272 Bprint(foutput, "\n");
2273 }
2274 return(off);
2275 nextk:;
2276 }
2277 error("no space in action table");
2278 return 0;
2279 }
2280
2281 /*
2282 * output the gotos for the nontermninals
2283 */
2284 void
go2out(void)2285 go2out(void)
2286 {
2287 int i, j, k, best, count, cbest, times;
2288
2289 /* mark begining of gotos */
2290 Bprint(ftemp, "$\n");
2291 for(i = 1; i <= nnonter; i++) {
2292 go2gen(i);
2293
2294 /* find the best one to make default */
2295 best = -1;
2296 times = 0;
2297
2298 /* is j the most frequent */
2299 for(j = 0; j <= nstate; j++) {
2300 if(tystate[j] == 0)
2301 continue;
2302 if(tystate[j] == best)
2303 continue;
2304
2305 /* is tystate[j] the most frequent */
2306 count = 0;
2307 cbest = tystate[j];
2308 for(k = j; k <= nstate; k++)
2309 if(tystate[k] == cbest)
2310 count++;
2311 if(count > times) {
2312 best = cbest;
2313 times = count;
2314 }
2315 }
2316
2317 /* best is now the default entry */
2318 zzgobest += times-1;
2319 for(j = 0; j <= nstate; j++)
2320 if(tystate[j] != 0 && tystate[j] != best) {
2321 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2322 zzgoent++;
2323 }
2324
2325 /* now, the default */
2326 if(best == -1)
2327 best = 0;
2328 zzgoent++;
2329 Bprint(ftemp, "%d\n", best);
2330 }
2331 }
2332
2333 /*
2334 * output the gotos for nonterminal c
2335 */
2336 void
go2gen(int c)2337 go2gen(int c)
2338 {
2339 int i, work, cc;
2340 Item *p, *q;
2341
2342
2343 /* first, find nonterminals with gotos on c */
2344 aryfil(temp1, nnonter+1, 0);
2345 temp1[c] = 1;
2346 work = 1;
2347 while(work) {
2348 work = 0;
2349 PLOOP(0, i)
2350
2351 /* cc is a nonterminal */
2352 if((cc=prdptr[i][1]-NTBASE) >= 0)
2353 /* cc has a goto on c */
2354 if(temp1[cc] != 0) {
2355
2356 /* thus, the left side of production i does too */
2357 cc = *prdptr[i]-NTBASE;
2358 if(temp1[cc] == 0) {
2359 work = 1;
2360 temp1[cc] = 1;
2361 }
2362 }
2363 }
2364
2365 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2366 if(g2debug && foutput != 0) {
2367 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2368 NTLOOP(i)
2369 if(temp1[i])
2370 Bprint(foutput, "%s ", nontrst[i].name);
2371 Bprint(foutput, "\n");
2372 }
2373
2374 /* now, go through and put gotos into tystate */
2375 aryfil(tystate, nstate, 0);
2376 SLOOP(i)
2377 ITMLOOP(i, p, q)
2378 if((cc = *p->pitem) >= NTBASE)
2379 /* goto on c is possible */
2380 if(temp1[cc-NTBASE]) {
2381 tystate[i] = amem[indgo[i]+c];
2382 break;
2383 }
2384 }
2385
2386 /*
2387 * decide a shift/reduce conflict by precedence.
2388 * r is a rule number, t a token number
2389 * the conflict is in state s
2390 * temp1[t] is changed to reflect the action
2391 */
2392 void
precftn(int r,int t,int s)2393 precftn(int r, int t, int s)
2394 {
2395 int lp, lt, action;
2396
2397 lp = levprd[r];
2398 lt = toklev[t];
2399 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2400
2401 /* conflict */
2402 if(foutput != 0)
2403 Bprint(foutput,
2404 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2405 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2406 zzsrconf++;
2407 return;
2408 }
2409 if(PLEVEL(lt) == PLEVEL(lp))
2410 action = ASSOC(lt);
2411 else
2412 if(PLEVEL(lt) > PLEVEL(lp))
2413 action = RASC; /* shift */
2414 else
2415 action = LASC; /* reduce */
2416 switch(action) {
2417 case BASC: /* error action */
2418 temp1[t] = ERRCODE;
2419 break;
2420 case LASC: /* reduce */
2421 temp1[t] = -r;
2422 break;
2423 }
2424 }
2425
2426 /*
2427 * output state i
2428 * temp1 has the actions, lastred the default
2429 */
2430 void
wract(int i)2431 wract(int i)
2432 {
2433 int p, p0, p1, ntimes, tred, count, j, flag;
2434
2435 /* find the best choice for lastred */
2436 lastred = 0;
2437 ntimes = 0;
2438 TLOOP(j) {
2439 if(temp1[j] >= 0)
2440 continue;
2441 if(temp1[j]+lastred == 0)
2442 continue;
2443 /* count the number of appearances of temp1[j] */
2444 count = 0;
2445 tred = -temp1[j];
2446 levprd[tred] |= REDFLAG;
2447 TLOOP(p)
2448 if(temp1[p]+tred == 0)
2449 count++;
2450 if(count > ntimes) {
2451 lastred = tred;
2452 ntimes = count;
2453 }
2454 }
2455
2456 /*
2457 * for error recovery, arrange that, if there is a shift on the
2458 * error recovery token, `error', that the default be the error action
2459 */
2460 if(temp1[2] > 0)
2461 lastred = 0;
2462
2463 /* clear out entries in temp1 which equal lastred */
2464 TLOOP(p)
2465 if(temp1[p]+lastred == 0)
2466 temp1[p] = 0;
2467
2468 wrstate(i);
2469 defact[i] = lastred;
2470 flag = 0;
2471 TLOOP(p0)
2472 if((p1=temp1[p0]) != 0) {
2473 if(p1 < 0) {
2474 p1 = -p1;
2475 goto exc;
2476 }
2477 if(p1 == ACCEPTCODE) {
2478 p1 = -1;
2479 goto exc;
2480 }
2481 if(p1 == ERRCODE) {
2482 p1 = 0;
2483 exc:
2484 if(flag++ == 0)
2485 Bprint(ftable, "-1, %d,\n", i);
2486 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2487 zzexcp++;
2488 continue;
2489 }
2490 Bprint(ftemp, "%d,%d,", p0, p1);
2491 zzacent++;
2492 }
2493 if(flag) {
2494 defact[i] = -2;
2495 Bprint(ftable, "\t-2, %d,\n", lastred);
2496 }
2497 Bprint(ftemp, "\n");
2498 }
2499
2500 /*
2501 * writes state i
2502 */
2503 void
wrstate(int i)2504 wrstate(int i)
2505 {
2506 int j0, j1;
2507 Item *pp, *qq;
2508 Wset *u;
2509
2510 if(fdebug) {
2511 if(lastred) {
2512 Bprint(fdebug, " 0, /*%d*/\n", i);
2513 } else {
2514 Bprint(fdebug, " \"");
2515 ITMLOOP(i, pp, qq)
2516 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2517 if(tystate[i] == MUSTLOOKAHEAD)
2518 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2519 if(*u->pitem < 0)
2520 Bprint(fdebug, "%s\\n", writem(u->pitem));
2521 Bprint(fdebug, "\", /*%d*/\n", i);
2522 }
2523 }
2524 if(foutput == 0)
2525 return;
2526 Bprint(foutput, "\nstate %d\n", i);
2527 ITMLOOP(i, pp, qq)
2528 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2529 if(tystate[i] == MUSTLOOKAHEAD)
2530 /* print out empty productions in closure */
2531 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2532 if(*u->pitem < 0)
2533 Bprint(foutput, "\t%s\n", writem(u->pitem));
2534
2535 /* check for state equal to another */
2536 TLOOP(j0)
2537 if((j1=temp1[j0]) != 0) {
2538 Bprint(foutput, "\n\t%s ", symnam(j0));
2539 /* shift, error, or accept */
2540 if(j1 > 0) {
2541 if(j1 == ACCEPTCODE)
2542 Bprint(foutput, "accept");
2543 else
2544 if(j1 == ERRCODE)
2545 Bprint(foutput, "error");
2546 else
2547 Bprint(foutput, "shift %d", j1);
2548 } else
2549 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2550 }
2551
2552 /* output the final production */
2553 if(lastred)
2554 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2555 lastred, rlines[lastred]);
2556 else
2557 Bprint(foutput, "\n\t. error\n\n");
2558
2559 /* now, output nonterminal actions */
2560 j1 = ntokens;
2561 for(j0 = 1; j0 <= nnonter; j0++) {
2562 j1++;
2563 if(temp1[j1])
2564 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2565 }
2566 }
2567
2568 void
warray(char * s,int * v,int n)2569 warray(char *s, int *v, int n)
2570 {
2571 int i;
2572
2573 Bprint(ftable, "short %s[] =\n{", s);
2574 for(i=0;;) {
2575 if(i%10 == 0)
2576 Bprint(ftable, "\n");
2577 Bprint(ftable, "%4d", v[i]);
2578 i++;
2579 if(i >= n) {
2580 Bprint(ftable, "\n};\n");
2581 break;
2582 }
2583 Bprint(ftable, ",");
2584 }
2585 }
2586
2587 /*
2588 * in order to free up the mem and amem arrays for the optimizer,
2589 * and still be able to output yyr1, etc., after the sizes of
2590 * the action array is known, we hide the nonterminals
2591 * derived by productions in levprd.
2592 */
2593
2594 void
hideprod(void)2595 hideprod(void)
2596 {
2597 int i, j;
2598
2599 j = 0;
2600 levprd[0] = 0;
2601 PLOOP(1, i) {
2602 if(!(levprd[i] & REDFLAG)) {
2603 j++;
2604 if(foutput != 0)
2605 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2606 }
2607 levprd[i] = *prdptr[i] - NTBASE;
2608 }
2609 if(j)
2610 print("%d rules never reduced\n", j);
2611 }
2612
2613 void
callopt(void)2614 callopt(void)
2615 {
2616 int i, *p, j, k, *q;
2617
2618 /* read the arrays from tempfile and set parameters */
2619 finput = Bopen(tempname, OREAD);
2620 if(finput == 0)
2621 error("optimizer cannot open tempfile");
2622
2623 pgo[0] = 0;
2624 temp1[0] = 0;
2625 nstate = 0;
2626 nnonter = 0;
2627 for(;;) {
2628 switch(gtnm()) {
2629 case '\n':
2630 nstate++;
2631 pmem--;
2632 temp1[nstate] = pmem - mem0;
2633 case ',':
2634 continue;
2635 case '$':
2636 break;
2637 default:
2638 error("bad tempfile");
2639 }
2640 break;
2641 }
2642
2643 pmem--;
2644 temp1[nstate] = yypgo[0] = pmem - mem0;
2645 for(;;) {
2646 switch(gtnm()) {
2647 case '\n':
2648 nnonter++;
2649 yypgo[nnonter] = pmem-mem0;
2650 case ',':
2651 continue;
2652 case -1:
2653 break;
2654 default:
2655 error("bad tempfile");
2656 }
2657 break;
2658 }
2659 pmem--;
2660 yypgo[nnonter--] = pmem - mem0;
2661 for(i = 0; i < nstate; i++) {
2662 k = 32000;
2663 j = 0;
2664 q = mem0 + temp1[i+1];
2665 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2666 if(*p > j)
2667 j = *p;
2668 if(*p < k)
2669 k = *p;
2670 }
2671 /* nontrivial situation */
2672 if(k <= j) {
2673 /* j is now the range */
2674 /* j -= k; /* call scj */
2675 if(k > maxoff)
2676 maxoff = k;
2677 }
2678 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2679 if(j > maxspr)
2680 maxspr = j;
2681 }
2682
2683 /* initialize ggreed table */
2684 for(i = 1; i <= nnonter; i++) {
2685 ggreed[i] = 1;
2686 j = 0;
2687
2688 /* minimum entry index is always 0 */
2689 q = mem0 + yypgo[i+1] - 1;
2690 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2691 ggreed[i] += 2;
2692 if(*p > j)
2693 j = *p;
2694 }
2695 ggreed[i] = ggreed[i] + 2*j;
2696 if(j > maxoff)
2697 maxoff = j;
2698 }
2699
2700 /* now, prepare to put the shift actions into the amem array */
2701 for(i = 0; i < ACTSIZE; i++)
2702 amem[i] = 0;
2703 maxa = amem;
2704 for(i = 0; i < nstate; i++) {
2705 if(tystate[i] == 0 && adb > 1)
2706 Bprint(ftable, "State %d: null\n", i);
2707 indgo[i] = YYFLAG1;
2708 }
2709 while((i = nxti()) != NOMORE)
2710 if(i >= 0)
2711 stin(i);
2712 else
2713 gin(-i);
2714
2715 /* print amem array */
2716 if(adb > 2 )
2717 for(p = amem; p <= maxa; p += 10) {
2718 Bprint(ftable, "%4d ", (int)(p-amem));
2719 for(i = 0; i < 10; ++i)
2720 Bprint(ftable, "%4d ", p[i]);
2721 Bprint(ftable, "\n");
2722 }
2723
2724 /* write out the output appropriate to the language */
2725 aoutput();
2726 osummary();
2727 ZAPFILE(tempname);
2728 }
2729
2730 void
gin(int i)2731 gin(int i)
2732 {
2733 int *p, *r, *s, *q1, *q2;
2734
2735 /* enter gotos on nonterminal i into array amem */
2736 ggreed[i] = 0;
2737
2738 q2 = mem0+ yypgo[i+1] - 1;
2739 q1 = mem0 + yypgo[i];
2740
2741 /* now, find amem place for it */
2742 for(p = amem; p < &amem[ACTSIZE]; p++) {
2743 if(*p)
2744 continue;
2745 for(r = q1; r < q2; r += 2) {
2746 s = p + *r + 1;
2747 if(*s)
2748 goto nextgp;
2749 if(s > maxa)
2750 if((maxa = s) > &amem[ACTSIZE])
2751 error("a array overflow");
2752 }
2753 /* we have found amem spot */
2754 *p = *q2;
2755 if(p > maxa)
2756 if((maxa = p) > &amem[ACTSIZE])
2757 error("a array overflow");
2758 for(r = q1; r < q2; r += 2) {
2759 s = p + *r + 1;
2760 *s = r[1];
2761 }
2762 pgo[i] = p-amem;
2763 if(adb > 1)
2764 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2765 return;
2766
2767 nextgp:;
2768 }
2769 error("cannot place goto %d\n", i);
2770 }
2771
2772 void
stin(int i)2773 stin(int i)
2774 {
2775 int *r, *s, n, flag, j, *q1, *q2;
2776
2777 tystate[i] = 0;
2778
2779 /* enter state i into the amem array */
2780 q2 = mem0+temp1[i+1];
2781 q1 = mem0+temp1[i];
2782 /* find an acceptable place */
2783 for(n = -maxoff; n < ACTSIZE; n++) {
2784 flag = 0;
2785 for(r = q1; r < q2; r += 2) {
2786 if((s = *r + n + amem) < amem)
2787 goto nextn;
2788 if(*s == 0)
2789 flag++;
2790 else
2791 if(*s != r[1])
2792 goto nextn;
2793 }
2794
2795 /* check that the position equals another only if the states are identical */
2796 for(j=0; j<nstate; j++) {
2797 if(indgo[j] == n) {
2798
2799 /* we have some disagreement */
2800 if(flag)
2801 goto nextn;
2802 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2803
2804 /* states are equal */
2805 indgo[i] = n;
2806 if(adb > 1)
2807 Bprint(ftable,
2808 "State %d: entry at %d equals state %d\n",
2809 i, n, j);
2810 return;
2811 }
2812
2813 /* we have some disagreement */
2814 goto nextn;
2815 }
2816 }
2817
2818 for(r = q1; r < q2; r += 2) {
2819 if((s = *r+n+amem) >= &amem[ACTSIZE])
2820 error("out of space in optimizer a array");
2821 if(s > maxa)
2822 maxa = s;
2823 if(*s != 0 && *s != r[1])
2824 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2825 *s = r[1];
2826 }
2827 indgo[i] = n;
2828 if(adb > 1)
2829 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2830 return;
2831 nextn:;
2832 }
2833 error("Error; failure to place state %d\n", i);
2834 }
2835
2836 /*
2837 * finds the next i
2838 */
2839 int
nxti(void)2840 nxti(void)
2841 {
2842 int i, max, maxi;
2843
2844 max = 0;
2845 maxi = 0;
2846 for(i = 1; i <= nnonter; i++)
2847 if(ggreed[i] >= max) {
2848 max = ggreed[i];
2849 maxi = -i;
2850 }
2851 for(i = 0; i < nstate; ++i)
2852 if(tystate[i] >= max) {
2853 max = tystate[i];
2854 maxi = i;
2855 }
2856 if(nxdb)
2857 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2858 if(max == 0)
2859 return NOMORE;
2860 return maxi;
2861 }
2862
2863 /*
2864 * write summary
2865 */
2866 void
osummary(void)2867 osummary(void)
2868 {
2869
2870 int i, *p;
2871
2872 if(foutput == 0)
2873 return;
2874 i = 0;
2875 for(p = maxa; p >= amem; p--)
2876 if(*p == 0)
2877 i++;
2878
2879 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2880 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2881 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2882 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2883 }
2884
2885 /*
2886 * this version is for C
2887 * write out the optimized parser
2888 */
2889 void
aoutput(void)2890 aoutput(void)
2891 {
2892 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2893 arout("yyact", amem, (maxa-amem)+1);
2894 arout("yypact", indgo, nstate);
2895 arout("yypgo", pgo, nnonter+1);
2896 }
2897
2898 void
arout(char * s,int * v,int n)2899 arout(char *s, int *v, int n)
2900 {
2901 int i;
2902
2903 Bprint(ftable, "short %s[] =\n{", s);
2904 for(i = 0; i < n;) {
2905 if(i%10 == 0)
2906 Bprint(ftable, "\n");
2907 Bprint(ftable, "%4d", v[i]);
2908 i++;
2909 if(i == n)
2910 Bprint(ftable, "\n};\n");
2911 else
2912 Bprint(ftable, ",");
2913 }
2914 }
2915
2916 /*
2917 * read and convert an integer from the standard input
2918 * return the terminating character
2919 * blanks, tabs, and newlines are ignored
2920 */
2921 int
gtnm(void)2922 gtnm(void)
2923 {
2924 int sign, val, c;
2925
2926 sign = 0;
2927 val = 0;
2928 while((c=Bgetrune(finput)) != Beof) {
2929 if(isdigit(c)) {
2930 val = val*10 + c-'0';
2931 continue;
2932 }
2933 if(c == '-') {
2934 sign = 1;
2935 continue;
2936 }
2937 break;
2938 }
2939 if(sign)
2940 val = -val;
2941 *pmem++ = val;
2942 if(pmem >= &mem0[MEMSIZE])
2943 error("out of space");
2944 return c;
2945 }
2946