1 #include <lib9.h>
2 #include <bio.h>
3 #include <ctype.h>
4
5 #define Bungetrune Bungetc /* ok for now. */
6
7 /*
8 * all these are 32 bit
9 */
10 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
11 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
12 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
13 #define NWORDS(n) (((n)+32)/32)
14
15 #ifndef PARSER
16 #define PARSER "yaccpar"
17 #define PARSERS "yaccpars"
18 #endif
19 #define TEMPNAME "y.tmp.XXXXXX"
20 #define ACTNAME "y.acts.XXXXXX"
21 #define OFILE "tab.c"
22 #define FILEU "output"
23 #define FILED "tab.h"
24 #define FILEDEBUG "debug"
25
26 enum
27 {
28 /*
29 * the following are adjustable
30 * according to memory size
31 */
32 ACTSIZE = 30000,
33 MEMSIZE = 30000,
34 NSTATES = 2000,
35 NTERMS = 255,
36 NPROD = 800,
37 NNONTERM = 300,
38 TEMPSIZE = 2000,
39 CNAMSZ = 5000,
40 LSETSIZE = 2400,
41 WSETSIZE = 350,
42
43 NAMESIZE = 50,
44 NTYPES = 63,
45 ISIZE = 400,
46
47 PRIVATE = 0xE000, /* unicode private use */
48
49 /* relationships which must hold:
50 TBITSET ints must hold NTERMS+1 bits...
51 WSETSIZE >= NNONTERM
52 LSETSIZE >= NNONTERM
53 TEMPSIZE >= NTERMS + NNONTERM + 1
54 TEMPSIZE >= NSTATES
55 */
56
57 NTBASE = 010000,
58 ERRCODE = 8190,
59 ACCEPTCODE = 8191,
60
61 NOASC = 0, /* no assoc. */
62 LASC = 1, /* left assoc. */
63 RASC = 2, /* right assoc. */
64 BASC = 3, /* binary assoc. */
65
66 /* flags for state generation */
67
68 DONE = 0,
69 MUSTDO = 1,
70 MUSTLOOKAHEAD = 2,
71
72 /* flags for a rule having an action, and being reduced */
73
74 ACTFLAG = 04,
75 REDFLAG = 010,
76
77 /* output parser flags */
78 YYFLAG1 = -1000,
79
80 /* parse tokens */
81 IDENTIFIER = PRIVATE,
82 MARK,
83 TERM,
84 LEFT,
85 RIGHT,
86 BINARY,
87 PREC,
88 LCURLY,
89 IDENTCOLON,
90 NUMBER,
91 START,
92 TYPEDEF,
93 TYPENAME,
94 UNION,
95
96 ENDFILE = 0,
97
98 EMPTY = 1,
99 WHOKNOWS = 0,
100 OK = 1,
101 NOMORE = -1000,
102 };
103
104 /* macros for getting associativity and precedence levels */
105
106 #define ASSOC(i) ((i)&03)
107 #define PLEVEL(i) (((i)>>4)&077)
108 #define TYPE(i) (((i)>>10)&077)
109
110 /* macros for setting associativity and precedence levels */
111
112 #define SETASC(i,j) i |= j
113 #define SETPLEV(i,j) i |= (j<<4)
114 #define SETTYPE(i,j) i |= (j<<10)
115
116 /* looping macros */
117
118 #define TLOOP(i) for(i=1; i<=ntokens; i++)
119 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
120 #define PLOOP(s,i) for(i=s; i<nprod; i++)
121 #define SLOOP(i) for(i=0; i<nstate; i++)
122 #define WSBUMP(x) x++
123 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
124 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
125 #define SETLOOP(i) for(i=0; i<tbitset; i++)
126
127 /* command to clobber tempfiles after use */
128
129 #define ZAPFILE(x) if(x) remove(x)
130
131 /* I/O descriptors */
132
133 Biobuf* faction; /* file for saving actions */
134 Biobuf* fdefine; /* file for #defines */
135 Biobuf* fdebug; /* y.debug for strings for debugging */
136 Biobuf* ftable; /* y.tab.c file */
137 Biobuf* ftemp; /* tempfile to pass 2 */
138 Biobuf* finput; /* input file */
139 Biobuf* foutput; /* y.output file */
140
141 /* communication variables between various I/O routines */
142
143 char* infile; /* input file name */
144 int numbval; /* value of an input number */
145 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
146
147 /* structure declarations */
148
149 typedef
150 struct
151 {
152 int lset[TBITSET];
153 } Lkset;
154
155 typedef
156 struct
157 {
158 int* pitem;
159 Lkset* look;
160 } Item;
161
162 typedef
163 struct
164 {
165 char* name;
166 int value;
167 } Symb;
168
169 typedef
170 struct
171 {
172 int* pitem;
173 int flag;
174 Lkset ws;
175 } Wset;
176
177 /* storage of names */
178
179 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
180 int cnamsz = CNAMSZ; /* size of cnames */
181 char* cnamp = cnames; /* place where next name is to be put in */
182 int ndefout = 4; /* number of defined symbols output */
183 char* tempname;
184 char* actname;
185 char ttempname[] = TEMPNAME;
186 char tactname[] = ACTNAME;
187 char* parser = PARSER;
188 char* yydebug;
189 char par[256]; /* full path of parser */
190
191 /* storage of types */
192 int ntypes; /* number of types defined */
193 char* typeset[NTYPES]; /* pointers to type tags */
194
195 /* token information */
196
197 int ntokens = 0 ; /* number of tokens */
198 Symb tokset[NTERMS];
199 int toklev[NTERMS]; /* vector with the precedence of the terminals */
200
201 /* nonterminal information */
202
203 int nnonter = -1; /* the number of nonterminals */
204 Symb nontrst[NNONTERM];
205 int start; /* start symbol */
206
207 /* assigned token type values */
208 int extval = 0;
209
210 char* ytabc = OFILE; /* name of y.tab.c */
211
212 /* grammar rule information */
213
214 int mem0[MEMSIZE] ; /* production storage */
215 int* mem = mem0;
216 int nprod = 1; /* number of productions */
217 int* prdptr[NPROD]; /* pointers to descriptions of productions */
218 int levprd[NPROD]; /* precedence levels for the productions */
219 int rlines[NPROD]; /* line number for this rule */
220
221 /* state information */
222
223 int nstate = 0; /* number of states */
224 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
225 int tystate[NSTATES]; /* contains type information about the states */
226 int defact[NSTATES]; /* the default actions of states */
227 int tstates[NTERMS]; /* states generated by terminal gotos */
228 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
229 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
230 int lastred; /* the number of the last reduction of a state */
231
232 /* lookahead set information */
233
234 Lkset lkst[LSETSIZE];
235 int nolook; /* flag to turn off lookahead computations */
236 int tbitset; /* size of lookahead sets */
237 int nlset = 0; /* next lookahead set index */
238 int nolook = 0; /* flag to suppress lookahead computations */
239 Lkset clset; /* temporary storage for lookahead computations */
240
241 /* working set information */
242
243 Wset wsets[WSETSIZE];
244 Wset* cwp;
245
246 /* storage for action table */
247
248 int amem[ACTSIZE]; /* action table storage */
249 int* memp = amem; /* next free action table position */
250 int indgo[NSTATES]; /* index to the stored goto table */
251
252 /* temporary vector, indexable by states, terms, or ntokens */
253
254 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
255 int lineno = 1; /* current input line number */
256 int fatfl = 1; /* if on, error is fatal */
257 int nerrors = 0; /* number of errors */
258
259 /* statistics collection variables */
260
261 int zzgoent;
262 int zzgobest;
263 int zzacent;
264 int zzexcp;
265 int zzclose;
266 int zzrrconf;
267 int zzsrconf;
268
269 int* ggreed = lkst[0].lset;
270 int* pgo = wsets[0].ws.lset;
271 int* yypgo = &nontrst[0].value;
272
273 int maxspr = 0; /* maximum spread of any entry */
274 int maxoff = 0; /* maximum offset into a array */
275 int* pmem = mem0;
276 int* maxa;
277 int nxdb = 0;
278 int adb = 0;
279
280
281 /* storage for information about the nonterminals */
282
283 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
284 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
285 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
286
287 /* random stuff picked out from between functions */
288
289 int indebug = 0;
290 Wset* zzcwp = wsets;
291 int zzgoent = 0;
292 int zzgobest = 0;
293 int zzacent = 0;
294 int zzexcp = 0;
295 int zzclose = 0;
296 int zzsrconf = 0;
297 int* zzmemsz = mem0;
298 int zzrrconf = 0;
299 int pidebug = 0; /* debugging flag for putitem */
300 int gsdebug = 0;
301 int cldebug = 0; /* debugging flag for closure */
302 int pkdebug = 0;
303 int g2debug = 0;
304
305 struct
306 {
307 char* name;
308 long value;
309 } resrv[] =
310 {
311 "binary", BINARY,
312 "left", LEFT,
313 "nonassoc", BINARY,
314 "prec", PREC,
315 "right", RIGHT,
316 "start", START,
317 "term", TERM,
318 "token", TERM,
319 "type", TYPEDEF,
320 "union", UNION,
321 0,
322 };
323
324 /* define functions */
325
326 void main(int, char**);
327 void others(void);
328 char* chcopy(char*, char*);
329 char* writem(int*);
330 char* symnam(int);
331 void summary(void);
332 void error(char*, ...);
333 void aryfil(int*, int, int);
334 int setunion(int*, int*);
335 void prlook(Lkset*);
336 void cpres(void);
337 void cpfir(void);
338 int state(int);
339 void putitem(int*, Lkset*);
340 void cempty(void);
341 void stagen(void);
342 void closure(int);
343 Lkset* flset(Lkset*);
344 void cleantmp(void);
345 void intr(void);
346 void setup(int, char**);
347 void finact(void);
348 int defin(int, char*);
349 void defout(int);
350 char* cstash(char*);
351 long gettok(void);
352 int fdtype(int);
353 int chfind(int, char*);
354 void cpyunion(void);
355 void cpycode(void);
356 int skipcom(void);
357 void cpyact(int);
358 void openup(char*, int, int, int, char*);
359 void output(void);
360 int apack(int*, int);
361 void go2out(void);
362 void go2gen(int);
363 void precftn(int, int, int);
364 void wract(int);
365 void wrstate(int);
366 void warray(char*, int*, int);
367 void hideprod(void);
368 void callopt(void);
369 void gin(int);
370 void stin(int);
371 int nxti(void);
372 void osummary(void);
373 void aoutput(void);
374 void arout(char*, int*, int);
375 int gtnm(void);
376
377 void
main(int argc,char * argv[])378 main(int argc, char *argv[])
379 {
380
381 setup(argc, argv); /* initialize and read productions */
382 tbitset = NWORDS(ntokens);
383 cpres(); /* make table of which productions yield a given nonterminal */
384 cempty(); /* make a table of which nonterminals can match the empty string */
385 cpfir(); /* make a table of firsts of nonterminals */
386 stagen(); /* generate the states */
387 output(); /* write the states and the tables */
388 go2out();
389 hideprod();
390 summary();
391 callopt();
392 others();
393 exits(0);
394 }
395
396 /*
397 * put out other arrays, copy the parsers
398 */
399 void
others(void)400 others(void)
401 {
402 int c, i, j;
403
404 finput = Bopen(parser, OREAD);
405 if(finput == 0)
406 error("cannot find parser %s", parser);
407 warray("yyr1", levprd, nprod);
408 aryfil(temp1, nprod, 0);
409 PLOOP(1, i)
410 temp1[i] = prdptr[i+1]-prdptr[i]-2;
411 warray("yyr2", temp1, nprod);
412
413 aryfil(temp1, nstate, -1000);
414 TLOOP(i)
415 for(j=tstates[i]; j!=0; j=mstates[j])
416 temp1[j] = i;
417 NTLOOP(i)
418 for(j=ntstates[i]; j!=0; j=mstates[j])
419 temp1[j] = -i;
420 warray("yychk", temp1, nstate);
421 warray("yydef", defact, nstate);
422
423 /* put out token translation tables */
424 /* table 1 has 0-256 */
425 aryfil(temp1, 256, 0);
426 c = 0;
427 TLOOP(i) {
428 j = tokset[i].value;
429 if(j >= 0 && j < 256) {
430 if(temp1[j]) {
431 print("yacc bug -- cant have 2 different Ts with same value\n");
432 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
433 nerrors++;
434 }
435 temp1[j] = i;
436 if(j > c)
437 c = j;
438 }
439 }
440 warray("yytok1", temp1, c+1);
441
442 /* table 2 has PRIVATE-PRIVATE+256 */
443 aryfil(temp1, 256, 0);
444 c = 0;
445 TLOOP(i) {
446 j = tokset[i].value - PRIVATE;
447 if(j >= 0 && j < 256) {
448 if(temp1[j]) {
449 print("yacc bug -- cant have 2 different Ts with same value\n");
450 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
451 nerrors++;
452 }
453 temp1[j] = i;
454 if(j > c)
455 c = j;
456 }
457 }
458 warray("yytok2", temp1, c+1);
459
460 /* table 3 has everything else */
461 Bprint(ftable, "long yytok3[] =\n{\n");
462 c = 0;
463 TLOOP(i) {
464 j = tokset[i].value;
465 if(j >= 0 && j < 256)
466 continue;
467 if(j >= PRIVATE && j < 256+PRIVATE)
468 continue;
469
470 Bprint(ftable, "%4d,%4d,", j, i);
471 c++;
472 if(c%5 == 0)
473 Bprint(ftable, "\n");
474 }
475 Bprint(ftable, "%4d\n};\n", 0);
476
477 /* copy parser text */
478 while((c=Bgetrune(finput)) != Beof) {
479 if(c == '$') {
480 if((c = Bgetrune(finput)) != 'A')
481 Bputrune(ftable, '$');
482 else { /* copy actions */
483 faction = Bopen(actname, OREAD);
484 if(faction == 0)
485 error("cannot reopen action tempfile");
486 while((c=Bgetrune(faction)) != Beof)
487 Bputrune(ftable, c);
488 Bterm(faction);
489 ZAPFILE(actname);
490 c = Bgetrune(finput);
491 }
492 }
493 Bputrune(ftable, c);
494 }
495 Bterm(ftable);
496 }
497
498 /*
499 * copies string q into p, returning next free char ptr
500 */
501 char*
chcopy(char * p,char * q)502 chcopy(char* p, char* q)
503 {
504 int c;
505
506 while(c = *q) {
507 if(c == '"')
508 *p++ = '\\';
509 *p++ = c;
510 q++;
511 }
512 *p = 0;
513 return p;
514 }
515
516 /*
517 * creates output string for item pointed to by pp
518 */
519 char*
writem(int * pp)520 writem(int *pp)
521 {
522 int i,*p;
523 static char sarr[ISIZE];
524 char* q;
525
526 for(p=pp; *p>0; p++)
527 ;
528 p = prdptr[-*p];
529 q = chcopy(sarr, nontrst[*p-NTBASE].name);
530 q = chcopy(q, ": ");
531 for(;;) {
532 *q = ' ';
533 p++;
534 if(p == pp)
535 *q = '.';
536 q++;
537 *q = '\0';
538 i = *p;
539 if(i <= 0)
540 break;
541 q = chcopy(q, symnam(i));
542 if(q > &sarr[ISIZE-30])
543 error("item too big");
544 }
545
546 /* an item calling for a reduction */
547 i = *pp;
548 if(i < 0 ) {
549 q = chcopy(q, " (");
550 sprint(q, "%d)", -i);
551 }
552 return sarr;
553 }
554
555 /*
556 * return a pointer to the name of symbol i
557 */
558 char*
symnam(int i)559 symnam(int i)
560 {
561 char* cp;
562
563 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
564 if(*cp == ' ')
565 cp++;
566 return cp;
567 }
568
569 /*
570 * output the summary on y.output
571 */
572 void
summary(void)573 summary(void)
574 {
575
576 if(foutput != 0) {
577 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
578 ntokens, NTERMS, nnonter, NNONTERM);
579 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
580 nprod, NPROD, nstate, NSTATES);
581 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
582 zzsrconf, zzrrconf);
583 Bprint(foutput, "%d/%d working sets used\n",
584 (int)(zzcwp-wsets), WSETSIZE);
585 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
586 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
587 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
588 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
589 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
590 Bprint(foutput, "%d goto entries\n", zzgoent);
591 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
592 }
593 if(zzsrconf != 0 || zzrrconf != 0) {
594 print("\nconflicts: ");
595 if(zzsrconf)
596 print("%d shift/reduce", zzsrconf);
597 if(zzsrconf && zzrrconf)
598 print(", ");
599 if(zzrrconf)
600 print("%d reduce/reduce", zzrrconf);
601 print("\n");
602 }
603 if(ftemp != 0) {
604 Bterm(ftemp);
605 ftemp = 0;
606 }
607 if(fdefine != 0) {
608 Bterm(fdefine);
609 fdefine = 0;
610 }
611 }
612
613 /*
614 * write out error comment -- NEEDS WORK
615 */
616 void
error(char * s,...)617 error(char *s, ...)
618 {
619
620 nerrors++;
621 fprint(2, "\n fatal error:");
622 fprint(2, s, (&s)[1]);
623 fprint(2, ", %s:%d\n", infile, lineno);
624 if(!fatfl)
625 return;
626 summary();
627 cleantmp();
628 exits("error");
629 }
630
631 /*
632 * set elements 0 through n-1 to c
633 */
634 void
aryfil(int * v,int n,int c)635 aryfil(int *v, int n, int c)
636 {
637 int i;
638
639 for(i=0; i<n; i++)
640 v[i] = c;
641 }
642
643 /*
644 * set a to the union of a and b
645 * return 1 if b is not a subset of a, 0 otherwise
646 */
647 int
setunion(int * a,int * b)648 setunion(int *a, int *b)
649 {
650 int i, x, sub;
651
652 sub = 0;
653 SETLOOP(i) {
654 x = *a;
655 *a |= *b;
656 if(*a != x)
657 sub = 1;
658 a++;
659 b++;
660 }
661 return sub;
662 }
663
664 void
prlook(Lkset * p)665 prlook(Lkset* p)
666 {
667 int j, *pp;
668
669 pp = p->lset;
670 if(pp == 0)
671 Bprint(foutput, "\tNULL");
672 else {
673 Bprint(foutput, " { ");
674 TLOOP(j)
675 if(BIT(pp,j))
676 Bprint(foutput, "%s ", symnam(j));
677 Bprint(foutput, "}");
678 }
679 }
680
681 /*
682 * compute an array with the beginnings of productions yielding given nonterminals
683 * The array pres points to these lists
684 * the array pyield has the lists: the total size is only NPROD+1
685 */
686 void
cpres(void)687 cpres(void)
688 {
689 int c, j, i, **pmem;
690 static int *pyield[NPROD];
691
692 pmem = pyield;
693 NTLOOP(i) {
694 c = i+NTBASE;
695 pres[i] = pmem;
696 fatfl = 0; /* make undefined symbols nonfatal */
697 PLOOP(0, j)
698 if(*prdptr[j] == c)
699 *pmem++ = prdptr[j]+1;
700 if(pres[i] == pmem)
701 error("nonterminal %s not defined!", nontrst[i].name);
702 }
703 pres[i] = pmem;
704 fatfl = 1;
705 if(nerrors) {
706 summary();
707 cleantmp();
708 exits("error");
709 }
710 if(pmem != &pyield[nprod])
711 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
712 }
713
714 /*
715 * compute an array with the first of nonterminals
716 */
717 void
cpfir(void)718 cpfir(void)
719 {
720 int *p, **s, i, **t, ch, changes;
721
722 zzcwp = &wsets[nnonter];
723 NTLOOP(i) {
724 aryfil(wsets[i].ws.lset, tbitset, 0);
725 t = pres[i+1];
726 /* initially fill the sets */
727 for(s=pres[i]; s<t; ++s)
728 for(p = *s; (ch = *p) > 0; ++p) {
729 if(ch < NTBASE) {
730 SETBIT(wsets[i].ws.lset, ch);
731 break;
732 }
733 if(!pempty[ch-NTBASE])
734 break;
735 }
736 }
737
738 /* now, reflect transitivity */
739 changes = 1;
740 while(changes) {
741 changes = 0;
742 NTLOOP(i) {
743 t = pres[i+1];
744 for(s = pres[i]; s < t; ++s)
745 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
746 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
747 if(!pempty[ch])
748 break;
749 }
750 }
751 }
752
753 NTLOOP(i)
754 pfirst[i] = flset(&wsets[i].ws);
755 if(!indebug)
756 return;
757 if(foutput != 0)
758 NTLOOP(i) {
759 Bprint(foutput, "\n%s: ", nontrst[i].name);
760 prlook(pfirst[i]);
761 Bprint(foutput, " %d\n", pempty[i]);
762 }
763 }
764
765 /*
766 * sorts last state,and sees if it equals earlier ones. returns state number
767 */
768 int
state(int c)769 state(int c)
770 {
771 Item *p1, *p2, *k, *l, *q1, *q2;
772 int size1, size2, i;
773
774 p1 = pstate[nstate];
775 p2 = pstate[nstate+1];
776 if(p1 == p2)
777 return 0; /* null state */
778 /* sort the items */
779 for(k = p2-1; k > p1; k--) /* make k the biggest */
780 for(l = k-1; l >= p1; --l)
781 if(l->pitem > k->pitem) {
782 int *s;
783 Lkset *ss;
784
785 s = k->pitem;
786 k->pitem = l->pitem;
787 l->pitem = s;
788 ss = k->look;
789 k->look = l->look;
790 l->look = ss;
791 }
792 size1 = p2 - p1; /* size of state */
793
794 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
795 /* get ith state */
796 q1 = pstate[i];
797 q2 = pstate[i+1];
798 size2 = q2 - q1;
799 if(size1 != size2)
800 continue;
801 k = p1;
802 for(l = q1; l < q2; l++) {
803 if(l->pitem != k->pitem)
804 break;
805 k++;
806 }
807 if(l != q2)
808 continue;
809 /* found it */
810 pstate[nstate+1] = pstate[nstate]; /* delete last state */
811 /* fix up lookaheads */
812 if(nolook)
813 return i;
814 for(l = q1, k = p1; l < q2; ++l, ++k ) {
815 int s;
816
817 SETLOOP(s)
818 clset.lset[s] = l->look->lset[s];
819 if(setunion(clset.lset, k->look->lset)) {
820 tystate[i] = MUSTDO;
821 /* register the new set */
822 l->look = flset( &clset );
823 }
824 }
825 return i;
826 }
827 /* state is new */
828 if(nolook)
829 error("yacc state/nolook error");
830 pstate[nstate+2] = p2;
831 if(nstate+1 >= NSTATES)
832 error("too many states");
833 if(c >= NTBASE) {
834 mstates[nstate] = ntstates[c-NTBASE];
835 ntstates[c-NTBASE] = nstate;
836 } else {
837 mstates[nstate] = tstates[c];
838 tstates[c] = nstate;
839 }
840 tystate[nstate] = MUSTDO;
841 return nstate++;
842 }
843
844 void
putitem(int * ptr,Lkset * lptr)845 putitem(int *ptr, Lkset *lptr)
846 {
847 Item *j;
848
849 if(pidebug && foutput != 0)
850 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
851 j = pstate[nstate+1];
852 j->pitem = ptr;
853 if(!nolook)
854 j->look = flset(lptr);
855 pstate[nstate+1] = ++j;
856 if((int*)j > zzmemsz) {
857 zzmemsz = (int*)j;
858 if(zzmemsz >= &mem0[MEMSIZE])
859 error("out of state space");
860 }
861 }
862
863 /*
864 * mark nonterminals which derive the empty string
865 * also, look for nonterminals which don't derive any token strings
866 */
867 void
cempty(void)868 cempty(void)
869 {
870
871 int i, *p;
872
873 /* first, use the array pempty to detect productions that can never be reduced */
874 /* set pempty to WHONOWS */
875 aryfil(pempty, nnonter+1, WHOKNOWS);
876
877 /* now, look at productions, marking nonterminals which derive something */
878 more:
879 PLOOP(0, i) {
880 if(pempty[*prdptr[i] - NTBASE])
881 continue;
882 for(p = prdptr[i]+1; *p >= 0; ++p)
883 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
884 break;
885 /* production can be derived */
886 if(*p < 0) {
887 pempty[*prdptr[i]-NTBASE] = OK;
888 goto more;
889 }
890 }
891
892 /* now, look at the nonterminals, to see if they are all OK */
893 NTLOOP(i) {
894 /* the added production rises or falls as the start symbol ... */
895 if(i == 0)
896 continue;
897 if(pempty[i] != OK) {
898 fatfl = 0;
899 error("nonterminal %s never derives any token string", nontrst[i].name);
900 }
901 }
902
903 if(nerrors) {
904 summary();
905 cleantmp();
906 exits("error");
907 }
908
909 /* now, compute the pempty array, to see which nonterminals derive the empty string */
910 /* set pempty to WHOKNOWS */
911 aryfil( pempty, nnonter+1, WHOKNOWS);
912
913 /* loop as long as we keep finding empty nonterminals */
914
915 again:
916 PLOOP(1, i) {
917 /* not known to be empty */
918 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
919 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
920 ;
921 /* we have a nontrivially empty nonterminal */
922 if(*p < 0) {
923 pempty[*prdptr[i]-NTBASE] = EMPTY;
924 /* got one ... try for another */
925 goto again;
926 }
927 }
928 }
929 }
930
931 /*
932 * generate the states
933 */
934 void
stagen(void)935 stagen(void)
936 {
937
938 int c, i, j, more;
939 Wset *p, *q;
940
941 /* initialize */
942 nstate = 0;
943
944 /* THIS IS FUNNY from the standpoint of portability
945 * it represents the magic moment when the mem0 array, which has
946 * been holding the productions, starts to hold item pointers, of a
947 * different type...
948 * someday, alloc should be used to allocate all this stuff... for now, we
949 * accept that if pointers don't fit in integers, there is a problem...
950 */
951
952 pstate[0] = pstate[1] = (Item*)mem;
953 aryfil(clset.lset, tbitset, 0);
954 putitem(prdptr[0]+1, &clset);
955 tystate[0] = MUSTDO;
956 nstate = 1;
957 pstate[2] = pstate[1];
958
959 aryfil(amem, ACTSIZE, 0);
960
961 /* now, the main state generation loop */
962 for(more=1; more;) {
963 more = 0;
964 SLOOP(i) {
965 if(tystate[i] != MUSTDO)
966 continue;
967 tystate[i] = DONE;
968 aryfil(temp1, nnonter+1, 0);
969 /* take state i, close it, and do gotos */
970 closure(i);
971 /* generate goto's */
972 WSLOOP(wsets, p) {
973 if(p->flag)
974 continue;
975 p->flag = 1;
976 c = *(p->pitem);
977 if(c <= 1) {
978 if(pstate[i+1]-pstate[i] <= p-wsets)
979 tystate[i] = MUSTLOOKAHEAD;
980 continue;
981 }
982 /* do a goto on c */
983 WSLOOP(p, q)
984 /* this item contributes to the goto */
985 if(c == *(q->pitem)) {
986 putitem(q->pitem+1, &q->ws);
987 q->flag = 1;
988 }
989 if(c < NTBASE)
990 state(c); /* register new state */
991 else
992 temp1[c-NTBASE] = state(c);
993 }
994 if(gsdebug && foutput != 0) {
995 Bprint(foutput, "%d: ", i);
996 NTLOOP(j)
997 if(temp1[j])
998 Bprint(foutput, "%s %d, ",
999 nontrst[j].name, temp1[j]);
1000 Bprint(foutput, "\n");
1001 }
1002 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1003 /* do some more */
1004 more = 1;
1005 }
1006 }
1007 }
1008
1009 /*
1010 * generate the closure of state i
1011 */
1012 void
closure(int i)1013 closure(int i)
1014 {
1015
1016 Wset *u, *v;
1017 Item *p, *q;
1018 int c, ch, work, k, *pi, **s, **t;
1019
1020 zzclose++;
1021
1022 /* first, copy kernel of state i to wsets */
1023 cwp = wsets;
1024 ITMLOOP(i, p, q) {
1025 cwp->pitem = p->pitem;
1026 cwp->flag = 1; /* this item must get closed */
1027 SETLOOP(k)
1028 cwp->ws.lset[k] = p->look->lset[k];
1029 WSBUMP(cwp);
1030 }
1031
1032 /* now, go through the loop, closing each item */
1033 work = 1;
1034 while(work) {
1035 work = 0;
1036 WSLOOP(wsets, u) {
1037 if(u->flag == 0)
1038 continue;
1039 /* dot is before c */
1040 c = *(u->pitem);
1041 if(c < NTBASE) {
1042 u->flag = 0;
1043 /* only interesting case is where . is before nonterminal */
1044 continue;
1045 }
1046
1047 /* compute the lookahead */
1048 aryfil(clset.lset, tbitset, 0);
1049
1050 /* find items involving c */
1051 WSLOOP(u, v)
1052 if(v->flag == 1 && *(pi=v->pitem) == c) {
1053 v->flag = 0;
1054 if(nolook)
1055 continue;
1056 while((ch = *++pi) > 0) {
1057 /* terminal symbol */
1058 if(ch < NTBASE) {
1059 SETBIT(clset.lset, ch);
1060 break;
1061 }
1062 /* nonterminal symbol */
1063 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1064 if(!pempty[ch-NTBASE])
1065 break;
1066 }
1067 if(ch <= 0)
1068 setunion(clset.lset, v->ws.lset);
1069 }
1070
1071 /*
1072 * now loop over productions derived from c
1073 * c is now nonterminal number
1074 */
1075 c -= NTBASE;
1076 t = pres[c+1];
1077 for(s = pres[c]; s < t; ++s) {
1078 /*
1079 * put these items into the closure
1080 * is the item there
1081 */
1082 WSLOOP(wsets, v)
1083 /* yes, it is there */
1084 if(v->pitem == *s) {
1085 if(nolook)
1086 goto nexts;
1087 if(setunion(v->ws.lset, clset.lset))
1088 v->flag = work = 1;
1089 goto nexts;
1090 }
1091
1092 /* not there; make a new entry */
1093 if(cwp-wsets+1 >= WSETSIZE)
1094 error( "working set overflow");
1095 cwp->pitem = *s;
1096 cwp->flag = 1;
1097 if(!nolook) {
1098 work = 1;
1099 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1100 }
1101 WSBUMP(cwp);
1102
1103 nexts:;
1104 }
1105 }
1106 }
1107
1108 /* have computed closure; flags are reset; return */
1109 if(cwp > zzcwp)
1110 zzcwp = cwp;
1111 if(cldebug && foutput != 0) {
1112 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1113 WSLOOP(wsets, u) {
1114 if(u->flag)
1115 Bprint(foutput, "flag set!\n");
1116 u->flag = 0;
1117 Bprint(foutput, "\t%s", writem(u->pitem));
1118 prlook(&u->ws);
1119 Bprint(foutput, "\n");
1120 }
1121 }
1122 }
1123
1124 /*
1125 * decide if the lookahead set pointed to by p is known
1126 * return pointer to a perminent location for the set
1127 */
1128 Lkset*
flset(Lkset * p)1129 flset(Lkset *p)
1130 {
1131 Lkset *q;
1132 int *u, *v, *w, j;
1133
1134 for(q = &lkst[nlset]; q-- > lkst;) {
1135 u = p->lset;
1136 v = q->lset;
1137 w = &v[tbitset];
1138 while(v < w)
1139 if(*u++ != *v++)
1140 goto more;
1141 /* we have matched */
1142 return q;
1143 more:;
1144 }
1145 /* add a new one */
1146 q = &lkst[nlset++];
1147 if(nlset >= LSETSIZE)
1148 error("too many lookahead sets");
1149 SETLOOP(j)
1150 q->lset[j] = p->lset[j];
1151 return q;
1152 }
1153
1154 void
cleantmp(void)1155 cleantmp(void)
1156 {
1157 ZAPFILE(actname);
1158 ZAPFILE(tempname);
1159 }
1160
1161 void
intr(void)1162 intr(void)
1163 {
1164 cleantmp();
1165 exits("interrupted");
1166 }
1167
1168 void
setup(int argc,char * argv[])1169 setup(int argc, char *argv[])
1170 {
1171 long c, t;
1172 int i, j, lev, ty, ytab, *p;
1173 int vflag, dflag, stem;
1174 char actnm[8], *stemc, *cp;
1175
1176 ytab = 0;
1177 vflag = 0;
1178 dflag = 0;
1179 stem = 0;
1180 stemc = "y";
1181 foutput = 0;
1182 fdefine = 0;
1183 fdebug = 0;
1184 ARGBEGIN{
1185 case 'v':
1186 case 'V':
1187 vflag++;
1188 break;
1189 case 'D':
1190 yydebug = ARGF();
1191 break;
1192 case 'd':
1193 dflag++;
1194 break;
1195 case 'o':
1196 ytab++;
1197 ytabc = ARGF();
1198 break;
1199 case 's':
1200 stem++;
1201 stemc = ARGF();
1202 break;
1203 case 'S':
1204 parser = PARSERS;
1205 break;
1206 default:
1207 error("illegal option: %c", ARGC());
1208 }ARGEND
1209
1210 cp = getenv("ROOT");
1211 if(cp == 0)
1212 cp = ROOT;
1213 snprint(par, sizeof(par), "%s/utils/lib/%s", cp, parser);
1214 parser = par;
1215
1216 openup(stemc, dflag, vflag, ytab, ytabc);
1217
1218 ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1219 faction = Bopen(actname = mktemp(tactname), OWRITE);
1220 if(ftemp == 0 || faction == 0)
1221 error("cannot open temp file");
1222 if(argc < 1)
1223 error("no input file");
1224 infile = argv[0];
1225 finput = Bopen(infile, OREAD);
1226 if(finput == 0)
1227 error("cannot open '%s'", argv[0]);
1228 cnamp = cnames;
1229
1230 defin(0, "$end");
1231 extval = PRIVATE; /* tokens start in unicode 'private use' */
1232 defin(0, "error");
1233 defin(1, "$accept");
1234 defin(0, "$unk");
1235 mem = mem0;
1236 i = 0;
1237
1238 for(t = gettok(); t != MARK && t != ENDFILE;)
1239 switch(t) {
1240 case ';':
1241 t = gettok();
1242 break;
1243
1244 case START:
1245 if(gettok() != IDENTIFIER)
1246 error("bad %%start construction");
1247 start = chfind(1, tokname);
1248 t = gettok();
1249 continue;
1250
1251 case TYPEDEF:
1252 if(gettok() != TYPENAME)
1253 error("bad syntax in %%type");
1254 ty = numbval;
1255 for(;;) {
1256 t = gettok();
1257 switch(t) {
1258 case IDENTIFIER:
1259 if((t=chfind(1, tokname)) < NTBASE) {
1260 j = TYPE(toklev[t]);
1261 if(j != 0 && j != ty)
1262 error("type redeclaration of token %s",
1263 tokset[t].name);
1264 else
1265 SETTYPE(toklev[t], ty);
1266 } else {
1267 j = nontrst[t-NTBASE].value;
1268 if(j != 0 && j != ty)
1269 error("type redeclaration of nonterminal %s",
1270 nontrst[t-NTBASE].name );
1271 else
1272 nontrst[t-NTBASE].value = ty;
1273 }
1274 case ',':
1275 continue;
1276 case ';':
1277 t = gettok();
1278 default:
1279 break;
1280 }
1281 break;
1282 }
1283 continue;
1284
1285 case UNION:
1286 /* copy the union declaration to the output */
1287 cpyunion();
1288 t = gettok();
1289 continue;
1290
1291 case LEFT:
1292 case BINARY:
1293 case RIGHT:
1294 i++;
1295
1296 case TERM:
1297 /* nonzero means new prec. and assoc. */
1298 lev = t-TERM;
1299 ty = 0;
1300
1301 /* get identifiers so defined */
1302 t = gettok();
1303
1304 /* there is a type defined */
1305 if(t == TYPENAME) {
1306 ty = numbval;
1307 t = gettok();
1308 }
1309 for(;;) {
1310 switch(t) {
1311 case ',':
1312 t = gettok();
1313 continue;
1314
1315 case ';':
1316 break;
1317
1318 case IDENTIFIER:
1319 j = chfind(0, tokname);
1320 if(j >= NTBASE)
1321 error("%s defined earlier as nonterminal", tokname);
1322 if(lev) {
1323 if(ASSOC(toklev[j]))
1324 error("redeclaration of precedence of %s", tokname);
1325 SETASC(toklev[j], lev);
1326 SETPLEV(toklev[j], i);
1327 }
1328 if(ty) {
1329 if(TYPE(toklev[j]))
1330 error("redeclaration of type of %s", tokname);
1331 SETTYPE(toklev[j],ty);
1332 }
1333 t = gettok();
1334 if(t == NUMBER) {
1335 tokset[j].value = numbval;
1336 if(j < ndefout && j > 3)
1337 error("please define type number of %s earlier",
1338 tokset[j].name);
1339 t = gettok();
1340 }
1341 continue;
1342 }
1343 break;
1344 }
1345 continue;
1346
1347 case LCURLY:
1348 defout(0);
1349 cpycode();
1350 t = gettok();
1351 continue;
1352
1353 default:
1354 error("syntax error");
1355 }
1356 if(t == ENDFILE)
1357 error("unexpected EOF before %%");
1358
1359 /* t is MARK */
1360 Bprint(ftable, "extern int yyerrflag;\n");
1361 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1362 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1363 Bprint(ftable, "#endif\n" );
1364 if(!ntypes) {
1365 Bprint(ftable, "#ifndef YYSTYPE\n");
1366 Bprint(ftable, "#define YYSTYPE int\n");
1367 Bprint(ftable, "#endif\n");
1368 }
1369 Bprint(ftable, "YYSTYPE yylval;\n");
1370 Bprint(ftable, "YYSTYPE yyval;\n");
1371
1372 prdptr[0] = mem;
1373
1374 /* added production */
1375 *mem++ = NTBASE;
1376
1377 /* if start is 0, we will overwrite with the lhs of the first rule */
1378 *mem++ = start;
1379 *mem++ = 1;
1380 *mem++ = 0;
1381 prdptr[1] = mem;
1382 while((t=gettok()) == LCURLY)
1383 cpycode();
1384 if(t != IDENTCOLON)
1385 error("bad syntax on first rule");
1386
1387 if(!start)
1388 prdptr[0][1] = chfind(1, tokname);
1389
1390 /* read rules */
1391 while(t != MARK && t != ENDFILE) {
1392 /* process a rule */
1393 rlines[nprod] = lineno;
1394 if(t == '|')
1395 *mem++ = *prdptr[nprod-1];
1396 else
1397 if(t == IDENTCOLON) {
1398 *mem = chfind(1, tokname);
1399 if(*mem < NTBASE)
1400 error("token illegal on LHS of grammar rule");
1401 mem++;
1402 } else
1403 error("illegal rule: missing semicolon or | ?");
1404 /* read rule body */
1405 t = gettok();
1406
1407 more_rule:
1408 while(t == IDENTIFIER) {
1409 *mem = chfind(1, tokname);
1410 if(*mem < NTBASE)
1411 levprd[nprod] = toklev[*mem];
1412 mem++;
1413 t = gettok();
1414 }
1415 if(t == PREC) {
1416 if(gettok() != IDENTIFIER)
1417 error("illegal %%prec syntax");
1418 j = chfind(2, tokname);
1419 if(j >= NTBASE)
1420 error("nonterminal %s illegal after %%prec",
1421 nontrst[j-NTBASE].name);
1422 levprd[nprod] = toklev[j];
1423 t = gettok();
1424 }
1425 if(t == '=') {
1426 levprd[nprod] |= ACTFLAG;
1427 Bprint(faction, "\ncase %d:", nprod);
1428 cpyact(mem-prdptr[nprod]-1);
1429 Bprint(faction, " break;");
1430 if((t=gettok()) == IDENTIFIER) {
1431
1432 /* action within rule... */
1433 sprint(actnm, "$$%d", nprod);
1434
1435 /* make it a nonterminal */
1436 j = chfind(1, actnm);
1437
1438 /*
1439 * the current rule will become rule number nprod+1
1440 * move the contents down, and make room for the null
1441 */
1442 for(p = mem; p >= prdptr[nprod]; --p)
1443 p[2] = *p;
1444 mem += 2;
1445
1446 /* enter null production for action */
1447 p = prdptr[nprod];
1448 *p++ = j;
1449 *p++ = -nprod;
1450
1451 /* update the production information */
1452 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1453 levprd[nprod] = ACTFLAG;
1454 if(++nprod >= NPROD)
1455 error("more than %d rules", NPROD);
1456 prdptr[nprod] = p;
1457
1458 /* make the action appear in the original rule */
1459 *mem++ = j;
1460
1461 /* get some more of the rule */
1462 goto more_rule;
1463 }
1464 }
1465
1466 while(t == ';')
1467 t = gettok();
1468 *mem++ = -nprod;
1469
1470 /* check that default action is reasonable */
1471 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1472
1473 /* no explicit action, LHS has value */
1474 int tempty;
1475
1476 tempty = prdptr[nprod][1];
1477 if(tempty < 0)
1478 error("must return a value, since LHS has a type");
1479 else
1480 if(tempty >= NTBASE)
1481 tempty = nontrst[tempty-NTBASE].value;
1482 else
1483 tempty = TYPE(toklev[tempty]);
1484 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1485 error("default action causes potential type clash");
1486 }
1487 nprod++;
1488 if(nprod >= NPROD)
1489 error("more than %d rules", NPROD);
1490 prdptr[nprod] = mem;
1491 levprd[nprod] = 0;
1492 }
1493
1494 /* end of all rules */
1495 defout(1);
1496
1497 finact();
1498 if(t == MARK) {
1499 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1500 while((c=Bgetrune(finput)) != Beof)
1501 Bputrune(ftable, c);
1502 }
1503 Bterm(finput);
1504 }
1505
1506 /*
1507 * finish action routine
1508 */
1509 void
finact(void)1510 finact(void)
1511 {
1512
1513 Bterm(faction);
1514 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1515 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1516 }
1517
1518 /*
1519 * define s to be a terminal if t=0
1520 * or a nonterminal if t=1
1521 */
1522 int
defin(int nt,char * s)1523 defin(int nt, char *s)
1524 {
1525 int val;
1526 Rune rune;
1527
1528 val = 0;
1529 if(nt) {
1530 nnonter++;
1531 if(nnonter >= NNONTERM)
1532 error("too many nonterminals, limit %d",NNONTERM);
1533 nontrst[nnonter].name = cstash(s);
1534 return NTBASE + nnonter;
1535 }
1536
1537 /* must be a token */
1538 ntokens++;
1539 if(ntokens >= NTERMS)
1540 error("too many terminals, limit %d", NTERMS);
1541 tokset[ntokens].name = cstash(s);
1542
1543 /* establish value for token */
1544 /* single character literal */
1545 if(s[0] == ' ') {
1546 val = chartorune(&rune, &s[1]);
1547 if(s[val+1] == 0) {
1548 val = rune;
1549 goto out;
1550 }
1551 }
1552
1553 /* escape sequence */
1554 if(s[0] == ' ' && s[1] == '\\') {
1555 if(s[3] == 0) {
1556 /* single character escape sequence */
1557 switch(s[2]) {
1558 case 'n': val = '\n'; break;
1559 case 'r': val = '\r'; break;
1560 case 'b': val = '\b'; break;
1561 case 't': val = '\t'; break;
1562 case 'f': val = '\f'; break;
1563 case '\'': val = '\''; break;
1564 case '"': val = '"'; break;
1565 case '\\': val = '\\'; break;
1566 default: error("invalid escape");
1567 }
1568 goto out;
1569 }
1570
1571 /* \nnn sequence */
1572 if(s[2] >= '0' && s[2] <= '7') {
1573 if(s[3] < '0' ||
1574 s[3] > '7' ||
1575 s[4] < '0' ||
1576 s[4] > '7' ||
1577 s[5] != 0)
1578 error("illegal \\nnn construction");
1579 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1580 if(val == 0)
1581 error("'\\000' is illegal");
1582 goto out;
1583 }
1584 error("unknown escape");
1585 }
1586 val = extval++;
1587
1588 out:
1589 tokset[ntokens].value = val;
1590 toklev[ntokens] = 0;
1591 return ntokens;
1592 }
1593
1594 /*
1595 * write out the defines (at the end of the declaration section)
1596 */
1597 void
defout(int last)1598 defout(int last)
1599 {
1600 int i, c;
1601 char sar[NAMESIZE+10];
1602
1603 for(i=ndefout; i<=ntokens; i++) {
1604 /* non-literals */
1605 c = tokset[i].name[0];
1606 if(c != ' ' && c != '$') {
1607 Bprint(ftable, "#define %s %d\n",
1608 tokset[i].name, tokset[i].value);
1609 if(fdefine)
1610 Bprint(fdefine, "#define\t%s\t%d\n",
1611 tokset[i].name, tokset[i].value);
1612 }
1613 }
1614 ndefout = ntokens+1;
1615 if(last && fdebug) {
1616 Bprint(fdebug, "char* yytoknames[] =\n{\n");
1617 TLOOP(i) {
1618 if(tokset[i].name) {
1619 chcopy(sar, tokset[i].name);
1620 Bprint(fdebug, "\t\"%s\",\n", sar);
1621 continue;
1622 }
1623 Bprint(fdebug, "\t0,\n");
1624 }
1625 Bprint(fdebug, "};\n");
1626 }
1627 }
1628
1629 char*
cstash(char * s)1630 cstash(char *s)
1631 {
1632 char *temp;
1633
1634 temp = cnamp;
1635 do {
1636 if(cnamp >= &cnames[cnamsz])
1637 error("too many characters in id's and literals");
1638 else
1639 *cnamp++ = *s;
1640 } while(*s++);
1641 return temp;
1642 }
1643
1644 long
gettok(void)1645 gettok(void)
1646 {
1647 long c;
1648 Rune rune;
1649 int i, base, match, reserve;
1650 static int peekline;
1651
1652 begin:
1653 reserve = 0;
1654 lineno += peekline;
1655 peekline = 0;
1656 c = Bgetrune(finput);
1657 while(c == ' ' || c == '\n' || c == '\t' || c == '\f' || c == '\r') {
1658 if(c == '\n')
1659 lineno++;
1660 c = Bgetrune(finput);
1661 }
1662
1663 /* skip comment */
1664 if(c == '/') {
1665 lineno += skipcom();
1666 goto begin;
1667 }
1668 switch(c) {
1669 case Beof:
1670 return ENDFILE;
1671
1672 case '{':
1673 Bungetrune(finput);
1674 return '=';
1675
1676 case '<':
1677 /* get, and look up, a type name (union member name) */
1678 i = 0;
1679 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n' && c != '\r') {
1680 rune = c;
1681 c = runetochar(&tokname[i], &rune);
1682 if(i < NAMESIZE)
1683 i += c;
1684 }
1685 if(c != '>')
1686 error("unterminated < ... > clause");
1687 tokname[i] = 0;
1688 for(i=1; i<=ntypes; i++)
1689 if(!strcmp(typeset[i], tokname)) {
1690 numbval = i;
1691 return TYPENAME;
1692 }
1693 ntypes++;
1694 numbval = ntypes;
1695 typeset[numbval] = cstash(tokname);
1696 return TYPENAME;
1697
1698 case '"':
1699 case '\'':
1700 match = c;
1701 tokname[0] = ' ';
1702 i = 1;
1703 for(;;) {
1704 c = Bgetrune(finput);
1705 if(c == '\n' || c == '\r' || c <= 0)
1706 error("illegal or missing ' or \"" );
1707 if(c == '\\') {
1708 tokname[i] = '\\';
1709 if(i < NAMESIZE)
1710 i++;
1711 c = Bgetrune(finput);
1712 } else
1713 if(c == match)
1714 break;
1715 rune = c;
1716 c = runetochar(&tokname[i], &rune);
1717 if(i < NAMESIZE)
1718 i += c;
1719 }
1720 break;
1721
1722 case '%':
1723 case '\\':
1724 switch(c = Bgetrune(finput)) {
1725 case '0': return TERM;
1726 case '<': return LEFT;
1727 case '2': return BINARY;
1728 case '>': return RIGHT;
1729 case '%':
1730 case '\\': return MARK;
1731 case '=': return PREC;
1732 case '{': return LCURLY;
1733 default: reserve = 1;
1734 }
1735
1736 default:
1737 /* number */
1738 if(isdigit(c)) {
1739 numbval = c-'0';
1740 base = (c=='0')? 8: 10;
1741 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1742 numbval = numbval*base + (c-'0');
1743 Bungetrune(finput);
1744 return NUMBER;
1745 }
1746 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1747 i = 0;
1748 while(islower(c) || isupper(c) || isdigit(c) ||
1749 c == '-' || c=='_' || c=='.' || c=='$') {
1750 if(reserve && isupper(c))
1751 c += 'a'-'A';
1752 rune = c;
1753 c = runetochar(&tokname[i], &rune);
1754 if(i < NAMESIZE)
1755 i += c;
1756 c = Bgetrune(finput);
1757 }
1758 } else
1759 return c;
1760 Bungetrune(finput);
1761 }
1762 tokname[i] = 0;
1763
1764 /* find a reserved word */
1765 if(reserve) {
1766 for(c=0; resrv[c].name; c++)
1767 if(strcmp(tokname, resrv[c].name) == 0)
1768 return resrv[c].value;
1769 error("invalid escape, or illegal reserved word: %s", tokname);
1770 }
1771
1772 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1773 c = Bgetrune(finput);
1774 while(c == ' ' || c == '\t'|| c == '\n' || c == '\r' || c == '\f' || c == '/') {
1775 if(c == '\n')
1776 peekline++;
1777 /* look for comments */
1778 if(c == '/')
1779 peekline += skipcom();
1780 c = Bgetrune(finput);
1781 }
1782 if(c == ':')
1783 return IDENTCOLON;
1784 Bungetrune(finput);
1785 return IDENTIFIER;
1786 }
1787
1788 /*
1789 * determine the type of a symbol
1790 */
1791 int
fdtype(int t)1792 fdtype(int t)
1793 {
1794 int v;
1795
1796 if(t >= NTBASE)
1797 v = nontrst[t-NTBASE].value;
1798 else
1799 v = TYPE(toklev[t]);
1800 if(v <= 0)
1801 error("must specify type for %s", (t>=NTBASE)?
1802 nontrst[t-NTBASE].name: tokset[t].name);
1803 return v;
1804 }
1805
1806 int
chfind(int t,char * s)1807 chfind(int t, char *s)
1808 {
1809 int i;
1810
1811 if(s[0] == ' ')
1812 t = 0;
1813 TLOOP(i)
1814 if(!strcmp(s, tokset[i].name))
1815 return i;
1816 NTLOOP(i)
1817 if(!strcmp(s, nontrst[i].name))
1818 return NTBASE+i;
1819
1820 /* cannot find name */
1821 if(t > 1)
1822 error("%s should have been defined earlier", s);
1823 return defin(t, s);
1824 }
1825
1826 /*
1827 * copy the union declaration to the output, and the define file if present
1828 */
1829 void
cpyunion(void)1830 cpyunion(void)
1831 {
1832 long c;
1833 int level;
1834
1835 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1836 Bprint(ftable, "typedef union ");
1837 if(fdefine != 0)
1838 Bprint(fdefine, "\ntypedef union ");
1839
1840 level = 0;
1841 for(;;) {
1842 if((c=Bgetrune(finput)) == Beof)
1843 error("EOF encountered while processing %%union");
1844 Bputrune(ftable, c);
1845 if(fdefine != 0)
1846 Bputrune(fdefine, c);
1847 switch(c) {
1848 case '\n':
1849 lineno++;
1850 break;
1851 case '{':
1852 level++;
1853 break;
1854 case '}':
1855 level--;
1856
1857 /* we are finished copying */
1858 if(level == 0) {
1859 Bprint(ftable, " YYSTYPE;\n");
1860 if(fdefine != 0)
1861 Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1862 return;
1863 }
1864 }
1865 }
1866 }
1867
1868 /*
1869 * copies code between \{ and \}
1870 */
1871 void
cpycode(void)1872 cpycode(void)
1873 {
1874
1875 long c;
1876
1877 c = Bgetrune(finput);
1878 if(c == '\n') {
1879 c = Bgetrune(finput);
1880 lineno++;
1881 }
1882 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1883 while(c != Beof) {
1884 if(c == '\\') {
1885 if((c=Bgetrune(finput)) == '}')
1886 return;
1887 Bputc(ftable, '\\');
1888 }
1889 if(c == '%') {
1890 if((c=Bgetrune(finput)) == '}')
1891 return;
1892 Bputc(ftable, '%');
1893 }
1894 Bputrune(ftable, c);
1895 if(c == '\n')
1896 lineno++;
1897 c = Bgetrune(finput);
1898 }
1899 error("eof before %%}");
1900 }
1901
1902 /*
1903 * skip over comments
1904 * skipcom is called after reading a '/'
1905 */
1906 int
skipcom(void)1907 skipcom(void)
1908 {
1909 long c;
1910 int i;
1911
1912 /* i is the number of lines skipped */
1913 i = 0;
1914 if(Bgetrune(finput) != '*')
1915 error("illegal comment");
1916 c = Bgetrune(finput);
1917 while(c != Beof) {
1918 while(c == '*')
1919 if((c=Bgetrune(finput)) == '/')
1920 return i;
1921 if(c == '\n')
1922 i++;
1923 c = Bgetrune(finput);
1924 }
1925 error("EOF inside comment");
1926 return 0;
1927 }
1928
1929 /*
1930 * copy C action to the next ; or closing }
1931 */
1932 void
cpyact(int offset)1933 cpyact(int offset)
1934 {
1935 long c;
1936 int brac, match, j, s, fnd, tok;
1937
1938 Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1939 brac = 0;
1940
1941 loop:
1942 c = Bgetrune(finput);
1943 swt:
1944 switch(c) {
1945 case ';':
1946 if(brac == 0) {
1947 Bputrune(faction, c);
1948 return;
1949 }
1950 goto lcopy;
1951
1952 case '{':
1953 brac++;
1954 goto lcopy;
1955
1956 case '$':
1957 s = 1;
1958 tok = -1;
1959 c = Bgetrune(finput);
1960
1961 /* type description */
1962 if(c == '<') {
1963 Bungetrune(finput);
1964 if(gettok() != TYPENAME)
1965 error("bad syntax on $<ident> clause");
1966 tok = numbval;
1967 c = Bgetrune(finput);
1968 }
1969 if(c == '$') {
1970 Bprint(faction, "yyval");
1971
1972 /* put out the proper tag... */
1973 if(ntypes) {
1974 if(tok < 0)
1975 tok = fdtype(*prdptr[nprod]);
1976 Bprint(faction, ".%s", typeset[tok]);
1977 }
1978 goto loop;
1979 }
1980 if(c == '-') {
1981 s = -s;
1982 c = Bgetrune(finput);
1983 }
1984 if(isdigit(c)) {
1985 j = 0;
1986 while(isdigit(c)) {
1987 j = j*10 + (c-'0');
1988 c = Bgetrune(finput);
1989 }
1990 Bungetrune(finput);
1991 j = j*s - offset;
1992 if(j > 0)
1993 error("Illegal use of $%d", j+offset);
1994
1995 dollar:
1996 Bprint(faction, "yypt[-%d].yyv", -j);
1997
1998 /* put out the proper tag */
1999 if(ntypes) {
2000 if(j+offset <= 0 && tok < 0)
2001 error("must specify type of $%d", j+offset);
2002 if(tok < 0)
2003 tok = fdtype(prdptr[nprod][j+offset]);
2004 Bprint(faction, ".%s", typeset[tok]);
2005 }
2006 goto loop;
2007 }
2008 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2009 int tok; /* tok used oustide for type info */
2010
2011 /* look for $name */
2012 Bungetrune(finput);
2013 if(gettok() != IDENTIFIER)
2014 error("$ must be followed by an identifier");
2015 tok = chfind(2, tokname);
2016 if((c = Bgetrune(finput)) != '#') {
2017 Bungetrune(finput);
2018 fnd = -1;
2019 } else
2020 if(gettok() != NUMBER) {
2021 error("# must be followed by number");
2022 fnd = -1;
2023 } else
2024 fnd = numbval;
2025 for(j=1; j<=offset; ++j)
2026 if(tok == prdptr[nprod][j]) {
2027 if(--fnd <= 0) {
2028 j -= offset;
2029 goto dollar;
2030 }
2031 }
2032 error("$name or $name#number not found");
2033 }
2034 Bputc(faction, '$');
2035 if(s < 0 )
2036 Bputc(faction, '-');
2037 goto swt;
2038
2039 case '}':
2040 brac--;
2041 if(brac)
2042 goto lcopy;
2043 Bputrune(faction, c);
2044 return;
2045
2046 case '/':
2047 /* look for comments */
2048 Bputrune(faction, c);
2049 c = Bgetrune(finput);
2050 if(c != '*')
2051 goto swt;
2052
2053 /* it really is a comment */
2054 Bputrune(faction, c);
2055 c = Bgetrune(finput);
2056 while(c >= 0) {
2057 while(c == '*') {
2058 Bputrune(faction, c);
2059 if((c=Bgetrune(finput)) == '/')
2060 goto lcopy;
2061 }
2062 Bputrune(faction, c);
2063 if(c == '\n')
2064 lineno++;
2065 c = Bgetrune(finput);
2066 }
2067 error("EOF inside comment");
2068
2069 case '\'':
2070 /* character constant */
2071 match = '\'';
2072 goto string;
2073
2074 case '"':
2075 /* character string */
2076 match = '"';
2077
2078 string:
2079 Bputrune(faction, c);
2080 while(c = Bgetrune(finput)) {
2081 if(c == '\\') {
2082 Bputrune(faction, c);
2083 c = Bgetrune(finput);
2084 if(c == '\n')
2085 lineno++;
2086 } else
2087 if(c == match)
2088 goto lcopy;
2089 if(c == '\n')
2090 error("newline in string or char. const.");
2091 Bputrune(faction, c);
2092 }
2093 error("EOF in string or character constant");
2094
2095 case Beof:
2096 error("action does not terminate");
2097
2098 case '\n':
2099 lineno++;
2100 goto lcopy;
2101 }
2102
2103 lcopy:
2104 Bputrune(faction, c);
2105 goto loop;
2106 }
2107
2108 void
openup(char * stem,int dflag,int vflag,int ytab,char * ytabc)2109 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2110 {
2111 char buf[256];
2112
2113 if(vflag) {
2114 sprint(buf, "%s.%s", stem, FILEU);
2115 foutput = Bopen(buf, OWRITE);
2116 if(foutput == 0)
2117 error("cannot open %s", buf);
2118 }
2119 if(yydebug) {
2120 sprint(buf, "%s.%s", stem, FILEDEBUG);
2121 if((fdebug = Bopen(buf, OWRITE)) == 0)
2122 error("can't open %s", buf);
2123 }
2124 if(dflag) {
2125 sprint(buf, "%s.%s", stem, FILED);
2126 fdefine = Bopen(buf, OWRITE);
2127 if(fdefine == 0)
2128 error("can't create %s", buf);
2129 }
2130 if(ytab == 0)
2131 sprint(buf, "%s.%s", stem, OFILE);
2132 else
2133 strcpy(buf, ytabc);
2134 ftable = Bopen(buf, OWRITE);
2135 if(ftable == 0)
2136 error("cannot open table file %s", buf);
2137 }
2138
2139 /*
2140 * print the output for the states
2141 */
2142 void
output(void)2143 output(void)
2144 {
2145 int i, k, c;
2146 Wset *u, *v;
2147
2148 Bprint(ftable, "short yyexca[] =\n{");
2149 if(fdebug)
2150 Bprint(fdebug, "char* yystates[] =\n{\n");
2151
2152 /* output the stuff for state i */
2153 SLOOP(i) {
2154 nolook = tystate[i]!=MUSTLOOKAHEAD;
2155 closure(i);
2156
2157 /* output actions */
2158 nolook = 1;
2159 aryfil(temp1, ntokens+nnonter+1, 0);
2160 WSLOOP(wsets, u) {
2161 c = *(u->pitem);
2162 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2163 WSLOOP(u, v)
2164 if(c == *(v->pitem))
2165 putitem(v->pitem+1, (Lkset*)0);
2166 temp1[c] = state(c);
2167 } else
2168 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2169 temp1[c+ntokens] = amem[indgo[i]+c];
2170 }
2171 if(i == 1)
2172 temp1[1] = ACCEPTCODE;
2173
2174 /* now, we have the shifts; look at the reductions */
2175 lastred = 0;
2176 WSLOOP(wsets, u) {
2177 c = *u->pitem;
2178
2179 /* reduction */
2180 if(c <= 0) {
2181 lastred = -c;
2182 TLOOP(k)
2183 if(BIT(u->ws.lset, k)) {
2184 if(temp1[k] == 0)
2185 temp1[k] = c;
2186 else
2187 if(temp1[k] < 0) { /* reduce/reduce conflict */
2188 if(foutput)
2189 Bprint(foutput,
2190 "\n%d: reduce/reduce conflict"
2191 " (red'ns %d and %d ) on %s",
2192 i, -temp1[k], lastred,
2193 symnam(k));
2194 if(-temp1[k] > lastred)
2195 temp1[k] = -lastred;
2196 zzrrconf++;
2197 } else
2198 /* potential shift/reduce conflict */
2199 precftn( lastred, k, i );
2200 }
2201 }
2202 }
2203 wract(i);
2204 }
2205
2206 if(fdebug)
2207 Bprint(fdebug, "};\n");
2208 Bprint(ftable, "};\n");
2209 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2210 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2211 if(yydebug)
2212 Bprint(ftable, "#define yydebug %s\n", yydebug);
2213 }
2214
2215 /*
2216 * pack state i from temp1 into amem
2217 */
2218 int
apack(int * p,int n)2219 apack(int *p, int n)
2220 {
2221 int *pp, *qq, *rr, off, *q, *r;
2222
2223 /* we don't need to worry about checking because
2224 * we will only look at entries known to be there...
2225 * eliminate leading and trailing 0's
2226 */
2227
2228 q = p+n;
2229 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2230 ;
2231 /* no actions */
2232 if(pp > q)
2233 return 0;
2234 p = pp;
2235
2236 /* now, find a place for the elements from p to q, inclusive */
2237 r = &amem[ACTSIZE-1];
2238 for(rr = amem; rr <= r; rr++, off++) {
2239 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2240 if(*pp != 0)
2241 if(*pp != *qq && *qq != 0)
2242 goto nextk;
2243
2244 /* we have found an acceptable k */
2245 if(pkdebug && foutput != 0)
2246 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2247 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2248 if(*pp) {
2249 if(qq > r)
2250 error("action table overflow");
2251 if(qq > memp)
2252 memp = qq;
2253 *qq = *pp;
2254 }
2255 if(pkdebug && foutput != 0)
2256 for(pp = amem; pp <= memp; pp += 10) {
2257 Bprint(foutput, "\t");
2258 for(qq = pp; qq <= pp+9; qq++)
2259 Bprint(foutput, "%d ", *qq);
2260 Bprint(foutput, "\n");
2261 }
2262 return(off);
2263 nextk:;
2264 }
2265 error("no space in action table");
2266 return 0;
2267 }
2268
2269 /*
2270 * output the gotos for the nontermninals
2271 */
2272 void
go2out(void)2273 go2out(void)
2274 {
2275 int i, j, k, best, count, cbest, times;
2276
2277 /* mark begining of gotos */
2278 Bprint(ftemp, "$\n");
2279 for(i = 1; i <= nnonter; i++) {
2280 go2gen(i);
2281
2282 /* find the best one to make default */
2283 best = -1;
2284 times = 0;
2285
2286 /* is j the most frequent */
2287 for(j = 0; j <= nstate; j++) {
2288 if(tystate[j] == 0)
2289 continue;
2290 if(tystate[j] == best)
2291 continue;
2292
2293 /* is tystate[j] the most frequent */
2294 count = 0;
2295 cbest = tystate[j];
2296 for(k = j; k <= nstate; k++)
2297 if(tystate[k] == cbest)
2298 count++;
2299 if(count > times) {
2300 best = cbest;
2301 times = count;
2302 }
2303 }
2304
2305 /* best is now the default entry */
2306 zzgobest += times-1;
2307 for(j = 0; j <= nstate; j++)
2308 if(tystate[j] != 0 && tystate[j] != best) {
2309 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2310 zzgoent++;
2311 }
2312
2313 /* now, the default */
2314 if(best == -1)
2315 best = 0;
2316 zzgoent++;
2317 Bprint(ftemp, "%d\n", best);
2318 }
2319 }
2320
2321 /*
2322 * output the gotos for nonterminal c
2323 */
2324 void
go2gen(int c)2325 go2gen(int c)
2326 {
2327 int i, work, cc;
2328 Item *p, *q;
2329
2330
2331 /* first, find nonterminals with gotos on c */
2332 aryfil(temp1, nnonter+1, 0);
2333 temp1[c] = 1;
2334 work = 1;
2335 while(work) {
2336 work = 0;
2337 PLOOP(0, i)
2338
2339 /* cc is a nonterminal */
2340 if((cc=prdptr[i][1]-NTBASE) >= 0)
2341 /* cc has a goto on c */
2342 if(temp1[cc] != 0) {
2343
2344 /* thus, the left side of production i does too */
2345 cc = *prdptr[i]-NTBASE;
2346 if(temp1[cc] == 0) {
2347 work = 1;
2348 temp1[cc] = 1;
2349 }
2350 }
2351 }
2352
2353 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2354 if(g2debug && foutput != 0) {
2355 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2356 NTLOOP(i)
2357 if(temp1[i])
2358 Bprint(foutput, "%s ", nontrst[i].name);
2359 Bprint(foutput, "\n");
2360 }
2361
2362 /* now, go through and put gotos into tystate */
2363 aryfil(tystate, nstate, 0);
2364 SLOOP(i)
2365 ITMLOOP(i, p, q)
2366 if((cc = *p->pitem) >= NTBASE)
2367 /* goto on c is possible */
2368 if(temp1[cc-NTBASE]) {
2369 tystate[i] = amem[indgo[i]+c];
2370 break;
2371 }
2372 }
2373
2374 /*
2375 * decide a shift/reduce conflict by precedence.
2376 * r is a rule number, t a token number
2377 * the conflict is in state s
2378 * temp1[t] is changed to reflect the action
2379 */
2380 void
precftn(int r,int t,int s)2381 precftn(int r, int t, int s)
2382 {
2383 int lp, lt, action;
2384
2385 lp = levprd[r];
2386 lt = toklev[t];
2387 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2388
2389 /* conflict */
2390 if(foutput != 0)
2391 Bprint(foutput,
2392 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2393 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2394 zzsrconf++;
2395 return;
2396 }
2397 if(PLEVEL(lt) == PLEVEL(lp))
2398 action = ASSOC(lt);
2399 else
2400 if(PLEVEL(lt) > PLEVEL(lp))
2401 action = RASC; /* shift */
2402 else
2403 action = LASC; /* reduce */
2404 switch(action) {
2405 case BASC: /* error action */
2406 temp1[t] = ERRCODE;
2407 break;
2408 case LASC: /* reduce */
2409 temp1[t] = -r;
2410 break;
2411 }
2412 }
2413
2414 /*
2415 * output state i
2416 * temp1 has the actions, lastred the default
2417 */
2418 void
wract(int i)2419 wract(int i)
2420 {
2421 int p, p0, p1, ntimes, tred, count, j, flag;
2422
2423 /* find the best choice for lastred */
2424 lastred = 0;
2425 ntimes = 0;
2426 TLOOP(j) {
2427 if(temp1[j] >= 0)
2428 continue;
2429 if(temp1[j]+lastred == 0)
2430 continue;
2431 /* count the number of appearances of temp1[j] */
2432 count = 0;
2433 tred = -temp1[j];
2434 levprd[tred] |= REDFLAG;
2435 TLOOP(p)
2436 if(temp1[p]+tred == 0)
2437 count++;
2438 if(count > ntimes) {
2439 lastred = tred;
2440 ntimes = count;
2441 }
2442 }
2443
2444 /*
2445 * for error recovery, arrange that, if there is a shift on the
2446 * error recovery token, `error', that the default be the error action
2447 */
2448 if(temp1[2] > 0)
2449 lastred = 0;
2450
2451 /* clear out entries in temp1 which equal lastred */
2452 TLOOP(p)
2453 if(temp1[p]+lastred == 0)
2454 temp1[p] = 0;
2455
2456 wrstate(i);
2457 defact[i] = lastred;
2458 flag = 0;
2459 TLOOP(p0)
2460 if((p1=temp1[p0]) != 0) {
2461 if(p1 < 0) {
2462 p1 = -p1;
2463 goto exc;
2464 }
2465 if(p1 == ACCEPTCODE) {
2466 p1 = -1;
2467 goto exc;
2468 }
2469 if(p1 == ERRCODE) {
2470 p1 = 0;
2471 exc:
2472 if(flag++ == 0)
2473 Bprint(ftable, "-1, %d,\n", i);
2474 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2475 zzexcp++;
2476 continue;
2477 }
2478 Bprint(ftemp, "%d,%d,", p0, p1);
2479 zzacent++;
2480 }
2481 if(flag) {
2482 defact[i] = -2;
2483 Bprint(ftable, "\t-2, %d,\n", lastred);
2484 }
2485 Bprint(ftemp, "\n");
2486 }
2487
2488 /*
2489 * writes state i
2490 */
2491 void
wrstate(int i)2492 wrstate(int i)
2493 {
2494 int j0, j1;
2495 Item *pp, *qq;
2496 Wset *u;
2497
2498 if(fdebug) {
2499 if(lastred) {
2500 Bprint(fdebug, " 0, /*%d*/\n", i);
2501 } else {
2502 Bprint(fdebug, " \"");
2503 ITMLOOP(i, pp, qq)
2504 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2505 if(tystate[i] == MUSTLOOKAHEAD)
2506 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2507 if(*u->pitem < 0)
2508 Bprint(fdebug, "%s\\n", writem(u->pitem));
2509 Bprint(fdebug, "\", /*%d*/\n", i);
2510 }
2511 }
2512 if(foutput == 0)
2513 return;
2514 Bprint(foutput, "\nstate %d\n", i);
2515 ITMLOOP(i, pp, qq)
2516 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2517 if(tystate[i] == MUSTLOOKAHEAD)
2518 /* print out empty productions in closure */
2519 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2520 if(*u->pitem < 0)
2521 Bprint(foutput, "\t%s\n", writem(u->pitem));
2522
2523 /* check for state equal to another */
2524 TLOOP(j0)
2525 if((j1=temp1[j0]) != 0) {
2526 Bprint(foutput, "\n\t%s ", symnam(j0));
2527 /* shift, error, or accept */
2528 if(j1 > 0) {
2529 if(j1 == ACCEPTCODE)
2530 Bprint(foutput, "accept");
2531 else
2532 if(j1 == ERRCODE)
2533 Bprint(foutput, "error");
2534 else
2535 Bprint(foutput, "shift %d", j1);
2536 } else
2537 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2538 }
2539
2540 /* output the final production */
2541 if(lastred)
2542 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2543 lastred, rlines[lastred]);
2544 else
2545 Bprint(foutput, "\n\t. error\n\n");
2546
2547 /* now, output nonterminal actions */
2548 j1 = ntokens;
2549 for(j0 = 1; j0 <= nnonter; j0++) {
2550 j1++;
2551 if(temp1[j1])
2552 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2553 }
2554 }
2555
2556 void
warray(char * s,int * v,int n)2557 warray(char *s, int *v, int n)
2558 {
2559 int i;
2560
2561 Bprint(ftable, "short %s[] =\n{", s);
2562 for(i=0;;) {
2563 if(i%10 == 0)
2564 Bprint(ftable, "\n");
2565 Bprint(ftable, "%4d", v[i]);
2566 i++;
2567 if(i >= n) {
2568 Bprint(ftable, "\n};\n");
2569 break;
2570 }
2571 Bprint(ftable, ",");
2572 }
2573 }
2574
2575 /*
2576 * in order to free up the mem and amem arrays for the optimizer,
2577 * and still be able to output yyr1, etc., after the sizes of
2578 * the action array is known, we hide the nonterminals
2579 * derived by productions in levprd.
2580 */
2581
2582 void
hideprod(void)2583 hideprod(void)
2584 {
2585 int i, j;
2586
2587 j = 0;
2588 levprd[0] = 0;
2589 PLOOP(1, i) {
2590 if(!(levprd[i] & REDFLAG)) {
2591 j++;
2592 if(foutput != 0)
2593 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2594 }
2595 levprd[i] = *prdptr[i] - NTBASE;
2596 }
2597 if(j)
2598 print("%d rules never reduced\n", j);
2599 }
2600
2601 void
callopt(void)2602 callopt(void)
2603 {
2604 int i, *p, j, k, *q;
2605
2606 /* read the arrays from tempfile and set parameters */
2607 finput = Bopen(tempname, OREAD);
2608 if(finput == 0)
2609 error("optimizer cannot open tempfile");
2610 pgo[0] = 0;
2611 temp1[0] = 0;
2612 nstate = 0;
2613 nnonter = 0;
2614 for(;;) {
2615 switch(gtnm()) {
2616 case '\n':
2617 nstate++;
2618 pmem--;
2619 temp1[nstate] = pmem - mem0;
2620 case ',':
2621 continue;
2622 case '$':
2623 break;
2624 default:
2625 error("bad tempfile");
2626 }
2627 break;
2628 }
2629
2630 pmem--;
2631 temp1[nstate] = yypgo[0] = pmem - mem0;
2632 for(;;) {
2633 switch(gtnm()) {
2634 case '\n':
2635 nnonter++;
2636 yypgo[nnonter] = pmem-mem0;
2637 case ',':
2638 continue;
2639 case -1:
2640 break;
2641 default:
2642 error("bad tempfile");
2643 }
2644 break;
2645 }
2646 pmem--;
2647 yypgo[nnonter--] = pmem - mem0;
2648 for(i = 0; i < nstate; i++) {
2649 k = 32000;
2650 j = 0;
2651 q = mem0 + temp1[i+1];
2652 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2653 if(*p > j)
2654 j = *p;
2655 if(*p < k)
2656 k = *p;
2657 }
2658 /* nontrivial situation */
2659 if(k <= j) {
2660 /* j is now the range */
2661 /* j -= k; /* call scj */
2662 if(k > maxoff)
2663 maxoff = k;
2664 }
2665 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2666 if(j > maxspr)
2667 maxspr = j;
2668 }
2669
2670 /* initialize ggreed table */
2671 for(i = 1; i <= nnonter; i++) {
2672 ggreed[i] = 1;
2673 j = 0;
2674
2675 /* minimum entry index is always 0 */
2676 q = mem0 + yypgo[i+1] - 1;
2677 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2678 ggreed[i] += 2;
2679 if(*p > j)
2680 j = *p;
2681 }
2682 ggreed[i] = ggreed[i] + 2*j;
2683 if(j > maxoff)
2684 maxoff = j;
2685 }
2686
2687 /* now, prepare to put the shift actions into the amem array */
2688 for(i = 0; i < ACTSIZE; i++)
2689 amem[i] = 0;
2690 maxa = amem;
2691 for(i = 0; i < nstate; i++) {
2692 if(tystate[i] == 0 && adb > 1)
2693 Bprint(ftable, "State %d: null\n", i);
2694 indgo[i] = YYFLAG1;
2695 }
2696 while((i = nxti()) != NOMORE)
2697 if(i >= 0)
2698 stin(i);
2699 else
2700 gin(-i);
2701
2702 /* print amem array */
2703 if(adb > 2 )
2704 for(p = amem; p <= maxa; p += 10) {
2705 Bprint(ftable, "%4d ", (int)(p-amem));
2706 for(i = 0; i < 10; ++i)
2707 Bprint(ftable, "%4d ", p[i]);
2708 Bprint(ftable, "\n");
2709 }
2710
2711 /* write out the output appropriate to the language */
2712 aoutput();
2713 osummary();
2714 Bterm(finput);
2715 ZAPFILE(tempname);
2716 }
2717
2718 void
gin(int i)2719 gin(int i)
2720 {
2721 int *p, *r, *s, *q1, *q2;
2722
2723 /* enter gotos on nonterminal i into array amem */
2724 ggreed[i] = 0;
2725
2726 q2 = mem0+ yypgo[i+1] - 1;
2727 q1 = mem0 + yypgo[i];
2728
2729 /* now, find amem place for it */
2730 for(p = amem; p < &amem[ACTSIZE]; p++) {
2731 if(*p)
2732 continue;
2733 for(r = q1; r < q2; r += 2) {
2734 s = p + *r + 1;
2735 if(*s)
2736 goto nextgp;
2737 if(s > maxa)
2738 if((maxa = s) > &amem[ACTSIZE])
2739 error("a array overflow");
2740 }
2741 /* we have found amem spot */
2742 *p = *q2;
2743 if(p > maxa)
2744 if((maxa = p) > &amem[ACTSIZE])
2745 error("a array overflow");
2746 for(r = q1; r < q2; r += 2) {
2747 s = p + *r + 1;
2748 *s = r[1];
2749 }
2750 pgo[i] = p-amem;
2751 if(adb > 1)
2752 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2753 return;
2754
2755 nextgp:;
2756 }
2757 error("cannot place goto %d\n", i);
2758 }
2759
2760 void
stin(int i)2761 stin(int i)
2762 {
2763 int *r, *s, n, flag, j, *q1, *q2;
2764
2765 tystate[i] = 0;
2766
2767 /* enter state i into the amem array */
2768 q2 = mem0+temp1[i+1];
2769 q1 = mem0+temp1[i];
2770 /* find an acceptable place */
2771 for(n = -maxoff; n < ACTSIZE; n++) {
2772 flag = 0;
2773 for(r = q1; r < q2; r += 2) {
2774 if((s = *r + n + amem) < amem)
2775 goto nextn;
2776 if(*s == 0)
2777 flag++;
2778 else
2779 if(*s != r[1])
2780 goto nextn;
2781 }
2782
2783 /* check that the position equals another only if the states are identical */
2784 for(j=0; j<nstate; j++) {
2785 if(indgo[j] == n) {
2786
2787 /* we have some disagreement */
2788 if(flag)
2789 goto nextn;
2790 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2791
2792 /* states are equal */
2793 indgo[i] = n;
2794 if(adb > 1)
2795 Bprint(ftable,
2796 "State %d: entry at %d equals state %d\n",
2797 i, n, j);
2798 return;
2799 }
2800
2801 /* we have some disagreement */
2802 goto nextn;
2803 }
2804 }
2805
2806 for(r = q1; r < q2; r += 2) {
2807 if((s = *r+n+amem) >= &amem[ACTSIZE])
2808 error("out of space in optimizer a array");
2809 if(s > maxa)
2810 maxa = s;
2811 if(*s != 0 && *s != r[1])
2812 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2813 *s = r[1];
2814 }
2815 indgo[i] = n;
2816 if(adb > 1)
2817 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2818 return;
2819 nextn:;
2820 }
2821 error("Error; failure to place state %d\n", i);
2822 }
2823
2824 /*
2825 * finds the next i
2826 */
2827 int
nxti(void)2828 nxti(void)
2829 {
2830 int i, max, maxi;
2831
2832 max = 0;
2833 maxi = 0;
2834 for(i = 1; i <= nnonter; i++)
2835 if(ggreed[i] >= max) {
2836 max = ggreed[i];
2837 maxi = -i;
2838 }
2839 for(i = 0; i < nstate; ++i)
2840 if(tystate[i] >= max) {
2841 max = tystate[i];
2842 maxi = i;
2843 }
2844 if(nxdb)
2845 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2846 if(max == 0)
2847 return NOMORE;
2848 return maxi;
2849 }
2850
2851 /*
2852 * write summary
2853 */
2854 void
osummary(void)2855 osummary(void)
2856 {
2857
2858 int i, *p;
2859
2860 if(foutput == 0)
2861 return;
2862 i = 0;
2863 for(p = maxa; p >= amem; p--)
2864 if(*p == 0)
2865 i++;
2866
2867 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2868 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2869 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2870 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2871 }
2872
2873 /*
2874 * this version is for C
2875 * write out the optimized parser
2876 */
2877 void
aoutput(void)2878 aoutput(void)
2879 {
2880 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2881 arout("yyact", amem, (maxa-amem)+1);
2882 arout("yypact", indgo, nstate);
2883 arout("yypgo", pgo, nnonter+1);
2884 }
2885
2886 void
arout(char * s,int * v,int n)2887 arout(char *s, int *v, int n)
2888 {
2889 int i;
2890
2891 Bprint(ftable, "short %s[] =\n{", s);
2892 for(i = 0; i < n;) {
2893 if(i%10 == 0)
2894 Bprint(ftable, "\n");
2895 Bprint(ftable, "%4d", v[i]);
2896 i++;
2897 if(i == n)
2898 Bprint(ftable, "\n};\n");
2899 else
2900 Bprint(ftable, ",");
2901 }
2902 }
2903
2904 /*
2905 * read and convert an integer from the standard input
2906 * return the terminating character
2907 * blanks, tabs, and newlines are ignored
2908 */
2909 int
gtnm(void)2910 gtnm(void)
2911 {
2912 int sign, val, c;
2913
2914 sign = 0;
2915 val = 0;
2916 while((c=Bgetrune(finput)) != Beof) {
2917 if(isdigit(c)) {
2918 val = val*10 + c-'0';
2919 continue;
2920 }
2921 if(c == '-') {
2922 sign = 1;
2923 continue;
2924 }
2925 break;
2926 }
2927 if(sign)
2928 val = -val;
2929 *pmem++ = val;
2930 if(pmem >= &mem0[MEMSIZE])
2931 error("out of space");
2932 return c;
2933 }
2934