xref: /plan9/sys/src/cmd/yacc.c (revision f8f814447ad79df616c586571506a210f98c48bc)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 
6 #define	Bungetrune	Bungetc		/* ok for now. */
7 
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET		((32+NTERMS)/32)	/* BOTCH?? +31 */
12 #define BIT(a,i)	((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)	((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)	(((n)+32)/32)
15 
16 #define PARSER		"/sys/lib/yaccpar"
17 #define PARSERS		"/sys/lib/yaccpars"
18 #define TEMPNAME	"y.tmp.XXXXXX"
19 #define ACTNAME		"y.acts.XXXXXX"
20 #define OFILE		"tab.c"
21 #define FILEU		"output"
22 #define FILED		"tab.h"
23 #define FILEDEBUG	"debug"
24 
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31 	ACTSIZE		= 40000,
32 	MEMSIZE		= 40000,
33 	NSTATES		= 2000,
34 	NTERMS		= 511,
35 	NPROD		= 1600,
36 	NNONTERM	= 600,
37 	TEMPSIZE	= 2000,
38 	CNAMSZ		= 10000,
39 	LSETSIZE	= 2400,
40 	WSETSIZE	= 350,
41 
42 	NAMESIZE	= 50,
43 	NTYPES		= 63,
44 	ISIZE		= 400,
45 
46 	PRIVATE		= 0xE000,	/* unicode private use */
47 
48 	/* relationships which must hold:
49 		TBITSET ints must hold NTERMS+1 bits...
50 		WSETSIZE >= NNONTERM
51 		LSETSIZE >= NNONTERM
52 		TEMPSIZE >= NTERMS + NNONTERM + 1
53 		TEMPSIZE >= NSTATES
54 	*/
55 
56 	NTBASE		= 010000,
57 	ERRCODE		= 8190,
58 	ACCEPTCODE	= 8191,
59 
60 	NOASC		= 0,	/* no assoc. */
61 	LASC		= 1,	/* left assoc. */
62 	RASC		= 2,	/* right assoc. */
63 	BASC		= 3,	/* binary assoc. */
64 
65 	/* flags for state generation */
66 
67 	DONE		= 0,
68 	MUSTDO		= 1,
69 	MUSTLOOKAHEAD	= 2,
70 
71 	/* flags for a rule having an action, and being reduced */
72 
73 	ACTFLAG		= 04,
74 	REDFLAG		= 010,
75 
76 	/* output parser flags */
77 	YYFLAG1		= -1000,
78 
79 	/* parse tokens */
80 	IDENTIFIER	= PRIVATE,
81 	MARK,
82 	TERM,
83 	LEFT,
84 	RIGHT,
85 	BINARY,
86 	PREC,
87 	LCURLY,
88 	IDENTCOLON,
89 	NUMBER,
90 	START,
91 	TYPEDEF,
92 	TYPENAME,
93 	UNION,
94 
95 	ENDFILE		= 0,
96 
97 	EMPTY		= 1,
98 	WHOKNOWS	= 0,
99 	OK		= 1,
100 	NOMORE		= -1000,
101 };
102 
103 	/* macros for getting associativity and precedence levels */
104 
105 #define ASSOC(i)	((i)&03)
106 #define PLEVEL(i)	(((i)>>4)&077)
107 #define TYPE(i)		(((i)>>10)&077)
108 
109 	/* macros for setting associativity and precedence levels */
110 
111 #define SETASC(i,j)	i |= j
112 #define SETPLEV(i,j)	i |= (j<<4)
113 #define SETTYPE(i,j)	i |= (j<<10)
114 
115 	/* looping macros */
116 
117 #define TLOOP(i)	for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)	for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)	for(i=s; i<nprod; i++)
120 #define SLOOP(i)	for(i=0; i<nstate; i++)
121 #define WSBUMP(x)	x++
122 #define WSLOOP(s,j)	for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)	for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)	for(i=0; i<tbitset; i++)
125 
126 	/* command to clobber tempfiles after use */
127 
128 #define	ZAPFILE(x)	if(x) remove(x)
129 
130 	/* I/O descriptors */
131 
132 Biobuf*	faction;	/* file for saving actions */
133 Biobuf*	fdefine;	/* file for #defines */
134 Biobuf*	fdebug;		/* y.debug for strings for debugging */
135 Biobuf*	ftable;		/* y.tab.c file */
136 Biobuf*	ftemp;		/* tempfile to pass 2 */
137 Biobuf*	finput;		/* input file */
138 Biobuf*	foutput;	/* y.output file */
139 
140 	/* communication variables between various I/O routines */
141 
142 char*	infile;			/* input file name */
143 int	numbval;		/* value of an input number */
144 char	tokname[NAMESIZE+UTFmax+1]; /* input token name, slop for runes and 0 */
145 
146 	/* structure declarations */
147 
148 typedef
149 struct
150 {
151 	int	lset[TBITSET];
152 } Lkset;
153 
154 typedef
155 struct
156 {
157 	int*	pitem;
158 	Lkset*	look;
159 } Item;
160 
161 typedef
162 struct
163 {
164 	char*	name;
165 	int	value;
166 } Symb;
167 
168 typedef
169 struct
170 {
171 	int*	pitem;
172 	int	flag;
173 	Lkset	ws;
174 } Wset;
175 
176 	/* storage of names */
177 
178 char	cnames[CNAMSZ];		/* place where token and nonterminal names are stored */
179 int	cnamsz = CNAMSZ;	/* size of cnames */
180 char*	cnamp = cnames;		/* place where next name is to be put in */
181 int	ndefout = 4;		/* number of defined symbols output */
182 char*	tempname;
183 char*	actname;
184 char	ttempname[] = TEMPNAME;
185 char	tactname[] = ACTNAME;
186 char*	parser = PARSER;
187 char*	yydebug;
188 
189 	/* storage of types */
190 int	ntypes;			/* number of types defined */
191 char*	typeset[NTYPES];	/* pointers to type tags */
192 
193 	/* token information */
194 
195 int	ntokens = 0 ;		/* number of tokens */
196 Symb	tokset[NTERMS];
197 int	toklev[NTERMS];		/* vector with the precedence of the terminals */
198 
199 	/* nonterminal information */
200 
201 int	nnonter = -1;		/* the number of nonterminals */
202 Symb	nontrst[NNONTERM];
203 int	start;			/* start symbol */
204 
205 	/* assigned token type values */
206 int	extval = 0;
207 
208 char*	ytabc = OFILE;	/* name of y.tab.c */
209 
210 	/* grammar rule information */
211 
212 int	mem0[MEMSIZE] ;		/* production storage */
213 int*	mem = mem0;
214 int	nprod = 1;		/* number of productions */
215 int*	prdptr[NPROD];		/* pointers to descriptions of productions */
216 int	levprd[NPROD];		/* precedence levels for the productions */
217 int	rlines[NPROD];		/* line number for this rule */
218 
219 	/* state information */
220 
221 int	nstate = 0;		/* number of states */
222 Item*	pstate[NSTATES+2];	/* pointers to the descriptions of the states */
223 int	tystate[NSTATES];	/* contains type information about the states */
224 int	defact[NSTATES];	/* the default actions of states */
225 int	tstates[NTERMS];	/* states generated by terminal gotos */
226 int	ntstates[NNONTERM]; 	/* states generated by nonterminal gotos */
227 int	mstates[NSTATES];	/* chain of overflows of term/nonterm generation lists  */
228 int	lastred; 		/* the number of the last reduction of a state */
229 
230 	/* lookahead set information */
231 
232 Lkset	lkst[LSETSIZE];
233 int	nolook;			/* flag to turn off lookahead computations */
234 int	tbitset;		/* size of lookahead sets */
235 int	nlset = 0;		/* next lookahead set index */
236 int	nolook = 0;		/* flag to suppress lookahead computations */
237 Lkset	clset;  		/* temporary storage for lookahead computations */
238 
239 	/* working set information */
240 
241 Wset	wsets[WSETSIZE];
242 Wset*	cwp;
243 
244 	/* storage for action table */
245 
246 int	amem[ACTSIZE];		/* action table storage */
247 int*	memp = amem;		/* next free action table position */
248 int	indgo[NSTATES];		/* index to the stored goto table */
249 
250 	/* temporary vector, indexable by states, terms, or ntokens */
251 
252 int	temp1[TEMPSIZE];	/* temporary storage, indexed by terms + ntokens or states */
253 int	lineno = 1;		/* current input line number */
254 int	fatfl = 1;  		/* if on, error is fatal */
255 int	nerrors = 0;		/* number of errors */
256 
257 	/* statistics collection variables */
258 
259 int	zzgoent;
260 int	zzgobest;
261 int	zzacent;
262 int	zzexcp;
263 int	zzclose;
264 int	zzrrconf;
265 int	zzsrconf;
266 
267 int*	ggreed = lkst[0].lset;
268 int*	pgo = wsets[0].ws.lset;
269 int*	yypgo = &nontrst[0].value;
270 
271 int	maxspr = 0;  		/* maximum spread of any entry */
272 int	maxoff = 0;  		/* maximum offset into a array */
273 int*	pmem = mem0;
274 int*	maxa;
275 int	nxdb = 0;
276 int	adb = 0;
277 
278 
279 	/* storage for information about the nonterminals */
280 
281 int**	pres[NNONTERM+2];  	/* vector of pointers to productions yielding each nonterminal */
282 Lkset*	pfirst[NNONTERM+2];	/* vector of pointers to first sets for each nonterminal */
283 int	pempty[NNONTERM+1];	/* vector of nonterminals nontrivially deriving e */
284 
285 	/* random stuff picked out from between functions */
286 
287 int	indebug = 0;
288 Wset*	zzcwp = wsets;
289 int	zzgoent = 0;
290 int	zzgobest = 0;
291 int	zzacent = 0;
292 int	zzexcp = 0;
293 int	zzclose = 0;
294 int	zzsrconf = 0;
295 int*	zzmemsz = mem0;
296 int	zzrrconf = 0;
297 int	pidebug = 0;		/* debugging flag for putitem */
298 int	gsdebug = 0;
299 int	cldebug = 0;		/* debugging flag for closure */
300 int	pkdebug = 0;
301 int	g2debug = 0;
302 
303 struct
304 {
305 	char*	name;
306 	long	value;
307 } resrv[] =
308 {
309 	"binary",	BINARY,
310 	"left",		LEFT,
311 	"nonassoc",	BINARY,
312 	"prec",		PREC,
313 	"right",	RIGHT,
314 	"start",	START,
315 	"term",		TERM,
316 	"token",	TERM,
317 	"type",		TYPEDEF,
318 	"union",	UNION,
319 	0,
320 };
321 
322 	/* define functions */
323 
324 void	main(int, char**);
325 void	others(void);
326 char*	chcopy(char*, char*);
327 char*	writem(int*);
328 char*	symnam(int);
329 void	summary(void);
330 void	error(char*, ...);
331 void	aryfil(int*, int, int);
332 int	setunion(int*, int*);
333 void	prlook(Lkset*);
334 void	cpres(void);
335 void	cpfir(void);
336 int	state(int);
337 void	putitem(int*, Lkset*);
338 void	cempty(void);
339 void	stagen(void);
340 void	closure(int);
341 Lkset*	flset(Lkset*);
342 void	cleantmp(void);
343 void	intr(void);
344 void	setup(int, char**);
345 void	finact(void);
346 int	defin(int, char*);
347 void	defout(int);
348 char*	cstash(char*);
349 long	gettok(void);
350 int	fdtype(int);
351 int	chfind(int, char*);
352 void	cpyunion(void);
353 void	cpycode(void);
354 int	skipcom(void);
355 void	cpyact(int);
356 void	openup(char*, int, int, int, char*);
357 void	output(void);
358 int	apack(int*, int);
359 void	go2out(void);
360 void	go2gen(int);
361 void	precftn(int, int, int);
362 void	wract(int);
363 void	wrstate(int);
364 void	warray(char*, int*, int);
365 void	hideprod(void);
366 void	callopt(void);
367 void	gin(int);
368 void	stin(int);
369 int	nxti(void);
370 void	osummary(void);
371 void	aoutput(void);
372 void	arout(char*, int*, int);
373 int	gtnm(void);
374 
375 void
main(int argc,char * argv[])376 main(int argc, char *argv[])
377 {
378 
379 	setup(argc, argv);	/* initialize and read productions */
380 	tbitset = NWORDS(ntokens);
381 	cpres();		/* make table of which productions yield a given nonterminal */
382 	cempty();		/* make a table of which nonterminals can match the empty string */
383 	cpfir();		/* make a table of firsts of nonterminals */
384 	stagen();		/* generate the states */
385 	output();		/* write the states and the tables */
386 	go2out();
387 	hideprod();
388 	summary();
389 	callopt();
390 	others();
391 	exits(0);
392 }
393 
394 /*
395  * put out other arrays, copy the parsers
396  */
397 void
others(void)398 others(void)
399 {
400 	int c, i, j;
401 
402 	finput = Bopen(parser, OREAD);
403 	if(finput == 0)
404 		error("cannot find parser %s", parser);
405 	warray("yyr1", levprd, nprod);
406 	aryfil(temp1, nprod, 0);
407 	PLOOP(1, i)
408 		temp1[i] = prdptr[i+1]-prdptr[i]-2;
409 	warray("yyr2", temp1, nprod);
410 
411 	aryfil(temp1, nstate, -1000);
412 	TLOOP(i)
413 		for(j=tstates[i]; j!=0; j=mstates[j])
414 			temp1[j] = i;
415 	NTLOOP(i)
416 		for(j=ntstates[i]; j!=0; j=mstates[j])
417 			temp1[j] = -i;
418 	warray("yychk", temp1, nstate);
419 	warray("yydef", defact, nstate);
420 
421 	/* put out token translation tables */
422 	/* table 1 has 0-256 */
423 	aryfil(temp1, 256, 0);
424 	c = 0;
425 	TLOOP(i) {
426 		j = tokset[i].value;
427 		if(j >= 0 && j < 256) {
428 			if(temp1[j]) {
429 				print("yacc bug -- cant have 2 different Ts with same value\n");
430 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431 				nerrors++;
432 			}
433 			temp1[j] = i;
434 			if(j > c)
435 				c = j;
436 		}
437 	}
438 	warray("yytok1", temp1, c+1);
439 
440 	/* table 2 has PRIVATE-PRIVATE+256 */
441 	aryfil(temp1, 256, 0);
442 	c = 0;
443 	TLOOP(i) {
444 		j = tokset[i].value - PRIVATE;
445 		if(j >= 0 && j < 256) {
446 			if(temp1[j]) {
447 				print("yacc bug -- cant have 2 different Ts with same value\n");
448 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449 				nerrors++;
450 			}
451 			temp1[j] = i;
452 			if(j > c)
453 				c = j;
454 		}
455 	}
456 	warray("yytok2", temp1, c+1);
457 
458 	/* table 3 has everything else */
459 	Bprint(ftable, "long	yytok3[] =\n{\n");
460 	c = 0;
461 	TLOOP(i) {
462 		j = tokset[i].value;
463 		if(j >= 0 && j < 256)
464 			continue;
465 		if(j >= PRIVATE && j < 256+PRIVATE)
466 			continue;
467 
468 		Bprint(ftable, "%4d,%4d,", j, i);
469 		c++;
470 		if(c%5 == 0)
471 			Bprint(ftable, "\n");
472 	}
473 	Bprint(ftable, "%4d\n};\n", 0);
474 
475 	/* copy parser text */
476 	while((c=Bgetrune(finput)) != Beof) {
477 		if(c == '$') {
478 			if((c = Bgetrune(finput)) != 'A')
479 				Bputrune(ftable, '$');
480 			else { /* copy actions */
481 				faction = Bopen(actname, OREAD);
482 				if(faction == 0)
483 					error("cannot reopen action tempfile");
484 				while((c=Bgetrune(faction)) != Beof)
485 					Bputrune(ftable, c);
486 				Bterm(faction);
487 				ZAPFILE(actname);
488 				c = Bgetrune(finput);
489 			}
490 		}
491 		Bputrune(ftable, c);
492 	}
493 	Bterm(ftable);
494 }
495 
496 /*
497  * copies string q into p, returning next free char ptr
498  */
499 char*
chcopy(char * p,char * q)500 chcopy(char* p, char* q)
501 {
502 	int c;
503 
504 	while(c = *q) {
505 		if(c == '"')
506 			*p++ = '\\';
507 		*p++ = c;
508 		q++;
509 	}
510 	*p = 0;
511 	return p;
512 }
513 
514 /*
515  * creates output string for item pointed to by pp
516  */
517 char*
writem(int * pp)518 writem(int *pp)
519 {
520 	int i,*p;
521 	static char sarr[ISIZE];
522 	char* q;
523 
524 	for(p=pp; *p>0; p++)
525 		;
526 	p = prdptr[-*p];
527 	q = chcopy(sarr, nontrst[*p-NTBASE].name);
528 	q = chcopy(q, ": ");
529 	for(;;) {
530 		*q = ' ';
531 		p++;
532 		if(p == pp)
533 			*q = '.';
534 		q++;
535 		*q = '\0';
536 		i = *p;
537 		if(i <= 0)
538 			break;
539 		q = chcopy(q, symnam(i));
540 		if(q > &sarr[ISIZE-30])
541 			error("item too big");
542 	}
543 
544 	/* an item calling for a reduction */
545 	i = *pp;
546 	if(i < 0 ) {
547 		q = chcopy(q, "    (");
548 		sprint(q, "%d)", -i);
549 	}
550 	return sarr;
551 }
552 
553 /*
554  * return a pointer to the name of symbol i
555  */
556 char*
symnam(int i)557 symnam(int i)
558 {
559 	char* cp;
560 
561 	cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562 	if(*cp == ' ')
563 		cp++;
564 	return cp;
565 }
566 
567 /*
568  * output the summary on y.output
569  */
570 void
summary(void)571 summary(void)
572 {
573 
574 	if(foutput != 0) {
575 		Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576 			ntokens, NTERMS, nnonter, NNONTERM);
577 		Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578 			nprod, NPROD, nstate, NSTATES);
579 		Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580 			zzsrconf, zzrrconf);
581 		Bprint(foutput, "%d/%d working sets used\n",
582 			(int)(zzcwp-wsets), WSETSIZE);
583 		Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584 			(int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
585 		Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586 		Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587 		Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588 		Bprint(foutput, "%d goto entries\n", zzgoent);
589 		Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
590 	}
591 	if(zzsrconf != 0 || zzrrconf != 0) {
592 		print("\nconflicts: ");
593 		if(zzsrconf)
594 			print("%d shift/reduce", zzsrconf);
595 		if(zzsrconf && zzrrconf)
596 			print(", ");
597 		if(zzrrconf)
598 			print("%d reduce/reduce", zzrrconf);
599 		print("\n");
600 	}
601 	if(ftemp != 0) {
602 		Bterm(ftemp);
603 		ftemp = 0;
604 	}
605 	if(fdefine != 0) {
606 		Bterm(fdefine);
607 		fdefine = 0;
608 	}
609 }
610 
611 /*
612  * write out error comment -- NEEDS WORK
613  */
614 void
error(char * s,...)615 error(char *s, ...)
616 {
617 
618 	nerrors++;
619 	fprint(2, "\n fatal error:");
620 	fprint(2, s, (&s)[1]);
621 	fprint(2, ", %s:%d\n", infile, lineno);
622 	if(!fatfl)
623 		return;
624 	summary();
625 	cleantmp();
626 	exits("error");
627 }
628 
629 /*
630  * set elements 0 through n-1 to c
631  */
632 void
aryfil(int * v,int n,int c)633 aryfil(int *v, int n, int c)
634 {
635 	int i;
636 
637 	for(i=0; i<n; i++)
638 		v[i] = c;
639 }
640 
641 /*
642  * set a to the union of a and b
643  * return 1 if b is not a subset of a, 0 otherwise
644  */
645 int
setunion(int * a,int * b)646 setunion(int *a, int *b)
647 {
648 	int i, x, sub;
649 
650 	sub = 0;
651 	SETLOOP(i) {
652 		x = *a;
653 		*a |= *b;
654 		if(*a != x)
655 			sub = 1;
656 		a++;
657 		b++;
658 	}
659 	return sub;
660 }
661 
662 void
prlook(Lkset * p)663 prlook(Lkset* p)
664 {
665 	int j, *pp;
666 
667 	pp = p->lset;
668 	if(pp == 0)
669 		Bprint(foutput, "\tNULL");
670 	else {
671 		Bprint(foutput, " { ");
672 		TLOOP(j)
673 			if(BIT(pp,j))
674 				Bprint(foutput, "%s ", symnam(j));
675 		Bprint(foutput, "}");
676 	}
677 }
678 
679 /*
680  * compute an array with the beginnings of  productions yielding given nonterminals
681  * The array pres points to these lists
682  * the array pyield has the lists: the total size is only NPROD+1
683  */
684 void
cpres(void)685 cpres(void)
686 {
687 	int c, j, i, **pmem;
688 	static int *pyield[NPROD];
689 
690 	pmem = pyield;
691 	NTLOOP(i) {
692 		c = i+NTBASE;
693 		pres[i] = pmem;
694 		fatfl = 0;  	/* make undefined  symbols  nonfatal */
695 		PLOOP(0, j)
696 			if(*prdptr[j] == c)
697 				*pmem++ =  prdptr[j]+1;
698 		if(pres[i] == pmem)
699 			error("nonterminal %s not defined!", nontrst[i].name);
700 	}
701 	pres[i] = pmem;
702 	fatfl = 1;
703 	if(nerrors) {
704 		summary();
705 		cleantmp();
706 		exits("error");
707 	}
708 	if(pmem != &pyield[nprod])
709 		error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
710 }
711 
712 /*
713  * compute an array with the first of nonterminals
714  */
715 void
cpfir(void)716 cpfir(void)
717 {
718 	int *p, **s, i, **t, ch, changes;
719 
720 	zzcwp = &wsets[nnonter];
721 	NTLOOP(i) {
722 		aryfil(wsets[i].ws.lset, tbitset, 0);
723 		t = pres[i+1];
724 		/* initially fill the sets */
725 		for(s=pres[i]; s<t; ++s)
726 			for(p = *s; (ch = *p) > 0; ++p) {
727 				if(ch < NTBASE) {
728 					SETBIT(wsets[i].ws.lset, ch);
729 					break;
730 				}
731 				if(!pempty[ch-NTBASE])
732 					break;
733 			}
734 	}
735 
736 	/* now, reflect transitivity */
737 	changes = 1;
738 	while(changes) {
739 		changes = 0;
740 		NTLOOP(i) {
741 			t = pres[i+1];
742 			for(s = pres[i]; s < t; ++s)
743 				for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
744 					changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
745 					if(!pempty[ch])
746 						break;
747 				}
748 		}
749 	}
750 
751 	NTLOOP(i)
752 		pfirst[i] = flset(&wsets[i].ws);
753 	if(!indebug)
754 		return;
755 	if(foutput != 0)
756 		NTLOOP(i) {
757 			Bprint(foutput, "\n%s: ", nontrst[i].name);
758 			prlook(pfirst[i]);
759 			Bprint(foutput, " %d\n", pempty[i]);
760 		}
761 }
762 
763 /*
764  * sorts last state,and sees if it equals earlier ones. returns state number
765  */
766 int
state(int c)767 state(int c)
768 {
769 	Item *p1, *p2, *k, *l, *q1, *q2;
770 	int size1, size2, i;
771 
772 	p1 = pstate[nstate];
773 	p2 = pstate[nstate+1];
774 	if(p1 == p2)
775 		return 0;	/* null state */
776 	/* sort the items */
777 	for(k = p2-1; k > p1; k--)	/* make k the biggest */
778 		for(l = k-1; l >= p1; --l)
779 			if(l->pitem > k->pitem) {
780 				int *s;
781 				Lkset *ss;
782 
783 				s = k->pitem;
784 				k->pitem = l->pitem;
785 				l->pitem = s;
786 				ss = k->look;
787 				k->look = l->look;
788 				l->look = ss;
789 			}
790 	size1 = p2 - p1;	/* size of state */
791 
792 	for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
793 		/* get ith state */
794 		q1 = pstate[i];
795 		q2 = pstate[i+1];
796 		size2 = q2 - q1;
797 		if(size1 != size2)
798 			continue;
799 		k = p1;
800 		for(l = q1; l < q2; l++) {
801 			if(l->pitem != k->pitem)
802 				break;
803 			k++;
804 		}
805 		if(l != q2)
806 			continue;
807 		/* found it */
808 		pstate[nstate+1] = pstate[nstate];	/* delete last state */
809 		/* fix up lookaheads */
810 		if(nolook)
811 			return i;
812 		for(l = q1, k = p1; l < q2; ++l, ++k ) {
813 			int s;
814 
815 			SETLOOP(s)
816 				clset.lset[s] = l->look->lset[s];
817 			if(setunion(clset.lset, k->look->lset)) {
818 				tystate[i] = MUSTDO;
819 				/* register the new set */
820 				l->look = flset( &clset );
821 			}
822 		}
823 		return i;
824 	}
825 	/* state is new */
826 	if(nolook)
827 		error("yacc state/nolook error");
828 	pstate[nstate+2] = p2;
829 	if(nstate+1 >= NSTATES)
830 		error("too many states");
831 	if(c >= NTBASE) {
832 		mstates[nstate] = ntstates[c-NTBASE];
833 		ntstates[c-NTBASE] = nstate;
834 	} else {
835 		mstates[nstate] = tstates[c];
836 		tstates[c] = nstate;
837 	}
838 	tystate[nstate] = MUSTDO;
839 	return nstate++;
840 }
841 
842 void
putitem(int * ptr,Lkset * lptr)843 putitem(int *ptr, Lkset *lptr)
844 {
845 	Item *j;
846 
847 	if(pidebug && foutput != 0)
848 		Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
849 	j = pstate[nstate+1];
850 	j->pitem = ptr;
851 	if(!nolook)
852 		j->look = flset(lptr);
853 	pstate[nstate+1] = ++j;
854 	if((int*)j > zzmemsz) {
855 		zzmemsz = (int*)j;
856 		if(zzmemsz >=  &mem0[MEMSIZE])
857 			error("out of state space");
858 	}
859 }
860 
861 /*
862  * mark nonterminals which derive the empty string
863  * also, look for nonterminals which don't derive any token strings
864  */
865 void
cempty(void)866 cempty(void)
867 {
868 
869 	int i, *p;
870 
871 	/* first, use the array pempty to detect productions that can never be reduced */
872 	/* set pempty to WHONOWS */
873 	aryfil(pempty, nnonter+1, WHOKNOWS);
874 
875 	/* now, look at productions, marking nonterminals which derive something */
876 more:
877 	PLOOP(0, i) {
878 		if(pempty[*prdptr[i] - NTBASE])
879 			continue;
880 		for(p = prdptr[i]+1; *p >= 0; ++p)
881 			if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
882 				break;
883 		/* production can be derived */
884 		if(*p < 0) {
885 			pempty[*prdptr[i]-NTBASE] = OK;
886 			goto more;
887 		}
888 	}
889 
890 	/* now, look at the nonterminals, to see if they are all OK */
891 	NTLOOP(i) {
892 		/* the added production rises or falls as the start symbol ... */
893 		if(i == 0)
894 			continue;
895 		if(pempty[i] != OK) {
896 			fatfl = 0;
897 			error("nonterminal %s never derives any token string", nontrst[i].name);
898 		}
899 	}
900 
901 	if(nerrors) {
902 		summary();
903 		cleantmp();
904 		exits("error");
905 	}
906 
907 	/* now, compute the pempty array, to see which nonterminals derive the empty string */
908 	/* set pempty to WHOKNOWS */
909 	aryfil( pempty, nnonter+1, WHOKNOWS);
910 
911 	/* loop as long as we keep finding empty nonterminals */
912 
913 again:
914 	PLOOP(1, i) {
915 		/* not known to be empty */
916 		if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
917 			for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
918 				;
919 			/* we have a nontrivially empty nonterminal */
920 			if(*p < 0) {
921 				pempty[*prdptr[i]-NTBASE] = EMPTY;
922 				/* got one ... try for another */
923 				goto again;
924 			}
925 		}
926 	}
927 }
928 
929 /*
930  * generate the states
931  */
932 void
stagen(void)933 stagen(void)
934 {
935 
936 	int c, i, j, more;
937 	Wset *p, *q;
938 
939 	/* initialize */
940 	nstate = 0;
941 
942 	/* THIS IS FUNNY from the standpoint of portability
943 	 * it represents the magic moment when the mem0 array, which has
944 	 * been holding the productions, starts to hold item pointers, of a
945 	 * different type...
946 	 * someday, alloc should be used to allocate all this stuff... for now, we
947 	 * accept that if pointers don't fit in integers, there is a problem...
948 	 */
949 
950 	pstate[0] = pstate[1] = (Item*)mem;
951 	aryfil(clset.lset, tbitset, 0);
952 	putitem(prdptr[0]+1, &clset);
953 	tystate[0] = MUSTDO;
954 	nstate = 1;
955 	pstate[2] = pstate[1];
956 
957 	aryfil(amem, ACTSIZE, 0);
958 
959 	/* now, the main state generation loop */
960 	for(more=1; more;) {
961 		more = 0;
962 		SLOOP(i) {
963 			if(tystate[i] != MUSTDO)
964 				continue;
965 			tystate[i] = DONE;
966 			aryfil(temp1, nnonter+1, 0);
967 			/* take state i, close it, and do gotos */
968 			closure(i);
969 			/* generate goto's */
970 			WSLOOP(wsets, p) {
971 				if(p->flag)
972 					continue;
973 				p->flag = 1;
974 				c = *(p->pitem);
975 				if(c <= 1) {
976 					if(pstate[i+1]-pstate[i] <= p-wsets)
977 						tystate[i] = MUSTLOOKAHEAD;
978 					continue;
979 				}
980 				/* do a goto on c */
981 				WSLOOP(p, q)
982 					/* this item contributes to the goto */
983 					if(c == *(q->pitem)) {
984 						putitem(q->pitem+1, &q->ws);
985 						q->flag = 1;
986 					}
987 				if(c < NTBASE)
988 					state(c);	/* register new state */
989 				else
990 					temp1[c-NTBASE] = state(c);
991 			}
992 			if(gsdebug && foutput != 0) {
993 				Bprint(foutput, "%d: ", i);
994 				NTLOOP(j)
995 					if(temp1[j])
996 						Bprint(foutput, "%s %d, ",
997 						nontrst[j].name, temp1[j]);
998 				Bprint(foutput, "\n");
999 			}
1000 			indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1001 			/* do some more */
1002 			more = 1;
1003 		}
1004 	}
1005 }
1006 
1007 /*
1008  * generate the closure of state i
1009  */
1010 void
closure(int i)1011 closure(int i)
1012 {
1013 
1014 	Wset *u, *v;
1015 	Item *p, *q;
1016 	int c, ch, work, k, *pi, **s, **t;
1017 
1018 	zzclose++;
1019 
1020 	/* first, copy kernel of state i to wsets */
1021 	cwp = wsets;
1022 	ITMLOOP(i, p, q) {
1023 		cwp->pitem = p->pitem;
1024 		cwp->flag = 1;			/* this item must get closed */
1025 		SETLOOP(k)
1026 			cwp->ws.lset[k] = p->look->lset[k];
1027 		WSBUMP(cwp);
1028 	}
1029 
1030 	/* now, go through the loop, closing each item */
1031 	work = 1;
1032 	while(work) {
1033 		work = 0;
1034 		WSLOOP(wsets, u) {
1035 			if(u->flag == 0)
1036 				continue;
1037 			/* dot is before c */
1038 			c = *(u->pitem);
1039 			if(c < NTBASE) {
1040 				u->flag = 0;
1041 				/* only interesting case is where . is before nonterminal */
1042 				continue;
1043 			}
1044 
1045 			/* compute the lookahead */
1046 			aryfil(clset.lset, tbitset, 0);
1047 
1048 			/* find items involving c */
1049 			WSLOOP(u, v)
1050 				if(v->flag == 1 && *(pi=v->pitem) == c) {
1051 					v->flag = 0;
1052 					if(nolook)
1053 						continue;
1054 					while((ch = *++pi) > 0) {
1055 						/* terminal symbol */
1056 						if(ch < NTBASE) {
1057 							SETBIT(clset.lset, ch);
1058 							break;
1059 						}
1060 						/* nonterminal symbol */
1061 						setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1062 						if(!pempty[ch-NTBASE])
1063 							break;
1064 					}
1065 					if(ch <= 0)
1066 						setunion(clset.lset, v->ws.lset);
1067 				}
1068 
1069 			/*
1070 			 * now loop over productions derived from c
1071 			 * c is now nonterminal number
1072 			 */
1073 			c -= NTBASE;
1074 			t = pres[c+1];
1075 			for(s = pres[c]; s < t; ++s) {
1076 				/*
1077 				 * put these items into the closure
1078 				 * is the item there
1079 				 */
1080 				WSLOOP(wsets, v)
1081 					/* yes, it is there */
1082 					if(v->pitem == *s) {
1083 						if(nolook)
1084 							goto nexts;
1085 						if(setunion(v->ws.lset, clset.lset))
1086 							v->flag = work = 1;
1087 						goto nexts;
1088 					}
1089 
1090 				/*  not there; make a new entry */
1091 				if(cwp-wsets+1 >= WSETSIZE)
1092 					error( "working set overflow");
1093 				cwp->pitem = *s;
1094 				cwp->flag = 1;
1095 				if(!nolook) {
1096 					work = 1;
1097 					SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1098 				}
1099 				WSBUMP(cwp);
1100 
1101 			nexts:;
1102 			}
1103 		}
1104 	}
1105 
1106 	/* have computed closure; flags are reset; return */
1107 	if(cwp > zzcwp)
1108 		zzcwp = cwp;
1109 	if(cldebug && foutput != 0) {
1110 		Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1111 		WSLOOP(wsets, u) {
1112 			if(u->flag)
1113 				Bprint(foutput, "flag set!\n");
1114 			u->flag = 0;
1115 			Bprint(foutput, "\t%s", writem(u->pitem));
1116 			prlook(&u->ws);
1117 			Bprint(foutput, "\n");
1118 		}
1119 	}
1120 }
1121 
1122 /*
1123  * decide if the lookahead set pointed to by p is known
1124  * return pointer to a perminent location for the set
1125  */
1126 Lkset*
flset(Lkset * p)1127 flset(Lkset *p)
1128 {
1129 	Lkset *q;
1130 	int *u, *v, *w, j;
1131 
1132 	for(q = &lkst[nlset]; q-- > lkst;) {
1133 		u = p->lset;
1134 		v = q->lset;
1135 		w = &v[tbitset];
1136 		while(v < w)
1137 			if(*u++ != *v++)
1138 				goto more;
1139 		/* we have matched */
1140 		return q;
1141 	more:;
1142 	}
1143 	/* add a new one */
1144 	q = &lkst[nlset++];
1145 	if(nlset >= LSETSIZE)
1146 		error("too many lookahead sets");
1147 	SETLOOP(j)
1148 		q->lset[j] = p->lset[j];
1149 	return q;
1150 }
1151 
1152 void
cleantmp(void)1153 cleantmp(void)
1154 {
1155 	ZAPFILE(actname);
1156 	ZAPFILE(tempname);
1157 }
1158 
1159 void
intr(void)1160 intr(void)
1161 {
1162 	cleantmp();
1163 	exits("interrupted");
1164 }
1165 
1166 void
usage(void)1167 usage(void)
1168 {
1169 	fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1170 	exits("usage");
1171 }
1172 
1173 void
setup(int argc,char * argv[])1174 setup(int argc, char *argv[])
1175 {
1176 	long c, t;
1177 	int i, j, lev, ty, ytab, *p;
1178 	int vflag, dflag, stem;
1179 	char actnm[8], *stemc, *s, dirbuf[128];
1180 
1181 	ytab = 0;
1182 	vflag = 0;
1183 	dflag = 0;
1184 	stem = 0;
1185 	stemc = "y";
1186 	foutput = 0;
1187 	fdefine = 0;
1188 	fdebug = 0;
1189 	ARGBEGIN{
1190 	case 'v':
1191 	case 'V':
1192 		vflag++;
1193 		break;
1194 	case 'D':
1195 		yydebug = EARGF(usage());
1196 		break;
1197 	case 'd':
1198 		dflag++;
1199 		break;
1200 	case 'o':
1201 		ytab++;
1202 		ytabc = EARGF(usage());
1203 		break;
1204 	case 's':
1205 		stem++;
1206 		stemc = ARGF();
1207 		break;
1208 	case 'S':
1209 		parser = PARSERS;
1210 		break;
1211 	default:
1212 		error("illegal option: %c", ARGC());
1213 	}ARGEND
1214 	openup(stemc, dflag, vflag, ytab, ytabc);
1215 
1216 	ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1217 	faction = Bopen(actname = mktemp(tactname), OWRITE);
1218 	if(ftemp == 0 || faction == 0)
1219 		error("cannot open temp file");
1220 	if(argc < 1)
1221 		error("no input file");
1222 	infile = argv[0];
1223 	if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1224 		i = strlen(infile)+1+strlen(dirbuf)+1+10;
1225 		s = malloc(i);
1226 		if(s != nil){
1227 			snprint(s, i, "%s/%s", dirbuf, infile);
1228 			cleanname(s);
1229 			infile = s;
1230 		}
1231 	}
1232 	finput = Bopen(infile, OREAD);
1233 	if(finput == 0)
1234 		error("cannot open '%s'", argv[0]);
1235 	cnamp = cnames;
1236 
1237 	defin(0, "$end");
1238 	extval = PRIVATE;	/* tokens start in unicode 'private use' */
1239 	defin(0, "error");
1240 	defin(1, "$accept");
1241 	defin(0, "$unk");
1242 	mem = mem0;
1243 	i = 0;
1244 
1245 	for(t = gettok(); t != MARK && t != ENDFILE;)
1246 	switch(t) {
1247 	case ';':
1248 		t = gettok();
1249 		break;
1250 
1251 	case START:
1252 		if(gettok() != IDENTIFIER)
1253 			error("bad %%start construction");
1254 		start = chfind(1, tokname);
1255 		t = gettok();
1256 		continue;
1257 
1258 	case TYPEDEF:
1259 		if(gettok() != TYPENAME)
1260 			error("bad syntax in %%type");
1261 		ty = numbval;
1262 		for(;;) {
1263 			t = gettok();
1264 			switch(t) {
1265 			case IDENTIFIER:
1266 				if((t=chfind(1, tokname)) < NTBASE) {
1267 					j = TYPE(toklev[t]);
1268 					if(j != 0 && j != ty)
1269 						error("type redeclaration of token %s",
1270 							tokset[t].name);
1271 					else
1272 						SETTYPE(toklev[t], ty);
1273 				} else {
1274 					j = nontrst[t-NTBASE].value;
1275 					if(j != 0 && j != ty)
1276 						error("type redeclaration of nonterminal %s",
1277 							nontrst[t-NTBASE].name );
1278 					else
1279 						nontrst[t-NTBASE].value = ty;
1280 				}
1281 			case ',':
1282 				continue;
1283 			case ';':
1284 				t = gettok();
1285 			default:
1286 				break;
1287 			}
1288 			break;
1289 		}
1290 		continue;
1291 
1292 	case UNION:
1293 		/* copy the union declaration to the output */
1294 		cpyunion();
1295 		t = gettok();
1296 		continue;
1297 
1298 	case LEFT:
1299 	case BINARY:
1300 	case RIGHT:
1301 		i++;
1302 
1303 	case TERM:
1304 		/* nonzero means new prec. and assoc. */
1305 		lev = t-TERM;
1306 		ty = 0;
1307 
1308 		/* get identifiers so defined */
1309 		t = gettok();
1310 
1311 		/* there is a type defined */
1312 		if(t == TYPENAME) {
1313 			ty = numbval;
1314 			t = gettok();
1315 		}
1316 		for(;;) {
1317 			switch(t) {
1318 			case ',':
1319 				t = gettok();
1320 				continue;
1321 
1322 			case ';':
1323 				break;
1324 
1325 			case IDENTIFIER:
1326 				j = chfind(0, tokname);
1327 				if(j >= NTBASE)
1328 					error("%s defined earlier as nonterminal", tokname);
1329 				if(lev) {
1330 					if(ASSOC(toklev[j]))
1331 						error("redeclaration of precedence of %s", tokname);
1332 					SETASC(toklev[j], lev);
1333 					SETPLEV(toklev[j], i);
1334 				}
1335 				if(ty) {
1336 					if(TYPE(toklev[j]))
1337 						error("redeclaration of type of %s", tokname);
1338 					SETTYPE(toklev[j],ty);
1339 				}
1340 				t = gettok();
1341 				if(t == NUMBER) {
1342 					tokset[j].value = numbval;
1343 					if(j < ndefout && j > 3)
1344 						error("please define type number of %s earlier",
1345 							tokset[j].name);
1346 					t = gettok();
1347 				}
1348 				continue;
1349 			}
1350 			break;
1351 		}
1352 		continue;
1353 
1354 	case LCURLY:
1355 		defout(0);
1356 		cpycode();
1357 		t = gettok();
1358 		continue;
1359 
1360 	default:
1361 		error("syntax error");
1362 	}
1363 	if(t == ENDFILE)
1364 		error("unexpected EOF before %%");
1365 
1366 	/* t is MARK */
1367 	Bprint(ftable, "extern	int	yyerrflag;\n");
1368 	Bprint(ftable, "#ifndef	YYMAXDEPTH\n");
1369 	Bprint(ftable, "#define	YYMAXDEPTH	150\n");
1370 	Bprint(ftable, "#endif\n" );
1371 	if(!ntypes) {
1372 		Bprint(ftable, "#ifndef	YYSTYPE\n");
1373 		Bprint(ftable, "#define	YYSTYPE	int\n");
1374 		Bprint(ftable, "#endif\n");
1375 	}
1376 	Bprint(ftable, "YYSTYPE	yylval;\n");
1377 	Bprint(ftable, "YYSTYPE	yyval;\n");
1378 
1379 	prdptr[0] = mem;
1380 
1381 	/* added production */
1382 	*mem++ = NTBASE;
1383 
1384 	/* if start is 0, we will overwrite with the lhs of the first rule */
1385 	*mem++ = start;
1386 	*mem++ = 1;
1387 	*mem++ = 0;
1388 	prdptr[1] = mem;
1389 	while((t=gettok()) == LCURLY)
1390 		cpycode();
1391 	if(t != IDENTCOLON)
1392 		error("bad syntax on first rule");
1393 
1394 	if(!start)
1395 		prdptr[0][1] = chfind(1, tokname);
1396 
1397 	/* read rules */
1398 	while(t != MARK && t != ENDFILE) {
1399 		/* process a rule */
1400 		rlines[nprod] = lineno;
1401 		if(t == '|')
1402 			*mem++ = *prdptr[nprod-1];
1403 		else
1404 			if(t == IDENTCOLON) {
1405 				*mem = chfind(1, tokname);
1406 				if(*mem < NTBASE)
1407 					error("token illegal on LHS of grammar rule");
1408 				mem++;
1409 			} else
1410 				error("illegal rule: missing semicolon or | ?");
1411 		/* read rule body */
1412 		t = gettok();
1413 
1414 	more_rule:
1415 		while(t == IDENTIFIER) {
1416 			*mem = chfind(1, tokname);
1417 			if(*mem < NTBASE)
1418 				levprd[nprod] = toklev[*mem];
1419 			mem++;
1420 			t = gettok();
1421 		}
1422 		if(t == PREC) {
1423 			if(gettok() != IDENTIFIER)
1424 				error("illegal %%prec syntax");
1425 			j = chfind(2, tokname);
1426 			if(j >= NTBASE)
1427 				error("nonterminal %s illegal after %%prec",
1428 					nontrst[j-NTBASE].name);
1429 			levprd[nprod] = toklev[j];
1430 			t = gettok();
1431 		}
1432 		if(t == '=') {
1433 			levprd[nprod] |= ACTFLAG;
1434 			Bprint(faction, "\ncase %d:", nprod);
1435 			cpyact(mem-prdptr[nprod]-1);
1436 			Bprint(faction, " break;");
1437 			if((t=gettok()) == IDENTIFIER) {
1438 
1439 				/* action within rule... */
1440 				sprint(actnm, "$$%d", nprod);
1441 
1442 				/* make it a nonterminal */
1443 				j = chfind(1, actnm);
1444 
1445 				/*
1446 				 * the current rule will become rule number nprod+1
1447 				 * move the contents down, and make room for the null
1448 				 */
1449 				for(p = mem; p >= prdptr[nprod]; --p)
1450 					p[2] = *p;
1451 				mem += 2;
1452 
1453 				/* enter null production for action */
1454 				p = prdptr[nprod];
1455 				*p++ = j;
1456 				*p++ = -nprod;
1457 
1458 				/* update the production information */
1459 				levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1460 				levprd[nprod] = ACTFLAG;
1461 				if(++nprod >= NPROD)
1462 					error("more than %d rules", NPROD);
1463 				prdptr[nprod] = p;
1464 
1465 				/* make the action appear in the original rule */
1466 				*mem++ = j;
1467 
1468 				/* get some more of the rule */
1469 				goto more_rule;
1470 			}
1471 		}
1472 
1473 		while(t == ';')
1474 			t = gettok();
1475 		*mem++ = -nprod;
1476 
1477 		/* check that default action is reasonable */
1478 		if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1479 
1480 			/* no explicit action, LHS has value */
1481 			int tempty;
1482 
1483 			tempty = prdptr[nprod][1];
1484 			if(tempty < 0)
1485 				error("must return a value, since LHS has a type");
1486 			else
1487 				if(tempty >= NTBASE)
1488 					tempty = nontrst[tempty-NTBASE].value;
1489 				else
1490 					tempty = TYPE(toklev[tempty]);
1491 			if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1492 				error("default action causes potential type clash");
1493 		}
1494 		nprod++;
1495 		if(nprod >= NPROD)
1496 			error("more than %d rules", NPROD);
1497 		prdptr[nprod] = mem;
1498 		levprd[nprod] = 0;
1499 	}
1500 
1501 	/* end of all rules */
1502 	defout(1);
1503 
1504 	finact();
1505 	if(t == MARK) {
1506 		Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1507 		while((c=Bgetrune(finput)) != Beof)
1508 			Bputrune(ftable, c);
1509 	}
1510 	Bterm(finput);
1511 }
1512 
1513 /*
1514  * finish action routine
1515  */
1516 void
finact(void)1517 finact(void)
1518 {
1519 
1520 	Bterm(faction);
1521 	Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1522 	Bprint(ftable, "#define YYERRCODE %d\n", 2);
1523 }
1524 
1525 /*
1526  * define s to be a terminal if t=0
1527  * or a nonterminal if t=1
1528  */
1529 int
defin(int nt,char * s)1530 defin(int nt, char *s)
1531 {
1532 	int val;
1533 	Rune rune;
1534 
1535 	val = 0;
1536 	if(nt) {
1537 		nnonter++;
1538 		if(nnonter >= NNONTERM)
1539 			error("too many nonterminals, limit %d",NNONTERM);
1540 		nontrst[nnonter].name = cstash(s);
1541 		return NTBASE + nnonter;
1542 	}
1543 
1544 	/* must be a token */
1545 	ntokens++;
1546 	if(ntokens >= NTERMS)
1547 		error("too many terminals, limit %d", NTERMS);
1548 	tokset[ntokens].name = cstash(s);
1549 
1550 	/* establish value for token */
1551 	/* single character literal */
1552 	if(s[0] == ' ') {
1553 		val = chartorune(&rune, &s[1]);
1554 		if(s[val+1] == 0) {
1555 			val = rune;
1556 			goto out;
1557 		}
1558 	}
1559 
1560 	/* escape sequence */
1561 	if(s[0] == ' ' && s[1] == '\\') {
1562 		if(s[3] == 0) {
1563 			/* single character escape sequence */
1564 			switch(s[2]) {
1565 			case 'n':	val = '\n'; break;
1566 			case 'r':	val = '\r'; break;
1567 			case 'b':	val = '\b'; break;
1568 			case 't':	val = '\t'; break;
1569 			case 'f':	val = '\f'; break;
1570 			case '\'':	val = '\''; break;
1571 			case '"':	val = '"'; break;
1572 			case '\\':	val = '\\'; break;
1573 			default:	error("invalid escape");
1574 			}
1575 			goto out;
1576 		}
1577 
1578 		/* \nnn sequence */
1579 		if(s[2] >= '0' && s[2] <= '7') {
1580 			if(s[3] < '0' ||
1581 			   s[3] > '7' ||
1582 			   s[4] < '0' ||
1583 			   s[4] > '7' ||
1584 			   s[5] != 0)
1585 				error("illegal \\nnn construction");
1586 			val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1587 			if(val == 0)
1588 				error("'\\000' is illegal");
1589 			goto out;
1590 		}
1591 		error("unknown escape");
1592 	}
1593 	val = extval++;
1594 
1595 out:
1596 	tokset[ntokens].value = val;
1597 	toklev[ntokens] = 0;
1598 	return ntokens;
1599 }
1600 
1601 /*
1602  * write out the defines (at the end of the declaration section)
1603  */
1604 void
defout(int last)1605 defout(int last)
1606 {
1607 	int i, c;
1608 	char sar[NAMESIZE+10];
1609 
1610 	for(i=ndefout; i<=ntokens; i++) {
1611 		/* non-literals */
1612 		c = tokset[i].name[0];
1613 		if(c != ' ' && c != '$') {
1614 			Bprint(ftable, "#define	%s	%d\n",
1615 				tokset[i].name, tokset[i].value);
1616 			if(fdefine)
1617 				Bprint(fdefine, "#define\t%s\t%d\n",
1618 					tokset[i].name, tokset[i].value);
1619 		}
1620 	}
1621 	ndefout = ntokens+1;
1622 	if(last && fdebug) {
1623 		Bprint(fdebug, "char*	yytoknames[] =\n{\n");
1624 		TLOOP(i) {
1625 			if(tokset[i].name) {
1626 				chcopy(sar, tokset[i].name);
1627 				Bprint(fdebug, "\t\"%s\",\n", sar);
1628 				continue;
1629 			}
1630 			Bprint(fdebug, "\t0,\n");
1631 		}
1632 		Bprint(fdebug, "};\n");
1633 	}
1634 }
1635 
1636 char*
cstash(char * s)1637 cstash(char *s)
1638 {
1639 	char *temp;
1640 
1641 	temp = cnamp;
1642 	do {
1643 		if(cnamp >= &cnames[cnamsz])
1644 			error("too many characters in id's and literals");
1645 		else
1646 			*cnamp++ = *s;
1647 	} while(*s++);
1648 	return temp;
1649 }
1650 
1651 long
gettok(void)1652 gettok(void)
1653 {
1654 	long c;
1655 	Rune rune;
1656 	int i, base, match, reserve;
1657 	static int peekline;
1658 
1659 begin:
1660 	reserve = 0;
1661 	lineno += peekline;
1662 	peekline = 0;
1663 	c = Bgetrune(finput);
1664 	while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1665 		if(c == '\n')
1666 			lineno++;
1667 		c = Bgetrune(finput);
1668 	}
1669 
1670 	/* skip comment */
1671 	if(c == '/') {
1672 		lineno += skipcom();
1673 		goto begin;
1674 	}
1675 	switch(c) {
1676 	case Beof:
1677 		return ENDFILE;
1678 
1679 	case '{':
1680 		Bungetrune(finput);
1681 		return '=';
1682 
1683 	case '<':
1684 		/* get, and look up, a type name (union member name) */
1685 		i = 0;
1686 		while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1687 			rune = c;
1688 			c = runetochar(&tokname[i], &rune);
1689 			if(i < NAMESIZE)
1690 				i += c;
1691 		}
1692 		if(c != '>')
1693 			error("unterminated < ... > clause");
1694 		tokname[i] = 0;
1695 		for(i=1; i<=ntypes; i++)
1696 			if(!strcmp(typeset[i], tokname)) {
1697 				numbval = i;
1698 				return TYPENAME;
1699 			}
1700 		ntypes++;
1701 		numbval = ntypes;
1702 		typeset[numbval] = cstash(tokname);
1703 		return TYPENAME;
1704 
1705 	case '"':
1706 	case '\'':
1707 		match = c;
1708 		tokname[0] = ' ';
1709 		i = 1;
1710 		for(;;) {
1711 			c = Bgetrune(finput);
1712 			if(c == '\n' || c <= 0)
1713 				error("illegal or missing ' or \"" );
1714 			if(c == '\\') {
1715 				tokname[i] = '\\';
1716 				if(i < NAMESIZE)
1717 					i++;
1718 				c = Bgetrune(finput);
1719 			} else
1720 				if(c == match)
1721 					break;
1722 			rune = c;
1723 			c = runetochar(&tokname[i], &rune);
1724 			if(i < NAMESIZE)
1725 				i += c;
1726 		}
1727 		break;
1728 
1729 	case '%':
1730 	case '\\':
1731 		switch(c = Bgetrune(finput)) {
1732 		case '0':	return TERM;
1733 		case '<':	return LEFT;
1734 		case '2':	return BINARY;
1735 		case '>':	return RIGHT;
1736 		case '%':
1737 		case '\\':	return MARK;
1738 		case '=':	return PREC;
1739 		case '{':	return LCURLY;
1740 		default:	reserve = 1;
1741 		}
1742 
1743 	default:
1744 		/* number */
1745 		if(isdigit(c)) {
1746 			numbval = c-'0';
1747 			base = (c=='0')? 8: 10;
1748 			for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1749 				numbval = numbval*base + (c-'0');
1750 			Bungetrune(finput);
1751 			return NUMBER;
1752 		}
1753 		if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1754 			i = 0;
1755 			while(islower(c) || isupper(c) || isdigit(c) ||
1756 			    c == '-' || c=='_' || c=='.' || c=='$') {
1757 				if(reserve && isupper(c))
1758 					c += 'a'-'A';
1759 				rune = c;
1760 				c = runetochar(&tokname[i], &rune);
1761 				if(i < NAMESIZE)
1762 					i += c;
1763 				c = Bgetrune(finput);
1764 			}
1765 		} else
1766 			return c;
1767 		Bungetrune(finput);
1768 	}
1769 	tokname[i] = 0;
1770 
1771 	/* find a reserved word */
1772 	if(reserve) {
1773 		for(c=0; resrv[c].name; c++)
1774 			if(strcmp(tokname, resrv[c].name) == 0)
1775 				return resrv[c].value;
1776 		error("invalid escape, or illegal reserved word: %s", tokname);
1777 	}
1778 
1779 	/* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1780 	c = Bgetrune(finput);
1781 	while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1782 		if(c == '\n')
1783 			peekline++;
1784 		/* look for comments */
1785 		if(c == '/')
1786 			peekline += skipcom();
1787 		c = Bgetrune(finput);
1788 	}
1789 	if(c == ':')
1790 		return IDENTCOLON;
1791 	Bungetrune(finput);
1792 	return IDENTIFIER;
1793 }
1794 
1795 /*
1796  * determine the type of a symbol
1797  */
1798 int
fdtype(int t)1799 fdtype(int t)
1800 {
1801 	int v;
1802 
1803 	if(t >= NTBASE)
1804 		v = nontrst[t-NTBASE].value;
1805 	else
1806 		v = TYPE(toklev[t]);
1807 	if(v <= 0)
1808 		error("must specify type for %s", (t>=NTBASE)?
1809 			nontrst[t-NTBASE].name: tokset[t].name);
1810 	return v;
1811 }
1812 
1813 int
chfind(int t,char * s)1814 chfind(int t, char *s)
1815 {
1816 	int i;
1817 
1818 	if(s[0] == ' ')
1819 		t = 0;
1820 	TLOOP(i)
1821 		if(!strcmp(s, tokset[i].name))
1822 			return i;
1823 	NTLOOP(i)
1824 		if(!strcmp(s, nontrst[i].name))
1825 			return NTBASE+i;
1826 
1827 	/* cannot find name */
1828 	if(t > 1)
1829 		error("%s should have been defined earlier", s);
1830 	return defin(t, s);
1831 }
1832 
1833 /*
1834  * copy the union declaration to the output, and the define file if present
1835  */
1836 void
cpyunion(void)1837 cpyunion(void)
1838 {
1839 	long c;
1840 	int level;
1841 
1842 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1843 	Bprint(ftable, "typedef union ");
1844 	if(fdefine != 0)
1845 		Bprint(fdefine, "\ntypedef union ");
1846 
1847 	level = 0;
1848 	for(;;) {
1849 		if((c=Bgetrune(finput)) == Beof)
1850 			error("EOF encountered while processing %%union");
1851 		Bputrune(ftable, c);
1852 		if(fdefine != 0)
1853 			Bputrune(fdefine, c);
1854 		switch(c) {
1855 		case '\n':
1856 			lineno++;
1857 			break;
1858 		case '{':
1859 			level++;
1860 			break;
1861 		case '}':
1862 			level--;
1863 
1864 			/* we are finished copying */
1865 			if(level == 0) {
1866 				Bprint(ftable, " YYSTYPE;\n");
1867 				if(fdefine != 0)
1868 					Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1869 				return;
1870 			}
1871 		}
1872 	}
1873 }
1874 
1875 /*
1876  * copies code between \{ and \}
1877  */
1878 void
cpycode(void)1879 cpycode(void)
1880 {
1881 
1882 	long c;
1883 
1884 	c = Bgetrune(finput);
1885 	if(c == '\n') {
1886 		c = Bgetrune(finput);
1887 		lineno++;
1888 	}
1889 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1890 	while(c != Beof) {
1891 		if(c == '\\') {
1892 			if((c=Bgetrune(finput)) == '}')
1893 				return;
1894 			Bputc(ftable, '\\');
1895 		}
1896 		if(c == '%') {
1897 			if((c=Bgetrune(finput)) == '}')
1898 				return;
1899 			Bputc(ftable, '%');
1900 		}
1901 		Bputrune(ftable, c);
1902 		if(c == '\n')
1903 			lineno++;
1904 		c = Bgetrune(finput);
1905 	}
1906 	error("eof before %%}");
1907 }
1908 
1909 /*
1910  * skip over comments
1911  * skipcom is called after reading a '/'
1912  */
1913 int
skipcom(void)1914 skipcom(void)
1915 {
1916 	long c;
1917 	int i;
1918 
1919 	/* i is the number of lines skipped */
1920 	i = 0;
1921 	c = Bgetrune(finput);
1922 	if(c == '/'){			/* C++ //: skip to end of line */
1923 		while((c = Bgetrune(finput)) != Beof)
1924 			if(c == '\n')
1925 				return 1;
1926 	}else if(c == '*'){		/* normal C comment */
1927 		while((c = Bgetrune(finput)) != Beof) {
1928 			while(c == '*')
1929 				if((c = Bgetrune(finput)) == '/')
1930 					return i;
1931 			if(c == '\n')
1932 				i++;
1933 		}
1934 	}else
1935 		error("illegal comment");
1936 
1937 	error("EOF inside comment");
1938 	return 0;
1939 }
1940 
1941 /*
1942  * copy C action to the next ; or closing }
1943  */
1944 void
cpyact(int offset)1945 cpyact(int offset)
1946 {
1947 	long c;
1948 	int brac, match, j, s, fnd, tok;
1949 
1950 	Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1951 	brac = 0;
1952 
1953 loop:
1954 	c = Bgetrune(finput);
1955 swt:
1956 	switch(c) {
1957 	case ';':
1958 		if(brac == 0) {
1959 			Bputrune(faction, c);
1960 			return;
1961 		}
1962 		goto lcopy;
1963 
1964 	case '{':
1965 		brac++;
1966 		goto lcopy;
1967 
1968 	case '$':
1969 		s = 1;
1970 		tok = -1;
1971 		c = Bgetrune(finput);
1972 
1973 		/* type description */
1974 		if(c == '<') {
1975 			Bungetrune(finput);
1976 			if(gettok() != TYPENAME)
1977 				error("bad syntax on $<ident> clause");
1978 			tok = numbval;
1979 			c = Bgetrune(finput);
1980 		}
1981 		if(c == '$') {
1982 			Bprint(faction, "yyval");
1983 
1984 			/* put out the proper tag... */
1985 			if(ntypes) {
1986 				if(tok < 0)
1987 					tok = fdtype(*prdptr[nprod]);
1988 				Bprint(faction, ".%s", typeset[tok]);
1989 			}
1990 			goto loop;
1991 		}
1992 		if(c == '-') {
1993 			s = -s;
1994 			c = Bgetrune(finput);
1995 		}
1996 		if(isdigit(c)) {
1997 			j = 0;
1998 			while(isdigit(c)) {
1999 				j = j*10 + (c-'0');
2000 				c = Bgetrune(finput);
2001 			}
2002 			Bungetrune(finput);
2003 			j = j*s - offset;
2004 			if(j > 0)
2005 				error("Illegal use of $%d", j+offset);
2006 
2007 		dollar:
2008 			Bprint(faction, "yypt[-%d].yyv", -j);
2009 
2010 			/* put out the proper tag */
2011 			if(ntypes) {
2012 				if(j+offset <= 0 && tok < 0)
2013 					error("must specify type of $%d", j+offset);
2014 				if(tok < 0)
2015 					tok = fdtype(prdptr[nprod][j+offset]);
2016 				Bprint(faction, ".%s", typeset[tok]);
2017 			}
2018 			goto loop;
2019 		}
2020 		if(isupper(c) || islower(c) || c == '_' || c == '.') {
2021 			int tok; /* tok used oustide for type info */
2022 
2023 			/* look for $name */
2024 			Bungetrune(finput);
2025 			if(gettok() != IDENTIFIER)
2026 				error("$ must be followed by an identifier");
2027 			tok = chfind(2, tokname);
2028 			if((c = Bgetrune(finput)) != '#') {
2029 				Bungetrune(finput);
2030 				fnd = -1;
2031 			} else
2032 				if(gettok() != NUMBER) {
2033 					error("# must be followed by number");
2034 					fnd = -1;
2035 				} else
2036 					fnd = numbval;
2037 			for(j=1; j<=offset; ++j)
2038 				if(tok == prdptr[nprod][j]) {
2039 					if(--fnd <= 0) {
2040 						j -= offset;
2041 						goto dollar;
2042 					}
2043 				}
2044 			error("$name or $name#number not found");
2045 		}
2046 		Bputc(faction, '$');
2047 		if(s < 0 )
2048 			Bputc(faction, '-');
2049 		goto swt;
2050 
2051 	case '}':
2052 		brac--;
2053 		if(brac)
2054 			goto lcopy;
2055 		Bputrune(faction, c);
2056 		return;
2057 
2058 	case '/':
2059 		/* look for comments */
2060 		Bputrune(faction, c);
2061 		c = Bgetrune(finput);
2062 		if(c != '*')
2063 			goto swt;
2064 
2065 		/* it really is a comment; copy it */
2066 		Bputrune(faction, c);
2067 		c = Bgetrune(finput);
2068 		while(c >= 0) {
2069 			while(c == '*') {
2070 				Bputrune(faction, c);
2071 				if((c=Bgetrune(finput)) == '/')
2072 					goto lcopy;
2073 			}
2074 			Bputrune(faction, c);
2075 			if(c == '\n')
2076 				lineno++;
2077 			c = Bgetrune(finput);
2078 		}
2079 		error("EOF inside comment");
2080 
2081 	case '\'':
2082 		/* character constant */
2083 		match = '\'';
2084 		goto string;
2085 
2086 	case '"':
2087 		/* character string */
2088 		match = '"';
2089 
2090 	string:
2091 		Bputrune(faction, c);
2092 		while(c = Bgetrune(finput)) {
2093 			if(c == '\\') {
2094 				Bputrune(faction, c);
2095 				c = Bgetrune(finput);
2096 				if(c == '\n')
2097 					lineno++;
2098 			} else
2099 				if(c == match)
2100 					goto lcopy;
2101 				if(c == '\n')
2102 					error("newline in string or char. const.");
2103 			Bputrune(faction, c);
2104 		}
2105 		error("EOF in string or character constant");
2106 
2107 	case Beof:
2108 		error("action does not terminate");
2109 
2110 	case '\n':
2111 		lineno++;
2112 		goto lcopy;
2113 	}
2114 
2115 lcopy:
2116 	Bputrune(faction, c);
2117 	goto loop;
2118 }
2119 
2120 void
openup(char * stem,int dflag,int vflag,int ytab,char * ytabc)2121 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2122 {
2123 	char buf[256];
2124 
2125 	if(vflag) {
2126 		snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2127 		foutput = Bopen(buf, OWRITE);
2128 		if(foutput == 0)
2129 			error("cannot open %s", buf);
2130 	}
2131 	if(yydebug) {
2132 		snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2133 		if((fdebug = Bopen(buf, OWRITE)) == 0)
2134 			error("can't open %s", buf);
2135 	}
2136 	if(dflag) {
2137 		snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2138 		fdefine = Bopen(buf, OWRITE);
2139 		if(fdefine == 0)
2140 			error("can't create %s", buf);
2141 	}
2142 	if(ytab == 0)
2143 		snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2144 	else
2145 		strecpy(buf, buf+sizeof buf, ytabc);
2146 	ftable = Bopen(buf, OWRITE);
2147 	if(ftable == 0)
2148 		error("cannot open table file %s", buf);
2149 }
2150 
2151 /*
2152  * print the output for the states
2153  */
2154 void
output(void)2155 output(void)
2156 {
2157 	int i, k, c;
2158 	Wset *u, *v;
2159 
2160 	Bprint(ftable, "short	yyexca[] =\n{");
2161 	if(fdebug)
2162 		Bprint(fdebug, "char*	yystates[] =\n{\n");
2163 
2164 	/* output the stuff for state i */
2165 	SLOOP(i) {
2166 		nolook = tystate[i]!=MUSTLOOKAHEAD;
2167 		closure(i);
2168 
2169 		/* output actions */
2170 		nolook = 1;
2171 		aryfil(temp1, ntokens+nnonter+1, 0);
2172 		WSLOOP(wsets, u) {
2173 			c = *(u->pitem);
2174 			if(c > 1 && c < NTBASE && temp1[c] == 0) {
2175 				WSLOOP(u, v)
2176 					if(c == *(v->pitem))
2177 						putitem(v->pitem+1, (Lkset*)0);
2178 				temp1[c] = state(c);
2179 			} else
2180 				if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2181 					temp1[c+ntokens] = amem[indgo[i]+c];
2182 		}
2183 		if(i == 1)
2184 			temp1[1] = ACCEPTCODE;
2185 
2186 		/* now, we have the shifts; look at the reductions */
2187 		lastred = 0;
2188 		WSLOOP(wsets, u) {
2189 			c = *u->pitem;
2190 
2191 			/* reduction */
2192 			if(c <= 0) {
2193 				lastred = -c;
2194 				TLOOP(k)
2195 					if(BIT(u->ws.lset, k)) {
2196 						if(temp1[k] == 0)
2197 							temp1[k] = c;
2198 						else
2199 						if(temp1[k] < 0) { /* reduce/reduce conflict */
2200 							if(foutput)
2201 								Bprint(foutput,
2202 									"\n%d: reduce/reduce conflict"
2203 									" (red'ns %d and %d ) on %s",
2204 									i, -temp1[k], lastred,
2205 									symnam(k));
2206 							if(-temp1[k] > lastred)
2207 								temp1[k] = -lastred;
2208 							zzrrconf++;
2209 						} else
2210 							/* potential shift/reduce conflict */
2211 							precftn( lastred, k, i );
2212 					}
2213 				}
2214 		}
2215 		wract(i);
2216 	}
2217 
2218 	if(fdebug)
2219 		Bprint(fdebug, "};\n");
2220 	Bprint(ftable, "};\n");
2221 	Bprint(ftable, "#define	YYNPROD	%d\n", nprod);
2222 	Bprint(ftable, "#define	YYPRIVATE %d\n", PRIVATE);
2223 	if(yydebug)
2224 		Bprint(ftable, "#define	yydebug	%s\n", yydebug);
2225 }
2226 
2227 /*
2228  * pack state i from temp1 into amem
2229  */
2230 int
apack(int * p,int n)2231 apack(int *p, int n)
2232 {
2233 	int *pp, *qq, *rr, off, *q, *r;
2234 
2235 	/* we don't need to worry about checking because
2236 	 * we will only look at entries known to be there...
2237 	 * eliminate leading and trailing 0's
2238 	 */
2239 
2240 	q = p+n;
2241 	for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2242 		;
2243  	/* no actions */
2244 	if(pp > q)
2245 		return 0;
2246 	p = pp;
2247 
2248 	/* now, find a place for the elements from p to q, inclusive */
2249 	r = &amem[ACTSIZE-1];
2250 	for(rr = amem; rr <= r; rr++, off++) {
2251 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2252 			if(*pp != 0)
2253 				if(*pp != *qq && *qq != 0)
2254 					goto nextk;
2255 
2256 		/* we have found an acceptable k */
2257 		if(pkdebug && foutput != 0)
2258 			Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2259 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2260 			if(*pp) {
2261 				if(qq > r)
2262 					error("action table overflow");
2263 				if(qq > memp)
2264 					memp = qq;
2265 				*qq = *pp;
2266 			}
2267 		if(pkdebug && foutput != 0)
2268 			for(pp = amem; pp <= memp; pp += 10) {
2269 				Bprint(foutput, "\t");
2270 				for(qq = pp; qq <= pp+9; qq++)
2271 					Bprint(foutput, "%d ", *qq);
2272 				Bprint(foutput, "\n");
2273 			}
2274 		return(off);
2275 	nextk:;
2276 	}
2277 	error("no space in action table");
2278 	return 0;
2279 }
2280 
2281 /*
2282  * output the gotos for the nontermninals
2283  */
2284 void
go2out(void)2285 go2out(void)
2286 {
2287 	int i, j, k, best, count, cbest, times;
2288 
2289 	/* mark begining of gotos */
2290 	Bprint(ftemp, "$\n");
2291 	for(i = 1; i <= nnonter; i++) {
2292 		go2gen(i);
2293 
2294 		/* find the best one to make default */
2295 		best = -1;
2296 		times = 0;
2297 
2298 		/* is j the most frequent */
2299 		for(j = 0; j <= nstate; j++) {
2300 			if(tystate[j] == 0)
2301 				continue;
2302 			if(tystate[j] == best)
2303 				continue;
2304 
2305 			/* is tystate[j] the most frequent */
2306 			count = 0;
2307 			cbest = tystate[j];
2308 			for(k = j; k <= nstate; k++)
2309 				if(tystate[k] == cbest)
2310 					count++;
2311 			if(count > times) {
2312 				best = cbest;
2313 				times = count;
2314 			}
2315 		}
2316 
2317 		/* best is now the default entry */
2318 		zzgobest += times-1;
2319 		for(j = 0; j <= nstate; j++)
2320 			if(tystate[j] != 0 && tystate[j] != best) {
2321 				Bprint(ftemp, "%d,%d,", j, tystate[j]);
2322 				zzgoent++;
2323 			}
2324 
2325 		/* now, the default */
2326 		if(best == -1)
2327 			best = 0;
2328 		zzgoent++;
2329 		Bprint(ftemp, "%d\n", best);
2330 	}
2331 }
2332 
2333 /*
2334  * output the gotos for nonterminal c
2335  */
2336 void
go2gen(int c)2337 go2gen(int c)
2338 {
2339 	int i, work, cc;
2340 	Item *p, *q;
2341 
2342 
2343 	/* first, find nonterminals with gotos on c */
2344 	aryfil(temp1, nnonter+1, 0);
2345 	temp1[c] = 1;
2346 	work = 1;
2347 	while(work) {
2348 		work = 0;
2349 		PLOOP(0, i)
2350 
2351 			/* cc is a nonterminal */
2352 			if((cc=prdptr[i][1]-NTBASE) >= 0)
2353 				/* cc has a goto on c */
2354 				if(temp1[cc] != 0) {
2355 
2356 					/* thus, the left side of production i does too */
2357 					cc = *prdptr[i]-NTBASE;
2358 					if(temp1[cc] == 0) {
2359 						  work = 1;
2360 						  temp1[cc] = 1;
2361 					}
2362 				}
2363 	}
2364 
2365 	/* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2366 	if(g2debug && foutput != 0) {
2367 		Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2368 		NTLOOP(i)
2369 			if(temp1[i])
2370 				Bprint(foutput, "%s ", nontrst[i].name);
2371 		Bprint(foutput, "\n");
2372 	}
2373 
2374 	/* now, go through and put gotos into tystate */
2375 	aryfil(tystate, nstate, 0);
2376 	SLOOP(i)
2377 		ITMLOOP(i, p, q)
2378 			if((cc = *p->pitem) >= NTBASE)
2379 				/* goto on c is possible */
2380 				if(temp1[cc-NTBASE]) {
2381 					tystate[i] = amem[indgo[i]+c];
2382 					break;
2383 				}
2384 }
2385 
2386 /*
2387  * decide a shift/reduce conflict by precedence.
2388  * r is a rule number, t a token number
2389  * the conflict is in state s
2390  * temp1[t] is changed to reflect the action
2391  */
2392 void
precftn(int r,int t,int s)2393 precftn(int r, int t, int s)
2394 {
2395 	int lp, lt, action;
2396 
2397 	lp = levprd[r];
2398 	lt = toklev[t];
2399 	if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2400 
2401 		/* conflict */
2402 		if(foutput != 0)
2403 			Bprint(foutput,
2404 				"\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2405 				s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2406 		zzsrconf++;
2407 		return;
2408 	}
2409 	if(PLEVEL(lt) == PLEVEL(lp))
2410 		action = ASSOC(lt);
2411 	else
2412 		if(PLEVEL(lt) > PLEVEL(lp))
2413 			action = RASC;  /* shift */
2414 		else
2415 			action = LASC;  /* reduce */
2416 	switch(action) {
2417 	case BASC:  /* error action */
2418 		temp1[t] = ERRCODE;
2419 		break;
2420 	case LASC:  /* reduce */
2421 		temp1[t] = -r;
2422 		break;
2423 	}
2424 }
2425 
2426 /*
2427  * output state i
2428  * temp1 has the actions, lastred the default
2429  */
2430 void
wract(int i)2431 wract(int i)
2432 {
2433 	int p, p0, p1, ntimes, tred, count, j, flag;
2434 
2435 	/* find the best choice for lastred */
2436 	lastred = 0;
2437 	ntimes = 0;
2438 	TLOOP(j) {
2439 		if(temp1[j] >= 0)
2440 			continue;
2441 		if(temp1[j]+lastred == 0)
2442 			continue;
2443 		/* count the number of appearances of temp1[j] */
2444 		count = 0;
2445 		tred = -temp1[j];
2446 		levprd[tred] |= REDFLAG;
2447 		TLOOP(p)
2448 			if(temp1[p]+tred == 0)
2449 				count++;
2450 		if(count > ntimes) {
2451 			lastred = tred;
2452 			ntimes = count;
2453 		}
2454 	}
2455 
2456 	/*
2457 	 * for error recovery, arrange that, if there is a shift on the
2458 	 * error recovery token, `error', that the default be the error action
2459 	 */
2460 	if(temp1[2] > 0)
2461 		lastred = 0;
2462 
2463 	/* clear out entries in temp1 which equal lastred */
2464 	TLOOP(p)
2465 		if(temp1[p]+lastred == 0)
2466 			temp1[p] = 0;
2467 
2468 	wrstate(i);
2469 	defact[i] = lastred;
2470 	flag = 0;
2471 	TLOOP(p0)
2472 		if((p1=temp1[p0]) != 0) {
2473 			if(p1 < 0) {
2474 				p1 = -p1;
2475 				goto exc;
2476 			}
2477 			if(p1 == ACCEPTCODE) {
2478 				p1 = -1;
2479 				goto exc;
2480 			}
2481 			if(p1 == ERRCODE) {
2482 				p1 = 0;
2483 			exc:
2484 				if(flag++ == 0)
2485 					Bprint(ftable, "-1, %d,\n", i);
2486 				Bprint(ftable, "\t%d, %d,\n", p0, p1);
2487 				zzexcp++;
2488 				continue;
2489 			}
2490 			Bprint(ftemp, "%d,%d,", p0, p1);
2491 			zzacent++;
2492 		}
2493 	if(flag) {
2494 		defact[i] = -2;
2495 		Bprint(ftable, "\t-2, %d,\n", lastred);
2496 	}
2497 	Bprint(ftemp, "\n");
2498 }
2499 
2500 /*
2501  * writes state i
2502  */
2503 void
wrstate(int i)2504 wrstate(int i)
2505 {
2506 	int j0, j1;
2507 	Item *pp, *qq;
2508 	Wset *u;
2509 
2510 	if(fdebug) {
2511 		if(lastred) {
2512 			Bprint(fdebug, "	0, /*%d*/\n", i);
2513 		} else {
2514 			Bprint(fdebug, "	\"");
2515 			ITMLOOP(i, pp, qq)
2516 				Bprint(fdebug, "%s\\n", writem(pp->pitem));
2517 			if(tystate[i] == MUSTLOOKAHEAD)
2518 				WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2519 					if(*u->pitem < 0)
2520 						Bprint(fdebug, "%s\\n", writem(u->pitem));
2521 			Bprint(fdebug, "\", /*%d*/\n", i);
2522 		}
2523 	}
2524 	if(foutput == 0)
2525 		return;
2526 	Bprint(foutput, "\nstate %d\n", i);
2527 	ITMLOOP(i, pp, qq)
2528 		Bprint(foutput, "\t%s\n", writem(pp->pitem));
2529 	if(tystate[i] == MUSTLOOKAHEAD)
2530 		/* print out empty productions in closure */
2531 		WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2532 			if(*u->pitem < 0)
2533 				Bprint(foutput, "\t%s\n", writem(u->pitem));
2534 
2535 	/* check for state equal to another */
2536 	TLOOP(j0)
2537 		if((j1=temp1[j0]) != 0) {
2538 			Bprint(foutput, "\n\t%s  ", symnam(j0));
2539 			/* shift, error, or accept */
2540 			if(j1 > 0) {
2541 				if(j1 == ACCEPTCODE)
2542 					Bprint(foutput,  "accept");
2543 				else
2544 					if(j1 == ERRCODE)
2545 						Bprint(foutput, "error");
2546 					else
2547 						Bprint(foutput, "shift %d", j1);
2548 			} else
2549 				Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2550 		}
2551 
2552 	/* output the final production */
2553 	if(lastred)
2554 		Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2555 			lastred, rlines[lastred]);
2556 	else
2557 		Bprint(foutput, "\n\t.  error\n\n");
2558 
2559 	/* now, output nonterminal actions */
2560 	j1 = ntokens;
2561 	for(j0 = 1; j0 <= nnonter; j0++) {
2562 		j1++;
2563 		if(temp1[j1])
2564 			Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2565 	}
2566 }
2567 
2568 void
warray(char * s,int * v,int n)2569 warray(char *s, int *v, int n)
2570 {
2571 	int i;
2572 
2573 	Bprint(ftable, "short	%s[] =\n{", s);
2574 	for(i=0;;) {
2575 		if(i%10 == 0)
2576 			Bprint(ftable, "\n");
2577 		Bprint(ftable, "%4d", v[i]);
2578 		i++;
2579 		if(i >= n) {
2580 			Bprint(ftable, "\n};\n");
2581 			break;
2582 		}
2583 		Bprint(ftable, ",");
2584 	}
2585 }
2586 
2587 /*
2588  * in order to free up the mem and amem arrays for the optimizer,
2589  * and still be able to output yyr1, etc., after the sizes of
2590  * the action array is known, we hide the nonterminals
2591  * derived by productions in levprd.
2592  */
2593 
2594 void
hideprod(void)2595 hideprod(void)
2596 {
2597 	int i, j;
2598 
2599 	j = 0;
2600 	levprd[0] = 0;
2601 	PLOOP(1, i) {
2602 		if(!(levprd[i] & REDFLAG)) {
2603 			j++;
2604 			if(foutput != 0)
2605 				Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2606 		}
2607 		levprd[i] = *prdptr[i] - NTBASE;
2608 	}
2609 	if(j)
2610 		print("%d rules never reduced\n", j);
2611 }
2612 
2613 void
callopt(void)2614 callopt(void)
2615 {
2616 	int i, *p, j, k, *q;
2617 
2618 	/* read the arrays from tempfile and set parameters */
2619 	finput = Bopen(tempname, OREAD);
2620 	if(finput == 0)
2621 		error("optimizer cannot open tempfile");
2622 
2623 	pgo[0] = 0;
2624 	temp1[0] = 0;
2625 	nstate = 0;
2626 	nnonter = 0;
2627 	for(;;) {
2628 		switch(gtnm()) {
2629 		case '\n':
2630 			nstate++;
2631 			pmem--;
2632 			temp1[nstate] = pmem - mem0;
2633 		case ',':
2634 			continue;
2635 		case '$':
2636 			break;
2637 		default:
2638 			error("bad tempfile");
2639 		}
2640 		break;
2641 	}
2642 
2643 	pmem--;
2644 	temp1[nstate] = yypgo[0] = pmem - mem0;
2645 	for(;;) {
2646 		switch(gtnm()) {
2647 		case '\n':
2648 			nnonter++;
2649 			yypgo[nnonter] = pmem-mem0;
2650 		case ',':
2651 			continue;
2652 		case -1:
2653 			break;
2654 		default:
2655 			error("bad tempfile");
2656 		}
2657 		break;
2658 	}
2659 	pmem--;
2660 	yypgo[nnonter--] = pmem - mem0;
2661 	for(i = 0; i < nstate; i++) {
2662 		k = 32000;
2663 		j = 0;
2664 		q = mem0 + temp1[i+1];
2665 		for(p = mem0 + temp1[i]; p < q ; p += 2) {
2666 			if(*p > j)
2667 				j = *p;
2668 			if(*p < k)
2669 				k = *p;
2670 		}
2671 		/* nontrivial situation */
2672 		if(k <= j) {
2673 			/* j is now the range */
2674 /*			j -= k;			/* call scj */
2675 			if(k > maxoff)
2676 				maxoff = k;
2677 		}
2678 		tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2679 		if(j > maxspr)
2680 			maxspr = j;
2681 	}
2682 
2683 	/* initialize ggreed table */
2684 	for(i = 1; i <= nnonter; i++) {
2685 		ggreed[i] = 1;
2686 		j = 0;
2687 
2688 		/* minimum entry index is always 0 */
2689 		q = mem0 + yypgo[i+1] - 1;
2690 		for(p = mem0+yypgo[i]; p < q ; p += 2) {
2691 			ggreed[i] += 2;
2692 			if(*p > j)
2693 				j = *p;
2694 		}
2695 		ggreed[i] = ggreed[i] + 2*j;
2696 		if(j > maxoff)
2697 			maxoff = j;
2698 	}
2699 
2700 	/* now, prepare to put the shift actions into the amem array */
2701 	for(i = 0; i < ACTSIZE; i++)
2702 		amem[i] = 0;
2703 	maxa = amem;
2704 	for(i = 0; i < nstate; i++) {
2705 		if(tystate[i] == 0 && adb > 1)
2706 			Bprint(ftable, "State %d: null\n", i);
2707 		indgo[i] = YYFLAG1;
2708 	}
2709 	while((i = nxti()) != NOMORE)
2710 		if(i >= 0)
2711 			stin(i);
2712 		else
2713 			gin(-i);
2714 
2715 	/* print amem array */
2716 	if(adb > 2 )
2717 		for(p = amem; p <= maxa; p += 10) {
2718 			Bprint(ftable, "%4d  ", (int)(p-amem));
2719 			for(i = 0; i < 10; ++i)
2720 				Bprint(ftable, "%4d  ", p[i]);
2721 			Bprint(ftable, "\n");
2722 		}
2723 
2724 	/* write out the output appropriate to the language */
2725 	aoutput();
2726 	osummary();
2727 	ZAPFILE(tempname);
2728 }
2729 
2730 void
gin(int i)2731 gin(int i)
2732 {
2733 	int *p, *r, *s, *q1, *q2;
2734 
2735 	/* enter gotos on nonterminal i into array amem */
2736 	ggreed[i] = 0;
2737 
2738 	q2 = mem0+ yypgo[i+1] - 1;
2739 	q1 = mem0 + yypgo[i];
2740 
2741 	/* now, find amem place for it */
2742 	for(p = amem; p < &amem[ACTSIZE]; p++) {
2743 		if(*p)
2744 			continue;
2745 		for(r = q1; r < q2; r += 2) {
2746 			s = p + *r + 1;
2747 			if(*s)
2748 				goto nextgp;
2749 			if(s > maxa)
2750 				if((maxa = s) > &amem[ACTSIZE])
2751 					error("a array overflow");
2752 		}
2753 		/* we have found amem spot */
2754 		*p = *q2;
2755 		if(p > maxa)
2756 			if((maxa = p) > &amem[ACTSIZE])
2757 				error("a array overflow");
2758 		for(r = q1; r < q2; r += 2) {
2759 			s = p + *r + 1;
2760 			*s = r[1];
2761 		}
2762 		pgo[i] = p-amem;
2763 		if(adb > 1)
2764 			Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2765 		return;
2766 
2767 	nextgp:;
2768 	}
2769 	error("cannot place goto %d\n", i);
2770 }
2771 
2772 void
stin(int i)2773 stin(int i)
2774 {
2775 	int *r, *s, n, flag, j, *q1, *q2;
2776 
2777 	tystate[i] = 0;
2778 
2779 	/* enter state i into the amem array */
2780 	q2 = mem0+temp1[i+1];
2781 	q1 = mem0+temp1[i];
2782 	/* find an acceptable place */
2783 	for(n = -maxoff; n < ACTSIZE; n++) {
2784 		flag = 0;
2785 		for(r = q1; r < q2; r += 2) {
2786 			if((s = *r + n + amem) < amem)
2787 				goto nextn;
2788 			if(*s == 0)
2789 				flag++;
2790 			else
2791 				if(*s != r[1])
2792 					goto nextn;
2793 		}
2794 
2795 		/* check that the position equals another only if the states are identical */
2796 		for(j=0; j<nstate; j++) {
2797 			if(indgo[j] == n) {
2798 
2799 				/* we have some disagreement */
2800 				if(flag)
2801 					goto nextn;
2802 				if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2803 
2804 					/* states are equal */
2805 					indgo[i] = n;
2806 					if(adb > 1)
2807 						Bprint(ftable,
2808 						"State %d: entry at %d equals state %d\n",
2809 						i, n, j);
2810 					return;
2811 				}
2812 
2813 				/* we have some disagreement */
2814 				goto nextn;
2815 			}
2816 		}
2817 
2818 		for(r = q1; r < q2; r += 2) {
2819 			if((s = *r+n+amem) >= &amem[ACTSIZE])
2820 				error("out of space in optimizer a array");
2821 			if(s > maxa)
2822 				maxa = s;
2823 			if(*s != 0 && *s != r[1])
2824 				error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2825 			*s = r[1];
2826 		}
2827 		indgo[i] = n;
2828 		if(adb > 1)
2829 			Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2830 		return;
2831 	nextn:;
2832 	}
2833 	error("Error; failure to place state %d\n", i);
2834 }
2835 
2836 /*
2837  * finds the next i
2838  */
2839 int
nxti(void)2840 nxti(void)
2841 {
2842 	int i, max, maxi;
2843 
2844 	max = 0;
2845 	maxi = 0;
2846 	for(i = 1; i <= nnonter; i++)
2847 		if(ggreed[i] >= max) {
2848 			max = ggreed[i];
2849 			maxi = -i;
2850 		}
2851 	for(i = 0; i < nstate; ++i)
2852 		if(tystate[i] >= max) {
2853 			max = tystate[i];
2854 			maxi = i;
2855 		}
2856 	if(nxdb)
2857 		Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2858 	if(max == 0)
2859 		return NOMORE;
2860 	return maxi;
2861 }
2862 
2863 /*
2864  * write summary
2865  */
2866 void
osummary(void)2867 osummary(void)
2868 {
2869 
2870 	int i, *p;
2871 
2872 	if(foutput == 0)
2873 		return;
2874 	i = 0;
2875 	for(p = maxa; p >= amem; p--)
2876 		if(*p == 0)
2877 			i++;
2878 
2879 	Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2880 		(int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2881 	Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2882 	Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2883 }
2884 
2885 /*
2886  * this version is for C
2887  * write out the optimized parser
2888  */
2889 void
aoutput(void)2890 aoutput(void)
2891 {
2892 	Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2893 	arout("yyact", amem, (maxa-amem)+1);
2894 	arout("yypact", indgo, nstate);
2895 	arout("yypgo", pgo, nnonter+1);
2896 }
2897 
2898 void
arout(char * s,int * v,int n)2899 arout(char *s, int *v, int n)
2900 {
2901 	int i;
2902 
2903 	Bprint(ftable, "short	%s[] =\n{", s);
2904 	for(i = 0; i < n;) {
2905 		if(i%10 == 0)
2906 			Bprint(ftable, "\n");
2907 		Bprint(ftable, "%4d", v[i]);
2908 		i++;
2909 		if(i == n)
2910 			Bprint(ftable, "\n};\n");
2911 		else
2912 			Bprint(ftable, ",");
2913 	}
2914 }
2915 
2916 /*
2917  * read and convert an integer from the standard input
2918  * return the terminating character
2919  * blanks, tabs, and newlines are ignored
2920  */
2921 int
gtnm(void)2922 gtnm(void)
2923 {
2924 	int sign, val, c;
2925 
2926 	sign = 0;
2927 	val = 0;
2928 	while((c=Bgetrune(finput)) != Beof) {
2929 		if(isdigit(c)) {
2930 			val = val*10 + c-'0';
2931 			continue;
2932 		}
2933 		if(c == '-') {
2934 			sign = 1;
2935 			continue;
2936 		}
2937 		break;
2938 	}
2939 	if(sign)
2940 		val = -val;
2941 	*pmem++ = val;
2942 	if(pmem >= &mem0[MEMSIZE])
2943 		error("out of space");
2944 	return c;
2945 }
2946