xref: /inferno-os/utils/iyacc/yacc.c (revision 18eb7a2d4ffc48d0a90687ace604e4e388440626)
1 #include <lib9.h>
2 #include <bio.h>
3 #include <ctype.h>
4 
5 #define	Bungetrune	Bungetc		/* ok for now. */
6 
7 /*
8  * all these are 32 bit
9  */
10 #define TBITSET		((32+NTERMS)/32)	/* BOTCH?? +31 */
11 #define BIT(a,i)	((a)[(i)>>5] & (1<<((i)&037)))
12 #define SETBIT(a,i)	((a)[(i)>>5] |= (1<<((i)&037)))
13 #define NWORDS(n)	(((n)+32)/32)
14 
15 #ifndef PARSER
16 #define PARSER		"yaccpar"
17 #define PARSERS		"yaccpars"
18 #endif
19 #define TEMPNAME	"y.tmp.XXXXXX"
20 #define ACTNAME		"y.acts.XXXXXX"
21 #define OFILE		"tab.c"
22 #define FILEU		"output"
23 #define FILED		"tab.h"
24 #define FILEDEBUG	"debug"
25 
26 enum
27 {
28 /*
29  * the following are adjustable
30  * according to memory size
31  */
32 	ACTSIZE		= 30000,
33 	MEMSIZE		= 30000,
34 	NSTATES		= 2000,
35 	NTERMS		= 255,
36 	NPROD		= 800,
37 	NNONTERM	= 300,
38 	TEMPSIZE	= 2000,
39 	CNAMSZ		= 5000,
40 	LSETSIZE	= 2400,
41 	WSETSIZE	= 350,
42 
43 	NAMESIZE	= 50,
44 	NTYPES		= 63,
45 	ISIZE		= 400,
46 
47 	PRIVATE		= 0xE000,	/* unicode private use */
48 
49 	/* relationships which must hold:
50 		TBITSET ints must hold NTERMS+1 bits...
51 		WSETSIZE >= NNONTERM
52 		LSETSIZE >= NNONTERM
53 		TEMPSIZE >= NTERMS + NNONTERM + 1
54 		TEMPSIZE >= NSTATES
55 	*/
56 
57 	NTBASE		= 010000,
58 	ERRCODE		= 8190,
59 	ACCEPTCODE	= 8191,
60 
61 	NOASC		= 0,	/* no assoc. */
62 	LASC		= 1,	/* left assoc. */
63 	RASC		= 2,	/* right assoc. */
64 	BASC		= 3,	/* binary assoc. */
65 
66 	/* flags for state generation */
67 
68 	DONE		= 0,
69 	MUSTDO		= 1,
70 	MUSTLOOKAHEAD	= 2,
71 
72 	/* flags for a rule having an action, and being reduced */
73 
74 	ACTFLAG		= 04,
75 	REDFLAG		= 010,
76 
77 	/* output parser flags */
78 	YYFLAG1		= -1000,
79 
80 	/* parse tokens */
81 	IDENTIFIER	= PRIVATE,
82 	MARK,
83 	TERM,
84 	LEFT,
85 	RIGHT,
86 	BINARY,
87 	PREC,
88 	LCURLY,
89 	IDENTCOLON,
90 	NUMBER,
91 	START,
92 	TYPEDEF,
93 	TYPENAME,
94 	UNION,
95 
96 	ENDFILE		= 0,
97 
98 	EMPTY		= 1,
99 	WHOKNOWS	= 0,
100 	OK		= 1,
101 	NOMORE		= -1000,
102 };
103 
104 	/* macros for getting associativity and precedence levels */
105 
106 #define ASSOC(i)	((i)&03)
107 #define PLEVEL(i)	(((i)>>4)&077)
108 #define TYPE(i)		(((i)>>10)&077)
109 
110 	/* macros for setting associativity and precedence levels */
111 
112 #define SETASC(i,j)	i |= j
113 #define SETPLEV(i,j)	i |= (j<<4)
114 #define SETTYPE(i,j)	i |= (j<<10)
115 
116 	/* looping macros */
117 
118 #define TLOOP(i)	for(i=1; i<=ntokens; i++)
119 #define NTLOOP(i)	for(i=0; i<=nnonter; i++)
120 #define PLOOP(s,i)	for(i=s; i<nprod; i++)
121 #define SLOOP(i)	for(i=0; i<nstate; i++)
122 #define WSBUMP(x)	x++
123 #define WSLOOP(s,j)	for(j=s; j<cwp; j++)
124 #define ITMLOOP(i,p,q)	for(q=pstate[i+1], p=pstate[i]; p<q; p++)
125 #define SETLOOP(i)	for(i=0; i<tbitset; i++)
126 
127 	/* command to clobber tempfiles after use */
128 
129 #define	ZAPFILE(x)	if(x) remove(x)
130 
131 	/* I/O descriptors */
132 
133 Biobuf*	faction;	/* file for saving actions */
134 Biobuf*	fdefine;	/* file for #defines */
135 Biobuf*	fdebug;		/* y.debug for strings for debugging */
136 Biobuf*	ftable;		/* y.tab.c file */
137 Biobuf*	ftemp;		/* tempfile to pass 2 */
138 Biobuf*	finput;		/* input file */
139 Biobuf*	foutput;	/* y.output file */
140 
141 	/* communication variables between various I/O routines */
142 
143 char*	infile;			/* input file name */
144 int	numbval;		/* value of an input number */
145 char	tokname[NAMESIZE+4];	/* input token name, slop for runes and 0 */
146 
147 	/* structure declarations */
148 
149 typedef
150 struct
151 {
152 	int	lset[TBITSET];
153 } Lkset;
154 
155 typedef
156 struct
157 {
158 	int*	pitem;
159 	Lkset*	look;
160 } Item;
161 
162 typedef
163 struct
164 {
165 	char*	name;
166 	int	value;
167 } Symb;
168 
169 typedef
170 struct
171 {
172 	int*	pitem;
173 	int	flag;
174 	Lkset	ws;
175 } Wset;
176 
177 	/* storage of names */
178 
179 char	cnames[CNAMSZ];		/* place where token and nonterminal names are stored */
180 int	cnamsz = CNAMSZ;	/* size of cnames */
181 char*	cnamp = cnames;		/* place where next name is to be put in */
182 int	ndefout = 4;		/* number of defined symbols output */
183 char*	tempname;
184 char*	actname;
185 char	ttempname[] = TEMPNAME;
186 char	tactname[] = ACTNAME;
187 char*	parser = PARSER;
188 char*	yydebug;
189 char	par[256];		/* full path of parser */
190 
191 	/* storage of types */
192 int	ntypes;			/* number of types defined */
193 char*	typeset[NTYPES];	/* pointers to type tags */
194 
195 	/* token information */
196 
197 int	ntokens = 0 ;		/* number of tokens */
198 Symb	tokset[NTERMS];
199 int	toklev[NTERMS];		/* vector with the precedence of the terminals */
200 
201 	/* nonterminal information */
202 
203 int	nnonter = -1;		/* the number of nonterminals */
204 Symb	nontrst[NNONTERM];
205 int	start;			/* start symbol */
206 
207 	/* assigned token type values */
208 int	extval = 0;
209 
210 char*	ytabc = OFILE;	/* name of y.tab.c */
211 
212 	/* grammar rule information */
213 
214 int	mem0[MEMSIZE] ;		/* production storage */
215 int*	mem = mem0;
216 int	nprod = 1;		/* number of productions */
217 int*	prdptr[NPROD];		/* pointers to descriptions of productions */
218 int	levprd[NPROD];		/* precedence levels for the productions */
219 int	rlines[NPROD];		/* line number for this rule */
220 
221 	/* state information */
222 
223 int	nstate = 0;		/* number of states */
224 Item*	pstate[NSTATES+2];	/* pointers to the descriptions of the states */
225 int	tystate[NSTATES];	/* contains type information about the states */
226 int	defact[NSTATES];	/* the default actions of states */
227 int	tstates[NTERMS];	/* states generated by terminal gotos */
228 int	ntstates[NNONTERM]; 	/* states generated by nonterminal gotos */
229 int	mstates[NSTATES];	/* chain of overflows of term/nonterm generation lists  */
230 int	lastred; 		/* the number of the last reduction of a state */
231 
232 	/* lookahead set information */
233 
234 Lkset	lkst[LSETSIZE];
235 int	nolook;			/* flag to turn off lookahead computations */
236 int	tbitset;		/* size of lookahead sets */
237 int	nlset = 0;		/* next lookahead set index */
238 int	nolook = 0;		/* flag to suppress lookahead computations */
239 Lkset	clset;  		/* temporary storage for lookahead computations */
240 
241 	/* working set information */
242 
243 Wset	wsets[WSETSIZE];
244 Wset*	cwp;
245 
246 	/* storage for action table */
247 
248 int	amem[ACTSIZE];		/* action table storage */
249 int*	memp = amem;		/* next free action table position */
250 int	indgo[NSTATES];		/* index to the stored goto table */
251 
252 	/* temporary vector, indexable by states, terms, or ntokens */
253 
254 int	temp1[TEMPSIZE];	/* temporary storage, indexed by terms + ntokens or states */
255 int	lineno = 1;		/* current input line number */
256 int	fatfl = 1;  		/* if on, error is fatal */
257 int	nerrors = 0;		/* number of errors */
258 
259 	/* statistics collection variables */
260 
261 int	zzgoent;
262 int	zzgobest;
263 int	zzacent;
264 int	zzexcp;
265 int	zzclose;
266 int	zzrrconf;
267 int	zzsrconf;
268 
269 int*	ggreed = lkst[0].lset;
270 int*	pgo = wsets[0].ws.lset;
271 int*	yypgo = &nontrst[0].value;
272 
273 int	maxspr = 0;  		/* maximum spread of any entry */
274 int	maxoff = 0;  		/* maximum offset into a array */
275 int*	pmem = mem0;
276 int*	maxa;
277 int	nxdb = 0;
278 int	adb = 0;
279 
280 
281 	/* storage for information about the nonterminals */
282 
283 int**	pres[NNONTERM+2];  	/* vector of pointers to productions yielding each nonterminal */
284 Lkset*	pfirst[NNONTERM+2];	/* vector of pointers to first sets for each nonterminal */
285 int	pempty[NNONTERM+1];	/* vector of nonterminals nontrivially deriving e */
286 
287 	/* random stuff picked out from between functions */
288 
289 int	indebug = 0;
290 Wset*	zzcwp = wsets;
291 int	zzgoent = 0;
292 int	zzgobest = 0;
293 int	zzacent = 0;
294 int	zzexcp = 0;
295 int	zzclose = 0;
296 int	zzsrconf = 0;
297 int*	zzmemsz = mem0;
298 int	zzrrconf = 0;
299 int	pidebug = 0;		/* debugging flag for putitem */
300 int	gsdebug = 0;
301 int	cldebug = 0;		/* debugging flag for closure */
302 int	pkdebug = 0;
303 int	g2debug = 0;
304 
305 struct
306 {
307 	char*	name;
308 	long	value;
309 } resrv[] =
310 {
311 	"binary",	BINARY,
312 	"left",		LEFT,
313 	"nonassoc",	BINARY,
314 	"prec",		PREC,
315 	"right",	RIGHT,
316 	"start",	START,
317 	"term",		TERM,
318 	"token",	TERM,
319 	"type",		TYPEDEF,
320 	"union",	UNION,
321 	0,
322 };
323 
324 	/* define functions */
325 
326 void	main(int, char**);
327 void	others(void);
328 char*	chcopy(char*, char*);
329 char*	writem(int*);
330 char*	symnam(int);
331 void	summary(void);
332 void	error(char*, ...);
333 void	aryfil(int*, int, int);
334 int	setunion(int*, int*);
335 void	prlook(Lkset*);
336 void	cpres(void);
337 void	cpfir(void);
338 int	state(int);
339 void	putitem(int*, Lkset*);
340 void	cempty(void);
341 void	stagen(void);
342 void	closure(int);
343 Lkset*	flset(Lkset*);
344 void	cleantmp(void);
345 void	intr(void);
346 void	setup(int, char**);
347 void	finact(void);
348 int	defin(int, char*);
349 void	defout(int);
350 char*	cstash(char*);
351 long	gettok(void);
352 int	fdtype(int);
353 int	chfind(int, char*);
354 void	cpyunion(void);
355 void	cpycode(void);
356 int	skipcom(void);
357 void	cpyact(int);
358 void	openup(char*, int, int, int, char*);
359 void	output(void);
360 int	apack(int*, int);
361 void	go2out(void);
362 void	go2gen(int);
363 void	precftn(int, int, int);
364 void	wract(int);
365 void	wrstate(int);
366 void	warray(char*, int*, int);
367 void	hideprod(void);
368 void	callopt(void);
369 void	gin(int);
370 void	stin(int);
371 int	nxti(void);
372 void	osummary(void);
373 void	aoutput(void);
374 void	arout(char*, int*, int);
375 int	gtnm(void);
376 
377 void
main(int argc,char * argv[])378 main(int argc, char *argv[])
379 {
380 
381 	setup(argc, argv);	/* initialize and read productions */
382 	tbitset = NWORDS(ntokens);
383 	cpres();		/* make table of which productions yield a given nonterminal */
384 	cempty();		/* make a table of which nonterminals can match the empty string */
385 	cpfir();		/* make a table of firsts of nonterminals */
386 	stagen();		/* generate the states */
387 	output();		/* write the states and the tables */
388 	go2out();
389 	hideprod();
390 	summary();
391 	callopt();
392 	others();
393 	exits(0);
394 }
395 
396 /*
397  * put out other arrays, copy the parsers
398  */
399 void
others(void)400 others(void)
401 {
402 	int c, i, j;
403 
404 	finput = Bopen(parser, OREAD);
405 	if(finput == 0)
406 		error("cannot find parser %s", parser);
407 	warray("yyr1", levprd, nprod);
408 	aryfil(temp1, nprod, 0);
409 	PLOOP(1, i)
410 		temp1[i] = prdptr[i+1]-prdptr[i]-2;
411 	warray("yyr2", temp1, nprod);
412 
413 	aryfil(temp1, nstate, -1000);
414 	TLOOP(i)
415 		for(j=tstates[i]; j!=0; j=mstates[j])
416 			temp1[j] = i;
417 	NTLOOP(i)
418 		for(j=ntstates[i]; j!=0; j=mstates[j])
419 			temp1[j] = -i;
420 	warray("yychk", temp1, nstate);
421 	warray("yydef", defact, nstate);
422 
423 	/* put out token translation tables */
424 	/* table 1 has 0-256 */
425 	aryfil(temp1, 256, 0);
426 	c = 0;
427 	TLOOP(i) {
428 		j = tokset[i].value;
429 		if(j >= 0 && j < 256) {
430 			if(temp1[j]) {
431 				print("yacc bug -- cant have 2 different Ts with same value\n");
432 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
433 				nerrors++;
434 			}
435 			temp1[j] = i;
436 			if(j > c)
437 				c = j;
438 		}
439 	}
440 	warray("yytok1", temp1, c+1);
441 
442 	/* table 2 has PRIVATE-PRIVATE+256 */
443 	aryfil(temp1, 256, 0);
444 	c = 0;
445 	TLOOP(i) {
446 		j = tokset[i].value - PRIVATE;
447 		if(j >= 0 && j < 256) {
448 			if(temp1[j]) {
449 				print("yacc bug -- cant have 2 different Ts with same value\n");
450 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
451 				nerrors++;
452 			}
453 			temp1[j] = i;
454 			if(j > c)
455 				c = j;
456 		}
457 	}
458 	warray("yytok2", temp1, c+1);
459 
460 	/* table 3 has everything else */
461 	Bprint(ftable, "long	yytok3[] =\n{\n");
462 	c = 0;
463 	TLOOP(i) {
464 		j = tokset[i].value;
465 		if(j >= 0 && j < 256)
466 			continue;
467 		if(j >= PRIVATE && j < 256+PRIVATE)
468 			continue;
469 
470 		Bprint(ftable, "%4d,%4d,", j, i);
471 		c++;
472 		if(c%5 == 0)
473 			Bprint(ftable, "\n");
474 	}
475 	Bprint(ftable, "%4d\n};\n", 0);
476 
477 	/* copy parser text */
478 	while((c=Bgetrune(finput)) != Beof) {
479 		if(c == '$') {
480 			if((c = Bgetrune(finput)) != 'A')
481 				Bputrune(ftable, '$');
482 			else { /* copy actions */
483 				faction = Bopen(actname, OREAD);
484 				if(faction == 0)
485 					error("cannot reopen action tempfile");
486 				while((c=Bgetrune(faction)) != Beof)
487 					Bputrune(ftable, c);
488 				Bterm(faction);
489 				ZAPFILE(actname);
490 				c = Bgetrune(finput);
491 			}
492 		}
493 		Bputrune(ftable, c);
494 	}
495 	Bterm(ftable);
496 }
497 
498 /*
499  * copies string q into p, returning next free char ptr
500  */
501 char*
chcopy(char * p,char * q)502 chcopy(char* p, char* q)
503 {
504 	int c;
505 
506 	while(c = *q) {
507 		if(c == '"')
508 			*p++ = '\\';
509 		*p++ = c;
510 		q++;
511 	}
512 	*p = 0;
513 	return p;
514 }
515 
516 /*
517  * creates output string for item pointed to by pp
518  */
519 char*
writem(int * pp)520 writem(int *pp)
521 {
522 	int i,*p;
523 	static char sarr[ISIZE];
524 	char* q;
525 
526 	for(p=pp; *p>0; p++)
527 		;
528 	p = prdptr[-*p];
529 	q = chcopy(sarr, nontrst[*p-NTBASE].name);
530 	q = chcopy(q, ": ");
531 	for(;;) {
532 		*q = ' ';
533 		p++;
534 		if(p == pp)
535 			*q = '.';
536 		q++;
537 		*q = '\0';
538 		i = *p;
539 		if(i <= 0)
540 			break;
541 		q = chcopy(q, symnam(i));
542 		if(q > &sarr[ISIZE-30])
543 			error("item too big");
544 	}
545 
546 	/* an item calling for a reduction */
547 	i = *pp;
548 	if(i < 0 ) {
549 		q = chcopy(q, "    (");
550 		sprint(q, "%d)", -i);
551 	}
552 	return sarr;
553 }
554 
555 /*
556  * return a pointer to the name of symbol i
557  */
558 char*
symnam(int i)559 symnam(int i)
560 {
561 	char* cp;
562 
563 	cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
564 	if(*cp == ' ')
565 		cp++;
566 	return cp;
567 }
568 
569 /*
570  * output the summary on y.output
571  */
572 void
summary(void)573 summary(void)
574 {
575 
576 	if(foutput != 0) {
577 		Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
578 			ntokens, NTERMS, nnonter, NNONTERM);
579 		Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
580 			nprod, NPROD, nstate, NSTATES);
581 		Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
582 			zzsrconf, zzrrconf);
583 		Bprint(foutput, "%d/%d working sets used\n",
584 			(int)(zzcwp-wsets), WSETSIZE);
585 		Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
586 			(int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
587 		Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
588 		Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
589 		Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
590 		Bprint(foutput, "%d goto entries\n", zzgoent);
591 		Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
592 	}
593 	if(zzsrconf != 0 || zzrrconf != 0) {
594 		print("\nconflicts: ");
595 		if(zzsrconf)
596 			print("%d shift/reduce", zzsrconf);
597 		if(zzsrconf && zzrrconf)
598 			print(", ");
599 		if(zzrrconf)
600 			print("%d reduce/reduce", zzrrconf);
601 		print("\n");
602 	}
603 	if(ftemp != 0) {
604 		Bterm(ftemp);
605 		ftemp = 0;
606 	}
607 	if(fdefine != 0) {
608 		Bterm(fdefine);
609 		fdefine = 0;
610 	}
611 }
612 
613 /*
614  * write out error comment -- NEEDS WORK
615  */
616 void
error(char * s,...)617 error(char *s, ...)
618 {
619 
620 	nerrors++;
621 	fprint(2, "\n fatal error:");
622 	fprint(2, s, (&s)[1]);
623 	fprint(2, ", %s:%d\n", infile, lineno);
624 	if(!fatfl)
625 		return;
626 	summary();
627 	cleantmp();
628 	exits("error");
629 }
630 
631 /*
632  * set elements 0 through n-1 to c
633  */
634 void
aryfil(int * v,int n,int c)635 aryfil(int *v, int n, int c)
636 {
637 	int i;
638 
639 	for(i=0; i<n; i++)
640 		v[i] = c;
641 }
642 
643 /*
644  * set a to the union of a and b
645  * return 1 if b is not a subset of a, 0 otherwise
646  */
647 int
setunion(int * a,int * b)648 setunion(int *a, int *b)
649 {
650 	int i, x, sub;
651 
652 	sub = 0;
653 	SETLOOP(i) {
654 		x = *a;
655 		*a |= *b;
656 		if(*a != x)
657 			sub = 1;
658 		a++;
659 		b++;
660 	}
661 	return sub;
662 }
663 
664 void
prlook(Lkset * p)665 prlook(Lkset* p)
666 {
667 	int j, *pp;
668 
669 	pp = p->lset;
670 	if(pp == 0)
671 		Bprint(foutput, "\tNULL");
672 	else {
673 		Bprint(foutput, " { ");
674 		TLOOP(j)
675 			if(BIT(pp,j))
676 				Bprint(foutput, "%s ", symnam(j));
677 		Bprint(foutput, "}");
678 	}
679 }
680 
681 /*
682  * compute an array with the beginnings of  productions yielding given nonterminals
683  * The array pres points to these lists
684  * the array pyield has the lists: the total size is only NPROD+1
685  */
686 void
cpres(void)687 cpres(void)
688 {
689 	int c, j, i, **pmem;
690 	static int *pyield[NPROD];
691 
692 	pmem = pyield;
693 	NTLOOP(i) {
694 		c = i+NTBASE;
695 		pres[i] = pmem;
696 		fatfl = 0;  	/* make undefined  symbols  nonfatal */
697 		PLOOP(0, j)
698 			if(*prdptr[j] == c)
699 				*pmem++ =  prdptr[j]+1;
700 		if(pres[i] == pmem)
701 			error("nonterminal %s not defined!", nontrst[i].name);
702 	}
703 	pres[i] = pmem;
704 	fatfl = 1;
705 	if(nerrors) {
706 		summary();
707 		cleantmp();
708 		exits("error");
709 	}
710 	if(pmem != &pyield[nprod])
711 		error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
712 }
713 
714 /*
715  * compute an array with the first of nonterminals
716  */
717 void
cpfir(void)718 cpfir(void)
719 {
720 	int *p, **s, i, **t, ch, changes;
721 
722 	zzcwp = &wsets[nnonter];
723 	NTLOOP(i) {
724 		aryfil(wsets[i].ws.lset, tbitset, 0);
725 		t = pres[i+1];
726 		/* initially fill the sets */
727 		for(s=pres[i]; s<t; ++s)
728 			for(p = *s; (ch = *p) > 0; ++p) {
729 				if(ch < NTBASE) {
730 					SETBIT(wsets[i].ws.lset, ch);
731 					break;
732 				}
733 				if(!pempty[ch-NTBASE])
734 					break;
735 			}
736 	}
737 
738 	/* now, reflect transitivity */
739 	changes = 1;
740 	while(changes) {
741 		changes = 0;
742 		NTLOOP(i) {
743 			t = pres[i+1];
744 			for(s = pres[i]; s < t; ++s)
745 				for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
746 					changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
747 					if(!pempty[ch])
748 						break;
749 				}
750 		}
751 	}
752 
753 	NTLOOP(i)
754 		pfirst[i] = flset(&wsets[i].ws);
755 	if(!indebug)
756 		return;
757 	if(foutput != 0)
758 		NTLOOP(i) {
759 			Bprint(foutput, "\n%s: ", nontrst[i].name);
760 			prlook(pfirst[i]);
761 			Bprint(foutput, " %d\n", pempty[i]);
762 		}
763 }
764 
765 /*
766  * sorts last state,and sees if it equals earlier ones. returns state number
767  */
768 int
state(int c)769 state(int c)
770 {
771 	Item *p1, *p2, *k, *l, *q1, *q2;
772 	int size1, size2, i;
773 
774 	p1 = pstate[nstate];
775 	p2 = pstate[nstate+1];
776 	if(p1 == p2)
777 		return 0;	/* null state */
778 	/* sort the items */
779 	for(k = p2-1; k > p1; k--)	/* make k the biggest */
780 		for(l = k-1; l >= p1; --l)
781 			if(l->pitem > k->pitem) {
782 				int *s;
783 				Lkset *ss;
784 
785 				s = k->pitem;
786 				k->pitem = l->pitem;
787 				l->pitem = s;
788 				ss = k->look;
789 				k->look = l->look;
790 				l->look = ss;
791 			}
792 	size1 = p2 - p1;	/* size of state */
793 
794 	for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
795 		/* get ith state */
796 		q1 = pstate[i];
797 		q2 = pstate[i+1];
798 		size2 = q2 - q1;
799 		if(size1 != size2)
800 			continue;
801 		k = p1;
802 		for(l = q1; l < q2; l++) {
803 			if(l->pitem != k->pitem)
804 				break;
805 			k++;
806 		}
807 		if(l != q2)
808 			continue;
809 		/* found it */
810 		pstate[nstate+1] = pstate[nstate];	/* delete last state */
811 		/* fix up lookaheads */
812 		if(nolook)
813 			return i;
814 		for(l = q1, k = p1; l < q2; ++l, ++k ) {
815 			int s;
816 
817 			SETLOOP(s)
818 				clset.lset[s] = l->look->lset[s];
819 			if(setunion(clset.lset, k->look->lset)) {
820 				tystate[i] = MUSTDO;
821 				/* register the new set */
822 				l->look = flset( &clset );
823 			}
824 		}
825 		return i;
826 	}
827 	/* state is new */
828 	if(nolook)
829 		error("yacc state/nolook error");
830 	pstate[nstate+2] = p2;
831 	if(nstate+1 >= NSTATES)
832 		error("too many states");
833 	if(c >= NTBASE) {
834 		mstates[nstate] = ntstates[c-NTBASE];
835 		ntstates[c-NTBASE] = nstate;
836 	} else {
837 		mstates[nstate] = tstates[c];
838 		tstates[c] = nstate;
839 	}
840 	tystate[nstate] = MUSTDO;
841 	return nstate++;
842 }
843 
844 void
putitem(int * ptr,Lkset * lptr)845 putitem(int *ptr, Lkset *lptr)
846 {
847 	Item *j;
848 
849 	if(pidebug && foutput != 0)
850 		Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
851 	j = pstate[nstate+1];
852 	j->pitem = ptr;
853 	if(!nolook)
854 		j->look = flset(lptr);
855 	pstate[nstate+1] = ++j;
856 	if((int*)j > zzmemsz) {
857 		zzmemsz = (int*)j;
858 		if(zzmemsz >=  &mem0[MEMSIZE])
859 			error("out of state space");
860 	}
861 }
862 
863 /*
864  * mark nonterminals which derive the empty string
865  * also, look for nonterminals which don't derive any token strings
866  */
867 void
cempty(void)868 cempty(void)
869 {
870 
871 	int i, *p;
872 
873 	/* first, use the array pempty to detect productions that can never be reduced */
874 	/* set pempty to WHONOWS */
875 	aryfil(pempty, nnonter+1, WHOKNOWS);
876 
877 	/* now, look at productions, marking nonterminals which derive something */
878 more:
879 	PLOOP(0, i) {
880 		if(pempty[*prdptr[i] - NTBASE])
881 			continue;
882 		for(p = prdptr[i]+1; *p >= 0; ++p)
883 			if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
884 				break;
885 		/* production can be derived */
886 		if(*p < 0) {
887 			pempty[*prdptr[i]-NTBASE] = OK;
888 			goto more;
889 		}
890 	}
891 
892 	/* now, look at the nonterminals, to see if they are all OK */
893 	NTLOOP(i) {
894 		/* the added production rises or falls as the start symbol ... */
895 		if(i == 0)
896 			continue;
897 		if(pempty[i] != OK) {
898 			fatfl = 0;
899 			error("nonterminal %s never derives any token string", nontrst[i].name);
900 		}
901 	}
902 
903 	if(nerrors) {
904 		summary();
905 		cleantmp();
906 		exits("error");
907 	}
908 
909 	/* now, compute the pempty array, to see which nonterminals derive the empty string */
910 	/* set pempty to WHOKNOWS */
911 	aryfil( pempty, nnonter+1, WHOKNOWS);
912 
913 	/* loop as long as we keep finding empty nonterminals */
914 
915 again:
916 	PLOOP(1, i) {
917 		/* not known to be empty */
918 		if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
919 			for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
920 				;
921 			/* we have a nontrivially empty nonterminal */
922 			if(*p < 0) {
923 				pempty[*prdptr[i]-NTBASE] = EMPTY;
924 				/* got one ... try for another */
925 				goto again;
926 			}
927 		}
928 	}
929 }
930 
931 /*
932  * generate the states
933  */
934 void
stagen(void)935 stagen(void)
936 {
937 
938 	int c, i, j, more;
939 	Wset *p, *q;
940 
941 	/* initialize */
942 	nstate = 0;
943 
944 	/* THIS IS FUNNY from the standpoint of portability
945 	 * it represents the magic moment when the mem0 array, which has
946 	 * been holding the productions, starts to hold item pointers, of a
947 	 * different type...
948 	 * someday, alloc should be used to allocate all this stuff... for now, we
949 	 * accept that if pointers don't fit in integers, there is a problem...
950 	 */
951 
952 	pstate[0] = pstate[1] = (Item*)mem;
953 	aryfil(clset.lset, tbitset, 0);
954 	putitem(prdptr[0]+1, &clset);
955 	tystate[0] = MUSTDO;
956 	nstate = 1;
957 	pstate[2] = pstate[1];
958 
959 	aryfil(amem, ACTSIZE, 0);
960 
961 	/* now, the main state generation loop */
962 	for(more=1; more;) {
963 		more = 0;
964 		SLOOP(i) {
965 			if(tystate[i] != MUSTDO)
966 				continue;
967 			tystate[i] = DONE;
968 			aryfil(temp1, nnonter+1, 0);
969 			/* take state i, close it, and do gotos */
970 			closure(i);
971 			/* generate goto's */
972 			WSLOOP(wsets, p) {
973 				if(p->flag)
974 					continue;
975 				p->flag = 1;
976 				c = *(p->pitem);
977 				if(c <= 1) {
978 					if(pstate[i+1]-pstate[i] <= p-wsets)
979 						tystate[i] = MUSTLOOKAHEAD;
980 					continue;
981 				}
982 				/* do a goto on c */
983 				WSLOOP(p, q)
984 					/* this item contributes to the goto */
985 					if(c == *(q->pitem)) {
986 						putitem(q->pitem+1, &q->ws);
987 						q->flag = 1;
988 					}
989 				if(c < NTBASE)
990 					state(c);	/* register new state */
991 				else
992 					temp1[c-NTBASE] = state(c);
993 			}
994 			if(gsdebug && foutput != 0) {
995 				Bprint(foutput, "%d: ", i);
996 				NTLOOP(j)
997 					if(temp1[j])
998 						Bprint(foutput, "%s %d, ",
999 						nontrst[j].name, temp1[j]);
1000 				Bprint(foutput, "\n");
1001 			}
1002 			indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1003 			/* do some more */
1004 			more = 1;
1005 		}
1006 	}
1007 }
1008 
1009 /*
1010  * generate the closure of state i
1011  */
1012 void
closure(int i)1013 closure(int i)
1014 {
1015 
1016 	Wset *u, *v;
1017 	Item *p, *q;
1018 	int c, ch, work, k, *pi, **s, **t;
1019 
1020 	zzclose++;
1021 
1022 	/* first, copy kernel of state i to wsets */
1023 	cwp = wsets;
1024 	ITMLOOP(i, p, q) {
1025 		cwp->pitem = p->pitem;
1026 		cwp->flag = 1;			/* this item must get closed */
1027 		SETLOOP(k)
1028 			cwp->ws.lset[k] = p->look->lset[k];
1029 		WSBUMP(cwp);
1030 	}
1031 
1032 	/* now, go through the loop, closing each item */
1033 	work = 1;
1034 	while(work) {
1035 		work = 0;
1036 		WSLOOP(wsets, u) {
1037 			if(u->flag == 0)
1038 				continue;
1039 			/* dot is before c */
1040 			c = *(u->pitem);
1041 			if(c < NTBASE) {
1042 				u->flag = 0;
1043 				/* only interesting case is where . is before nonterminal */
1044 				continue;
1045 			}
1046 
1047 			/* compute the lookahead */
1048 			aryfil(clset.lset, tbitset, 0);
1049 
1050 			/* find items involving c */
1051 			WSLOOP(u, v)
1052 				if(v->flag == 1 && *(pi=v->pitem) == c) {
1053 					v->flag = 0;
1054 					if(nolook)
1055 						continue;
1056 					while((ch = *++pi) > 0) {
1057 						/* terminal symbol */
1058 						if(ch < NTBASE) {
1059 							SETBIT(clset.lset, ch);
1060 							break;
1061 						}
1062 						/* nonterminal symbol */
1063 						setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1064 						if(!pempty[ch-NTBASE])
1065 							break;
1066 					}
1067 					if(ch <= 0)
1068 						setunion(clset.lset, v->ws.lset);
1069 				}
1070 
1071 			/*
1072 			 * now loop over productions derived from c
1073 			 * c is now nonterminal number
1074 			 */
1075 			c -= NTBASE;
1076 			t = pres[c+1];
1077 			for(s = pres[c]; s < t; ++s) {
1078 				/*
1079 				 * put these items into the closure
1080 				 * is the item there
1081 				 */
1082 				WSLOOP(wsets, v)
1083 					/* yes, it is there */
1084 					if(v->pitem == *s) {
1085 						if(nolook)
1086 							goto nexts;
1087 						if(setunion(v->ws.lset, clset.lset))
1088 							v->flag = work = 1;
1089 						goto nexts;
1090 					}
1091 
1092 				/*  not there; make a new entry */
1093 				if(cwp-wsets+1 >= WSETSIZE)
1094 					error( "working set overflow");
1095 				cwp->pitem = *s;
1096 				cwp->flag = 1;
1097 				if(!nolook) {
1098 					work = 1;
1099 					SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1100 				}
1101 				WSBUMP(cwp);
1102 
1103 			nexts:;
1104 			}
1105 		}
1106 	}
1107 
1108 	/* have computed closure; flags are reset; return */
1109 	if(cwp > zzcwp)
1110 		zzcwp = cwp;
1111 	if(cldebug && foutput != 0) {
1112 		Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1113 		WSLOOP(wsets, u) {
1114 			if(u->flag)
1115 				Bprint(foutput, "flag set!\n");
1116 			u->flag = 0;
1117 			Bprint(foutput, "\t%s", writem(u->pitem));
1118 			prlook(&u->ws);
1119 			Bprint(foutput, "\n");
1120 		}
1121 	}
1122 }
1123 
1124 /*
1125  * decide if the lookahead set pointed to by p is known
1126  * return pointer to a perminent location for the set
1127  */
1128 Lkset*
flset(Lkset * p)1129 flset(Lkset *p)
1130 {
1131 	Lkset *q;
1132 	int *u, *v, *w, j;
1133 
1134 	for(q = &lkst[nlset]; q-- > lkst;) {
1135 		u = p->lset;
1136 		v = q->lset;
1137 		w = &v[tbitset];
1138 		while(v < w)
1139 			if(*u++ != *v++)
1140 				goto more;
1141 		/* we have matched */
1142 		return q;
1143 	more:;
1144 	}
1145 	/* add a new one */
1146 	q = &lkst[nlset++];
1147 	if(nlset >= LSETSIZE)
1148 		error("too many lookahead sets");
1149 	SETLOOP(j)
1150 		q->lset[j] = p->lset[j];
1151 	return q;
1152 }
1153 
1154 void
cleantmp(void)1155 cleantmp(void)
1156 {
1157 	ZAPFILE(actname);
1158 	ZAPFILE(tempname);
1159 }
1160 
1161 void
intr(void)1162 intr(void)
1163 {
1164 	cleantmp();
1165 	exits("interrupted");
1166 }
1167 
1168 void
setup(int argc,char * argv[])1169 setup(int argc, char *argv[])
1170 {
1171 	long c, t;
1172 	int i, j, lev, ty, ytab, *p;
1173 	int vflag, dflag, stem;
1174 	char actnm[8], *stemc, *cp;
1175 
1176 	ytab = 0;
1177 	vflag = 0;
1178 	dflag = 0;
1179 	stem = 0;
1180 	stemc = "y";
1181 	foutput = 0;
1182 	fdefine = 0;
1183 	fdebug = 0;
1184 	ARGBEGIN{
1185 	case 'v':
1186 	case 'V':
1187 		vflag++;
1188 		break;
1189 	case 'D':
1190 		yydebug = ARGF();
1191 		break;
1192 	case 'd':
1193 		dflag++;
1194 		break;
1195 	case 'o':
1196 		ytab++;
1197 		ytabc = ARGF();
1198 		break;
1199 	case 's':
1200 		stem++;
1201 		stemc = ARGF();
1202 		break;
1203 	case 'S':
1204 		parser = PARSERS;
1205 		break;
1206 	default:
1207 		error("illegal option: %c", ARGC());
1208 	}ARGEND
1209 
1210 	cp = getenv("ROOT");
1211 	if(cp == 0)
1212 		cp = ROOT;
1213 	snprint(par, sizeof(par), "%s/utils/lib/%s", cp, parser);
1214 	parser = par;
1215 
1216 	openup(stemc, dflag, vflag, ytab, ytabc);
1217 
1218 	ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1219 	faction = Bopen(actname = mktemp(tactname), OWRITE);
1220 	if(ftemp == 0 || faction == 0)
1221 		error("cannot open temp file");
1222 	if(argc < 1)
1223 		error("no input file");
1224 	infile = argv[0];
1225 	finput = Bopen(infile, OREAD);
1226 	if(finput == 0)
1227 		error("cannot open '%s'", argv[0]);
1228 	cnamp = cnames;
1229 
1230 	defin(0, "$end");
1231 	extval = PRIVATE;	/* tokens start in unicode 'private use' */
1232 	defin(0, "error");
1233 	defin(1, "$accept");
1234 	defin(0, "$unk");
1235 	mem = mem0;
1236 	i = 0;
1237 
1238 	for(t = gettok(); t != MARK && t != ENDFILE;)
1239 	switch(t) {
1240 	case ';':
1241 		t = gettok();
1242 		break;
1243 
1244 	case START:
1245 		if(gettok() != IDENTIFIER)
1246 			error("bad %%start construction");
1247 		start = chfind(1, tokname);
1248 		t = gettok();
1249 		continue;
1250 
1251 	case TYPEDEF:
1252 		if(gettok() != TYPENAME)
1253 			error("bad syntax in %%type");
1254 		ty = numbval;
1255 		for(;;) {
1256 			t = gettok();
1257 			switch(t) {
1258 			case IDENTIFIER:
1259 				if((t=chfind(1, tokname)) < NTBASE) {
1260 					j = TYPE(toklev[t]);
1261 					if(j != 0 && j != ty)
1262 						error("type redeclaration of token %s",
1263 							tokset[t].name);
1264 					else
1265 						SETTYPE(toklev[t], ty);
1266 				} else {
1267 					j = nontrst[t-NTBASE].value;
1268 					if(j != 0 && j != ty)
1269 						error("type redeclaration of nonterminal %s",
1270 							nontrst[t-NTBASE].name );
1271 					else
1272 						nontrst[t-NTBASE].value = ty;
1273 				}
1274 			case ',':
1275 				continue;
1276 			case ';':
1277 				t = gettok();
1278 			default:
1279 				break;
1280 			}
1281 			break;
1282 		}
1283 		continue;
1284 
1285 	case UNION:
1286 		/* copy the union declaration to the output */
1287 		cpyunion();
1288 		t = gettok();
1289 		continue;
1290 
1291 	case LEFT:
1292 	case BINARY:
1293 	case RIGHT:
1294 		i++;
1295 
1296 	case TERM:
1297 		/* nonzero means new prec. and assoc. */
1298 		lev = t-TERM;
1299 		ty = 0;
1300 
1301 		/* get identifiers so defined */
1302 		t = gettok();
1303 
1304 		/* there is a type defined */
1305 		if(t == TYPENAME) {
1306 			ty = numbval;
1307 			t = gettok();
1308 		}
1309 		for(;;) {
1310 			switch(t) {
1311 			case ',':
1312 				t = gettok();
1313 				continue;
1314 
1315 			case ';':
1316 				break;
1317 
1318 			case IDENTIFIER:
1319 				j = chfind(0, tokname);
1320 				if(j >= NTBASE)
1321 					error("%s defined earlier as nonterminal", tokname);
1322 				if(lev) {
1323 					if(ASSOC(toklev[j]))
1324 						error("redeclaration of precedence of %s", tokname);
1325 					SETASC(toklev[j], lev);
1326 					SETPLEV(toklev[j], i);
1327 				}
1328 				if(ty) {
1329 					if(TYPE(toklev[j]))
1330 						error("redeclaration of type of %s", tokname);
1331 					SETTYPE(toklev[j],ty);
1332 				}
1333 				t = gettok();
1334 				if(t == NUMBER) {
1335 					tokset[j].value = numbval;
1336 					if(j < ndefout && j > 3)
1337 						error("please define type number of %s earlier",
1338 							tokset[j].name);
1339 					t = gettok();
1340 				}
1341 				continue;
1342 			}
1343 			break;
1344 		}
1345 		continue;
1346 
1347 	case LCURLY:
1348 		defout(0);
1349 		cpycode();
1350 		t = gettok();
1351 		continue;
1352 
1353 	default:
1354 		error("syntax error");
1355 	}
1356 	if(t == ENDFILE)
1357 		error("unexpected EOF before %%");
1358 
1359 	/* t is MARK */
1360 	Bprint(ftable, "extern	int	yyerrflag;\n");
1361 	Bprint(ftable, "#ifndef	YYMAXDEPTH\n");
1362 	Bprint(ftable, "#define	YYMAXDEPTH	150\n");
1363 	Bprint(ftable, "#endif\n" );
1364 	if(!ntypes) {
1365 		Bprint(ftable, "#ifndef	YYSTYPE\n");
1366 		Bprint(ftable, "#define	YYSTYPE	int\n");
1367 		Bprint(ftable, "#endif\n");
1368 	}
1369 	Bprint(ftable, "YYSTYPE	yylval;\n");
1370 	Bprint(ftable, "YYSTYPE	yyval;\n");
1371 
1372 	prdptr[0] = mem;
1373 
1374 	/* added production */
1375 	*mem++ = NTBASE;
1376 
1377 	/* if start is 0, we will overwrite with the lhs of the first rule */
1378 	*mem++ = start;
1379 	*mem++ = 1;
1380 	*mem++ = 0;
1381 	prdptr[1] = mem;
1382 	while((t=gettok()) == LCURLY)
1383 		cpycode();
1384 	if(t != IDENTCOLON)
1385 		error("bad syntax on first rule");
1386 
1387 	if(!start)
1388 		prdptr[0][1] = chfind(1, tokname);
1389 
1390 	/* read rules */
1391 	while(t != MARK && t != ENDFILE) {
1392 		/* process a rule */
1393 		rlines[nprod] = lineno;
1394 		if(t == '|')
1395 			*mem++ = *prdptr[nprod-1];
1396 		else
1397 			if(t == IDENTCOLON) {
1398 				*mem = chfind(1, tokname);
1399 				if(*mem < NTBASE)
1400 					error("token illegal on LHS of grammar rule");
1401 				mem++;
1402 			} else
1403 				error("illegal rule: missing semicolon or | ?");
1404 		/* read rule body */
1405 		t = gettok();
1406 
1407 	more_rule:
1408 		while(t == IDENTIFIER) {
1409 			*mem = chfind(1, tokname);
1410 			if(*mem < NTBASE)
1411 				levprd[nprod] = toklev[*mem];
1412 			mem++;
1413 			t = gettok();
1414 		}
1415 		if(t == PREC) {
1416 			if(gettok() != IDENTIFIER)
1417 				error("illegal %%prec syntax");
1418 			j = chfind(2, tokname);
1419 			if(j >= NTBASE)
1420 				error("nonterminal %s illegal after %%prec",
1421 					nontrst[j-NTBASE].name);
1422 			levprd[nprod] = toklev[j];
1423 			t = gettok();
1424 		}
1425 		if(t == '=') {
1426 			levprd[nprod] |= ACTFLAG;
1427 			Bprint(faction, "\ncase %d:", nprod);
1428 			cpyact(mem-prdptr[nprod]-1);
1429 			Bprint(faction, " break;");
1430 			if((t=gettok()) == IDENTIFIER) {
1431 
1432 				/* action within rule... */
1433 				sprint(actnm, "$$%d", nprod);
1434 
1435 				/* make it a nonterminal */
1436 				j = chfind(1, actnm);
1437 
1438 				/*
1439 				 * the current rule will become rule number nprod+1
1440 				 * move the contents down, and make room for the null
1441 				 */
1442 				for(p = mem; p >= prdptr[nprod]; --p)
1443 					p[2] = *p;
1444 				mem += 2;
1445 
1446 				/* enter null production for action */
1447 				p = prdptr[nprod];
1448 				*p++ = j;
1449 				*p++ = -nprod;
1450 
1451 				/* update the production information */
1452 				levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1453 				levprd[nprod] = ACTFLAG;
1454 				if(++nprod >= NPROD)
1455 					error("more than %d rules", NPROD);
1456 				prdptr[nprod] = p;
1457 
1458 				/* make the action appear in the original rule */
1459 				*mem++ = j;
1460 
1461 				/* get some more of the rule */
1462 				goto more_rule;
1463 			}
1464 		}
1465 
1466 		while(t == ';')
1467 			t = gettok();
1468 		*mem++ = -nprod;
1469 
1470 		/* check that default action is reasonable */
1471 		if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1472 
1473 			/* no explicit action, LHS has value */
1474 			int tempty;
1475 
1476 			tempty = prdptr[nprod][1];
1477 			if(tempty < 0)
1478 				error("must return a value, since LHS has a type");
1479 			else
1480 				if(tempty >= NTBASE)
1481 					tempty = nontrst[tempty-NTBASE].value;
1482 				else
1483 					tempty = TYPE(toklev[tempty]);
1484 			if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1485 				error("default action causes potential type clash");
1486 		}
1487 		nprod++;
1488 		if(nprod >= NPROD)
1489 			error("more than %d rules", NPROD);
1490 		prdptr[nprod] = mem;
1491 		levprd[nprod] = 0;
1492 	}
1493 
1494 	/* end of all rules */
1495 	defout(1);
1496 
1497 	finact();
1498 	if(t == MARK) {
1499 		Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1500 		while((c=Bgetrune(finput)) != Beof)
1501 			Bputrune(ftable, c);
1502 	}
1503 	Bterm(finput);
1504 }
1505 
1506 /*
1507  * finish action routine
1508  */
1509 void
finact(void)1510 finact(void)
1511 {
1512 
1513 	Bterm(faction);
1514 	Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1515 	Bprint(ftable, "#define YYERRCODE %d\n", 2);
1516 }
1517 
1518 /*
1519  * define s to be a terminal if t=0
1520  * or a nonterminal if t=1
1521  */
1522 int
defin(int nt,char * s)1523 defin(int nt, char *s)
1524 {
1525 	int val;
1526 	Rune rune;
1527 
1528 	val = 0;
1529 	if(nt) {
1530 		nnonter++;
1531 		if(nnonter >= NNONTERM)
1532 			error("too many nonterminals, limit %d",NNONTERM);
1533 		nontrst[nnonter].name = cstash(s);
1534 		return NTBASE + nnonter;
1535 	}
1536 
1537 	/* must be a token */
1538 	ntokens++;
1539 	if(ntokens >= NTERMS)
1540 		error("too many terminals, limit %d", NTERMS);
1541 	tokset[ntokens].name = cstash(s);
1542 
1543 	/* establish value for token */
1544 	/* single character literal */
1545 	if(s[0] == ' ') {
1546 		val = chartorune(&rune, &s[1]);
1547 		if(s[val+1] == 0) {
1548 			val = rune;
1549 			goto out;
1550 		}
1551 	}
1552 
1553 	/* escape sequence */
1554 	if(s[0] == ' ' && s[1] == '\\') {
1555 		if(s[3] == 0) {
1556 			/* single character escape sequence */
1557 			switch(s[2]) {
1558 			case 'n':	val = '\n'; break;
1559 			case 'r':	val = '\r'; break;
1560 			case 'b':	val = '\b'; break;
1561 			case 't':	val = '\t'; break;
1562 			case 'f':	val = '\f'; break;
1563 			case '\'':	val = '\''; break;
1564 			case '"':	val = '"'; break;
1565 			case '\\':	val = '\\'; break;
1566 			default:	error("invalid escape");
1567 			}
1568 			goto out;
1569 		}
1570 
1571 		/* \nnn sequence */
1572 		if(s[2] >= '0' && s[2] <= '7') {
1573 			if(s[3] < '0' ||
1574 			   s[3] > '7' ||
1575 			   s[4] < '0' ||
1576 			   s[4] > '7' ||
1577 			   s[5] != 0)
1578 				error("illegal \\nnn construction");
1579 			val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1580 			if(val == 0)
1581 				error("'\\000' is illegal");
1582 			goto out;
1583 		}
1584 		error("unknown escape");
1585 	}
1586 	val = extval++;
1587 
1588 out:
1589 	tokset[ntokens].value = val;
1590 	toklev[ntokens] = 0;
1591 	return ntokens;
1592 }
1593 
1594 /*
1595  * write out the defines (at the end of the declaration section)
1596  */
1597 void
defout(int last)1598 defout(int last)
1599 {
1600 	int i, c;
1601 	char sar[NAMESIZE+10];
1602 
1603 	for(i=ndefout; i<=ntokens; i++) {
1604 		/* non-literals */
1605 		c = tokset[i].name[0];
1606 		if(c != ' ' && c != '$') {
1607 			Bprint(ftable, "#define	%s	%d\n",
1608 				tokset[i].name, tokset[i].value);
1609 			if(fdefine)
1610 				Bprint(fdefine, "#define\t%s\t%d\n",
1611 					tokset[i].name, tokset[i].value);
1612 		}
1613 	}
1614 	ndefout = ntokens+1;
1615 	if(last && fdebug) {
1616 		Bprint(fdebug, "char*	yytoknames[] =\n{\n");
1617 		TLOOP(i) {
1618 			if(tokset[i].name) {
1619 				chcopy(sar, tokset[i].name);
1620 				Bprint(fdebug, "\t\"%s\",\n", sar);
1621 				continue;
1622 			}
1623 			Bprint(fdebug, "\t0,\n");
1624 		}
1625 		Bprint(fdebug, "};\n");
1626 	}
1627 }
1628 
1629 char*
cstash(char * s)1630 cstash(char *s)
1631 {
1632 	char *temp;
1633 
1634 	temp = cnamp;
1635 	do {
1636 		if(cnamp >= &cnames[cnamsz])
1637 			error("too many characters in id's and literals");
1638 		else
1639 			*cnamp++ = *s;
1640 	} while(*s++);
1641 	return temp;
1642 }
1643 
1644 long
gettok(void)1645 gettok(void)
1646 {
1647 	long c;
1648 	Rune rune;
1649 	int i, base, match, reserve;
1650 	static int peekline;
1651 
1652 begin:
1653 	reserve = 0;
1654 	lineno += peekline;
1655 	peekline = 0;
1656 	c = Bgetrune(finput);
1657 	while(c == ' ' || c == '\n' || c == '\t' || c == '\f' || c == '\r') {
1658 		if(c == '\n')
1659 			lineno++;
1660 		c = Bgetrune(finput);
1661 	}
1662 
1663 	/* skip comment */
1664 	if(c == '/') {
1665 		lineno += skipcom();
1666 		goto begin;
1667 	}
1668 	switch(c) {
1669 	case Beof:
1670 		return ENDFILE;
1671 
1672 	case '{':
1673 		Bungetrune(finput);
1674 		return '=';
1675 
1676 	case '<':
1677 		/* get, and look up, a type name (union member name) */
1678 		i = 0;
1679 		while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n' && c != '\r') {
1680 			rune = c;
1681 			c = runetochar(&tokname[i], &rune);
1682 			if(i < NAMESIZE)
1683 				i += c;
1684 		}
1685 		if(c != '>')
1686 			error("unterminated < ... > clause");
1687 		tokname[i] = 0;
1688 		for(i=1; i<=ntypes; i++)
1689 			if(!strcmp(typeset[i], tokname)) {
1690 				numbval = i;
1691 				return TYPENAME;
1692 			}
1693 		ntypes++;
1694 		numbval = ntypes;
1695 		typeset[numbval] = cstash(tokname);
1696 		return TYPENAME;
1697 
1698 	case '"':
1699 	case '\'':
1700 		match = c;
1701 		tokname[0] = ' ';
1702 		i = 1;
1703 		for(;;) {
1704 			c = Bgetrune(finput);
1705 			if(c == '\n' || c == '\r' || c <= 0)
1706 				error("illegal or missing ' or \"" );
1707 			if(c == '\\') {
1708 				tokname[i] = '\\';
1709 				if(i < NAMESIZE)
1710 					i++;
1711 				c = Bgetrune(finput);
1712 			} else
1713 				if(c == match)
1714 					break;
1715 			rune = c;
1716 			c = runetochar(&tokname[i], &rune);
1717 			if(i < NAMESIZE)
1718 				i += c;
1719 		}
1720 		break;
1721 
1722 	case '%':
1723 	case '\\':
1724 		switch(c = Bgetrune(finput)) {
1725 		case '0':	return TERM;
1726 		case '<':	return LEFT;
1727 		case '2':	return BINARY;
1728 		case '>':	return RIGHT;
1729 		case '%':
1730 		case '\\':	return MARK;
1731 		case '=':	return PREC;
1732 		case '{':	return LCURLY;
1733 		default:	reserve = 1;
1734 		}
1735 
1736 	default:
1737 		/* number */
1738 		if(isdigit(c)) {
1739 			numbval = c-'0';
1740 			base = (c=='0')? 8: 10;
1741 			for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1742 				numbval = numbval*base + (c-'0');
1743 			Bungetrune(finput);
1744 			return NUMBER;
1745 		}
1746 		if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1747 			i = 0;
1748 			while(islower(c) || isupper(c) || isdigit(c) ||
1749 			    c == '-' || c=='_' || c=='.' || c=='$') {
1750 				if(reserve && isupper(c))
1751 					c += 'a'-'A';
1752 				rune = c;
1753 				c = runetochar(&tokname[i], &rune);
1754 				if(i < NAMESIZE)
1755 					i += c;
1756 				c = Bgetrune(finput);
1757 			}
1758 		} else
1759 			return c;
1760 		Bungetrune(finput);
1761 	}
1762 	tokname[i] = 0;
1763 
1764 	/* find a reserved word */
1765 	if(reserve) {
1766 		for(c=0; resrv[c].name; c++)
1767 			if(strcmp(tokname, resrv[c].name) == 0)
1768 				return resrv[c].value;
1769 		error("invalid escape, or illegal reserved word: %s", tokname);
1770 	}
1771 
1772 	/* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1773 	c = Bgetrune(finput);
1774 	while(c == ' ' || c == '\t'|| c == '\n' || c == '\r' || c == '\f' || c == '/') {
1775 		if(c == '\n')
1776 			peekline++;
1777 		/* look for comments */
1778 		if(c == '/')
1779 			peekline += skipcom();
1780 		c = Bgetrune(finput);
1781 	}
1782 	if(c == ':')
1783 		return IDENTCOLON;
1784 	Bungetrune(finput);
1785 	return IDENTIFIER;
1786 }
1787 
1788 /*
1789  * determine the type of a symbol
1790  */
1791 int
fdtype(int t)1792 fdtype(int t)
1793 {
1794 	int v;
1795 
1796 	if(t >= NTBASE)
1797 		v = nontrst[t-NTBASE].value;
1798 	else
1799 		v = TYPE(toklev[t]);
1800 	if(v <= 0)
1801 		error("must specify type for %s", (t>=NTBASE)?
1802 			nontrst[t-NTBASE].name: tokset[t].name);
1803 	return v;
1804 }
1805 
1806 int
chfind(int t,char * s)1807 chfind(int t, char *s)
1808 {
1809 	int i;
1810 
1811 	if(s[0] == ' ')
1812 		t = 0;
1813 	TLOOP(i)
1814 		if(!strcmp(s, tokset[i].name))
1815 			return i;
1816 	NTLOOP(i)
1817 		if(!strcmp(s, nontrst[i].name))
1818 			return NTBASE+i;
1819 
1820 	/* cannot find name */
1821 	if(t > 1)
1822 		error("%s should have been defined earlier", s);
1823 	return defin(t, s);
1824 }
1825 
1826 /*
1827  * copy the union declaration to the output, and the define file if present
1828  */
1829 void
cpyunion(void)1830 cpyunion(void)
1831 {
1832 	long c;
1833 	int level;
1834 
1835 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1836 	Bprint(ftable, "typedef union ");
1837 	if(fdefine != 0)
1838 		Bprint(fdefine, "\ntypedef union ");
1839 
1840 	level = 0;
1841 	for(;;) {
1842 		if((c=Bgetrune(finput)) == Beof)
1843 			error("EOF encountered while processing %%union");
1844 		Bputrune(ftable, c);
1845 		if(fdefine != 0)
1846 			Bputrune(fdefine, c);
1847 		switch(c) {
1848 		case '\n':
1849 			lineno++;
1850 			break;
1851 		case '{':
1852 			level++;
1853 			break;
1854 		case '}':
1855 			level--;
1856 
1857 			/* we are finished copying */
1858 			if(level == 0) {
1859 				Bprint(ftable, " YYSTYPE;\n");
1860 				if(fdefine != 0)
1861 					Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1862 				return;
1863 			}
1864 		}
1865 	}
1866 }
1867 
1868 /*
1869  * copies code between \{ and \}
1870  */
1871 void
cpycode(void)1872 cpycode(void)
1873 {
1874 
1875 	long c;
1876 
1877 	c = Bgetrune(finput);
1878 	if(c == '\n') {
1879 		c = Bgetrune(finput);
1880 		lineno++;
1881 	}
1882 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1883 	while(c != Beof) {
1884 		if(c == '\\') {
1885 			if((c=Bgetrune(finput)) == '}')
1886 				return;
1887 			Bputc(ftable, '\\');
1888 		}
1889 		if(c == '%') {
1890 			if((c=Bgetrune(finput)) == '}')
1891 				return;
1892 			Bputc(ftable, '%');
1893 		}
1894 		Bputrune(ftable, c);
1895 		if(c == '\n')
1896 			lineno++;
1897 		c = Bgetrune(finput);
1898 	}
1899 	error("eof before %%}");
1900 }
1901 
1902 /*
1903  * skip over comments
1904  * skipcom is called after reading a '/'
1905  */
1906 int
skipcom(void)1907 skipcom(void)
1908 {
1909 	long c;
1910 	int i;
1911 
1912 	/* i is the number of lines skipped */
1913 	i = 0;
1914 	if(Bgetrune(finput) != '*')
1915 		error("illegal comment");
1916 	c = Bgetrune(finput);
1917 	while(c != Beof) {
1918 		while(c == '*')
1919 			if((c=Bgetrune(finput)) == '/')
1920 				return i;
1921 		if(c == '\n')
1922 			i++;
1923 		c = Bgetrune(finput);
1924 	}
1925 	error("EOF inside comment");
1926 	return 0;
1927 }
1928 
1929 /*
1930  * copy C action to the next ; or closing }
1931  */
1932 void
cpyact(int offset)1933 cpyact(int offset)
1934 {
1935 	long c;
1936 	int brac, match, j, s, fnd, tok;
1937 
1938 	Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1939 	brac = 0;
1940 
1941 loop:
1942 	c = Bgetrune(finput);
1943 swt:
1944 	switch(c) {
1945 	case ';':
1946 		if(brac == 0) {
1947 			Bputrune(faction, c);
1948 			return;
1949 		}
1950 		goto lcopy;
1951 
1952 	case '{':
1953 		brac++;
1954 		goto lcopy;
1955 
1956 	case '$':
1957 		s = 1;
1958 		tok = -1;
1959 		c = Bgetrune(finput);
1960 
1961 		/* type description */
1962 		if(c == '<') {
1963 			Bungetrune(finput);
1964 			if(gettok() != TYPENAME)
1965 				error("bad syntax on $<ident> clause");
1966 			tok = numbval;
1967 			c = Bgetrune(finput);
1968 		}
1969 		if(c == '$') {
1970 			Bprint(faction, "yyval");
1971 
1972 			/* put out the proper tag... */
1973 			if(ntypes) {
1974 				if(tok < 0)
1975 					tok = fdtype(*prdptr[nprod]);
1976 				Bprint(faction, ".%s", typeset[tok]);
1977 			}
1978 			goto loop;
1979 		}
1980 		if(c == '-') {
1981 			s = -s;
1982 			c = Bgetrune(finput);
1983 		}
1984 		if(isdigit(c)) {
1985 			j = 0;
1986 			while(isdigit(c)) {
1987 				j = j*10 + (c-'0');
1988 				c = Bgetrune(finput);
1989 			}
1990 			Bungetrune(finput);
1991 			j = j*s - offset;
1992 			if(j > 0)
1993 				error("Illegal use of $%d", j+offset);
1994 
1995 		dollar:
1996 			Bprint(faction, "yypt[-%d].yyv", -j);
1997 
1998 			/* put out the proper tag */
1999 			if(ntypes) {
2000 				if(j+offset <= 0 && tok < 0)
2001 					error("must specify type of $%d", j+offset);
2002 				if(tok < 0)
2003 					tok = fdtype(prdptr[nprod][j+offset]);
2004 				Bprint(faction, ".%s", typeset[tok]);
2005 			}
2006 			goto loop;
2007 		}
2008 		if(isupper(c) || islower(c) || c == '_' || c == '.') {
2009 			int tok; /* tok used oustide for type info */
2010 
2011 			/* look for $name */
2012 			Bungetrune(finput);
2013 			if(gettok() != IDENTIFIER)
2014 				error("$ must be followed by an identifier");
2015 			tok = chfind(2, tokname);
2016 			if((c = Bgetrune(finput)) != '#') {
2017 				Bungetrune(finput);
2018 				fnd = -1;
2019 			} else
2020 				if(gettok() != NUMBER) {
2021 					error("# must be followed by number");
2022 					fnd = -1;
2023 				} else
2024 					fnd = numbval;
2025 			for(j=1; j<=offset; ++j)
2026 				if(tok == prdptr[nprod][j]) {
2027 					if(--fnd <= 0) {
2028 						j -= offset;
2029 						goto dollar;
2030 					}
2031 				}
2032 			error("$name or $name#number not found");
2033 		}
2034 		Bputc(faction, '$');
2035 		if(s < 0 )
2036 			Bputc(faction, '-');
2037 		goto swt;
2038 
2039 	case '}':
2040 		brac--;
2041 		if(brac)
2042 			goto lcopy;
2043 		Bputrune(faction, c);
2044 		return;
2045 
2046 	case '/':
2047 		/* look for comments */
2048 		Bputrune(faction, c);
2049 		c = Bgetrune(finput);
2050 		if(c != '*')
2051 			goto swt;
2052 
2053 		/* it really is a comment */
2054 		Bputrune(faction, c);
2055 		c = Bgetrune(finput);
2056 		while(c >= 0) {
2057 			while(c == '*') {
2058 				Bputrune(faction, c);
2059 				if((c=Bgetrune(finput)) == '/')
2060 					goto lcopy;
2061 			}
2062 			Bputrune(faction, c);
2063 			if(c == '\n')
2064 				lineno++;
2065 			c = Bgetrune(finput);
2066 		}
2067 		error("EOF inside comment");
2068 
2069 	case '\'':
2070 		/* character constant */
2071 		match = '\'';
2072 		goto string;
2073 
2074 	case '"':
2075 		/* character string */
2076 		match = '"';
2077 
2078 	string:
2079 		Bputrune(faction, c);
2080 		while(c = Bgetrune(finput)) {
2081 			if(c == '\\') {
2082 				Bputrune(faction, c);
2083 				c = Bgetrune(finput);
2084 				if(c == '\n')
2085 					lineno++;
2086 			} else
2087 				if(c == match)
2088 					goto lcopy;
2089 				if(c == '\n')
2090 					error("newline in string or char. const.");
2091 			Bputrune(faction, c);
2092 		}
2093 		error("EOF in string or character constant");
2094 
2095 	case Beof:
2096 		error("action does not terminate");
2097 
2098 	case '\n':
2099 		lineno++;
2100 		goto lcopy;
2101 	}
2102 
2103 lcopy:
2104 	Bputrune(faction, c);
2105 	goto loop;
2106 }
2107 
2108 void
openup(char * stem,int dflag,int vflag,int ytab,char * ytabc)2109 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2110 {
2111 	char buf[256];
2112 
2113 	if(vflag) {
2114 		sprint(buf, "%s.%s", stem, FILEU);
2115 		foutput = Bopen(buf, OWRITE);
2116 		if(foutput == 0)
2117 			error("cannot open %s", buf);
2118 	}
2119 	if(yydebug) {
2120 		sprint(buf, "%s.%s", stem, FILEDEBUG);
2121 		if((fdebug = Bopen(buf, OWRITE)) == 0)
2122 			error("can't open %s", buf);
2123 	}
2124 	if(dflag) {
2125 		sprint(buf, "%s.%s", stem, FILED);
2126 		fdefine = Bopen(buf, OWRITE);
2127 		if(fdefine == 0)
2128 			error("can't create %s", buf);
2129 	}
2130 	if(ytab == 0)
2131 		sprint(buf, "%s.%s", stem, OFILE);
2132 	else
2133 		strcpy(buf, ytabc);
2134 	ftable = Bopen(buf, OWRITE);
2135 	if(ftable == 0)
2136 		error("cannot open table file %s", buf);
2137 }
2138 
2139 /*
2140  * print the output for the states
2141  */
2142 void
output(void)2143 output(void)
2144 {
2145 	int i, k, c;
2146 	Wset *u, *v;
2147 
2148 	Bprint(ftable, "short	yyexca[] =\n{");
2149 	if(fdebug)
2150 		Bprint(fdebug, "char*	yystates[] =\n{\n");
2151 
2152 	/* output the stuff for state i */
2153 	SLOOP(i) {
2154 		nolook = tystate[i]!=MUSTLOOKAHEAD;
2155 		closure(i);
2156 
2157 		/* output actions */
2158 		nolook = 1;
2159 		aryfil(temp1, ntokens+nnonter+1, 0);
2160 		WSLOOP(wsets, u) {
2161 			c = *(u->pitem);
2162 			if(c > 1 && c < NTBASE && temp1[c] == 0) {
2163 				WSLOOP(u, v)
2164 					if(c == *(v->pitem))
2165 						putitem(v->pitem+1, (Lkset*)0);
2166 				temp1[c] = state(c);
2167 			} else
2168 				if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2169 					temp1[c+ntokens] = amem[indgo[i]+c];
2170 		}
2171 		if(i == 1)
2172 			temp1[1] = ACCEPTCODE;
2173 
2174 		/* now, we have the shifts; look at the reductions */
2175 		lastred = 0;
2176 		WSLOOP(wsets, u) {
2177 			c = *u->pitem;
2178 
2179 			/* reduction */
2180 			if(c <= 0) {
2181 				lastred = -c;
2182 				TLOOP(k)
2183 					if(BIT(u->ws.lset, k)) {
2184 						if(temp1[k] == 0)
2185 							temp1[k] = c;
2186 						else
2187 						if(temp1[k] < 0) { /* reduce/reduce conflict */
2188 							if(foutput)
2189 								Bprint(foutput,
2190 									"\n%d: reduce/reduce conflict"
2191 									" (red'ns %d and %d ) on %s",
2192 									i, -temp1[k], lastred,
2193 									symnam(k));
2194 							if(-temp1[k] > lastred)
2195 								temp1[k] = -lastred;
2196 							zzrrconf++;
2197 						} else
2198 							/* potential shift/reduce conflict */
2199 							precftn( lastred, k, i );
2200 					}
2201 				}
2202 		}
2203 		wract(i);
2204 	}
2205 
2206 	if(fdebug)
2207 		Bprint(fdebug, "};\n");
2208 	Bprint(ftable, "};\n");
2209 	Bprint(ftable, "#define	YYNPROD	%d\n", nprod);
2210 	Bprint(ftable, "#define	YYPRIVATE %d\n", PRIVATE);
2211 	if(yydebug)
2212 		Bprint(ftable, "#define	yydebug	%s\n", yydebug);
2213 }
2214 
2215 /*
2216  * pack state i from temp1 into amem
2217  */
2218 int
apack(int * p,int n)2219 apack(int *p, int n)
2220 {
2221 	int *pp, *qq, *rr, off, *q, *r;
2222 
2223 	/* we don't need to worry about checking because
2224 	 * we will only look at entries known to be there...
2225 	 * eliminate leading and trailing 0's
2226 	 */
2227 
2228 	q = p+n;
2229 	for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2230 		;
2231  	/* no actions */
2232 	if(pp > q)
2233 		return 0;
2234 	p = pp;
2235 
2236 	/* now, find a place for the elements from p to q, inclusive */
2237 	r = &amem[ACTSIZE-1];
2238 	for(rr = amem; rr <= r; rr++, off++) {
2239 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2240 			if(*pp != 0)
2241 				if(*pp != *qq && *qq != 0)
2242 					goto nextk;
2243 
2244 		/* we have found an acceptable k */
2245 		if(pkdebug && foutput != 0)
2246 			Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2247 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2248 			if(*pp) {
2249 				if(qq > r)
2250 					error("action table overflow");
2251 				if(qq > memp)
2252 					memp = qq;
2253 				*qq = *pp;
2254 			}
2255 		if(pkdebug && foutput != 0)
2256 			for(pp = amem; pp <= memp; pp += 10) {
2257 				Bprint(foutput, "\t");
2258 				for(qq = pp; qq <= pp+9; qq++)
2259 					Bprint(foutput, "%d ", *qq);
2260 				Bprint(foutput, "\n");
2261 			}
2262 		return(off);
2263 	nextk:;
2264 	}
2265 	error("no space in action table");
2266 	return 0;
2267 }
2268 
2269 /*
2270  * output the gotos for the nontermninals
2271  */
2272 void
go2out(void)2273 go2out(void)
2274 {
2275 	int i, j, k, best, count, cbest, times;
2276 
2277 	/* mark begining of gotos */
2278 	Bprint(ftemp, "$\n");
2279 	for(i = 1; i <= nnonter; i++) {
2280 		go2gen(i);
2281 
2282 		/* find the best one to make default */
2283 		best = -1;
2284 		times = 0;
2285 
2286 		/* is j the most frequent */
2287 		for(j = 0; j <= nstate; j++) {
2288 			if(tystate[j] == 0)
2289 				continue;
2290 			if(tystate[j] == best)
2291 				continue;
2292 
2293 			/* is tystate[j] the most frequent */
2294 			count = 0;
2295 			cbest = tystate[j];
2296 			for(k = j; k <= nstate; k++)
2297 				if(tystate[k] == cbest)
2298 					count++;
2299 			if(count > times) {
2300 				best = cbest;
2301 				times = count;
2302 			}
2303 		}
2304 
2305 		/* best is now the default entry */
2306 		zzgobest += times-1;
2307 		for(j = 0; j <= nstate; j++)
2308 			if(tystate[j] != 0 && tystate[j] != best) {
2309 				Bprint(ftemp, "%d,%d,", j, tystate[j]);
2310 				zzgoent++;
2311 			}
2312 
2313 		/* now, the default */
2314 		if(best == -1)
2315 			best = 0;
2316 		zzgoent++;
2317 		Bprint(ftemp, "%d\n", best);
2318 	}
2319 }
2320 
2321 /*
2322  * output the gotos for nonterminal c
2323  */
2324 void
go2gen(int c)2325 go2gen(int c)
2326 {
2327 	int i, work, cc;
2328 	Item *p, *q;
2329 
2330 
2331 	/* first, find nonterminals with gotos on c */
2332 	aryfil(temp1, nnonter+1, 0);
2333 	temp1[c] = 1;
2334 	work = 1;
2335 	while(work) {
2336 		work = 0;
2337 		PLOOP(0, i)
2338 
2339 			/* cc is a nonterminal */
2340 			if((cc=prdptr[i][1]-NTBASE) >= 0)
2341 				/* cc has a goto on c */
2342 				if(temp1[cc] != 0) {
2343 
2344 					/* thus, the left side of production i does too */
2345 					cc = *prdptr[i]-NTBASE;
2346 					if(temp1[cc] == 0) {
2347 						  work = 1;
2348 						  temp1[cc] = 1;
2349 					}
2350 				}
2351 	}
2352 
2353 	/* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2354 	if(g2debug && foutput != 0) {
2355 		Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2356 		NTLOOP(i)
2357 			if(temp1[i])
2358 				Bprint(foutput, "%s ", nontrst[i].name);
2359 		Bprint(foutput, "\n");
2360 	}
2361 
2362 	/* now, go through and put gotos into tystate */
2363 	aryfil(tystate, nstate, 0);
2364 	SLOOP(i)
2365 		ITMLOOP(i, p, q)
2366 			if((cc = *p->pitem) >= NTBASE)
2367 				/* goto on c is possible */
2368 				if(temp1[cc-NTBASE]) {
2369 					tystate[i] = amem[indgo[i]+c];
2370 					break;
2371 				}
2372 }
2373 
2374 /*
2375  * decide a shift/reduce conflict by precedence.
2376  * r is a rule number, t a token number
2377  * the conflict is in state s
2378  * temp1[t] is changed to reflect the action
2379  */
2380 void
precftn(int r,int t,int s)2381 precftn(int r, int t, int s)
2382 {
2383 	int lp, lt, action;
2384 
2385 	lp = levprd[r];
2386 	lt = toklev[t];
2387 	if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2388 
2389 		/* conflict */
2390 		if(foutput != 0)
2391 			Bprint(foutput,
2392 				"\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2393 				s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2394 		zzsrconf++;
2395 		return;
2396 	}
2397 	if(PLEVEL(lt) == PLEVEL(lp))
2398 		action = ASSOC(lt);
2399 	else
2400 		if(PLEVEL(lt) > PLEVEL(lp))
2401 			action = RASC;  /* shift */
2402 		else
2403 			action = LASC;  /* reduce */
2404 	switch(action) {
2405 	case BASC:  /* error action */
2406 		temp1[t] = ERRCODE;
2407 		break;
2408 	case LASC:  /* reduce */
2409 		temp1[t] = -r;
2410 		break;
2411 	}
2412 }
2413 
2414 /*
2415  * output state i
2416  * temp1 has the actions, lastred the default
2417  */
2418 void
wract(int i)2419 wract(int i)
2420 {
2421 	int p, p0, p1, ntimes, tred, count, j, flag;
2422 
2423 	/* find the best choice for lastred */
2424 	lastred = 0;
2425 	ntimes = 0;
2426 	TLOOP(j) {
2427 		if(temp1[j] >= 0)
2428 			continue;
2429 		if(temp1[j]+lastred == 0)
2430 			continue;
2431 		/* count the number of appearances of temp1[j] */
2432 		count = 0;
2433 		tred = -temp1[j];
2434 		levprd[tred] |= REDFLAG;
2435 		TLOOP(p)
2436 			if(temp1[p]+tred == 0)
2437 				count++;
2438 		if(count > ntimes) {
2439 			lastred = tred;
2440 			ntimes = count;
2441 		}
2442 	}
2443 
2444 	/*
2445 	 * for error recovery, arrange that, if there is a shift on the
2446 	 * error recovery token, `error', that the default be the error action
2447 	 */
2448 	if(temp1[2] > 0)
2449 		lastred = 0;
2450 
2451 	/* clear out entries in temp1 which equal lastred */
2452 	TLOOP(p)
2453 		if(temp1[p]+lastred == 0)
2454 			temp1[p] = 0;
2455 
2456 	wrstate(i);
2457 	defact[i] = lastred;
2458 	flag = 0;
2459 	TLOOP(p0)
2460 		if((p1=temp1[p0]) != 0) {
2461 			if(p1 < 0) {
2462 				p1 = -p1;
2463 				goto exc;
2464 			}
2465 			if(p1 == ACCEPTCODE) {
2466 				p1 = -1;
2467 				goto exc;
2468 			}
2469 			if(p1 == ERRCODE) {
2470 				p1 = 0;
2471 			exc:
2472 				if(flag++ == 0)
2473 					Bprint(ftable, "-1, %d,\n", i);
2474 				Bprint(ftable, "\t%d, %d,\n", p0, p1);
2475 				zzexcp++;
2476 				continue;
2477 			}
2478 			Bprint(ftemp, "%d,%d,", p0, p1);
2479 			zzacent++;
2480 		}
2481 	if(flag) {
2482 		defact[i] = -2;
2483 		Bprint(ftable, "\t-2, %d,\n", lastred);
2484 	}
2485 	Bprint(ftemp, "\n");
2486 }
2487 
2488 /*
2489  * writes state i
2490  */
2491 void
wrstate(int i)2492 wrstate(int i)
2493 {
2494 	int j0, j1;
2495 	Item *pp, *qq;
2496 	Wset *u;
2497 
2498 	if(fdebug) {
2499 		if(lastred) {
2500 			Bprint(fdebug, "	0, /*%d*/\n", i);
2501 		} else {
2502 			Bprint(fdebug, "	\"");
2503 			ITMLOOP(i, pp, qq)
2504 				Bprint(fdebug, "%s\\n", writem(pp->pitem));
2505 			if(tystate[i] == MUSTLOOKAHEAD)
2506 				WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2507 					if(*u->pitem < 0)
2508 						Bprint(fdebug, "%s\\n", writem(u->pitem));
2509 			Bprint(fdebug, "\", /*%d*/\n", i);
2510 		}
2511 	}
2512 	if(foutput == 0)
2513 		return;
2514 	Bprint(foutput, "\nstate %d\n", i);
2515 	ITMLOOP(i, pp, qq)
2516 		Bprint(foutput, "\t%s\n", writem(pp->pitem));
2517 	if(tystate[i] == MUSTLOOKAHEAD)
2518 		/* print out empty productions in closure */
2519 		WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2520 			if(*u->pitem < 0)
2521 				Bprint(foutput, "\t%s\n", writem(u->pitem));
2522 
2523 	/* check for state equal to another */
2524 	TLOOP(j0)
2525 		if((j1=temp1[j0]) != 0) {
2526 			Bprint(foutput, "\n\t%s  ", symnam(j0));
2527 			/* shift, error, or accept */
2528 			if(j1 > 0) {
2529 				if(j1 == ACCEPTCODE)
2530 					Bprint(foutput,  "accept");
2531 				else
2532 					if(j1 == ERRCODE)
2533 						Bprint(foutput, "error");
2534 					else
2535 						Bprint(foutput, "shift %d", j1);
2536 			} else
2537 				Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2538 		}
2539 
2540 	/* output the final production */
2541 	if(lastred)
2542 		Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2543 			lastred, rlines[lastred]);
2544 	else
2545 		Bprint(foutput, "\n\t.  error\n\n");
2546 
2547 	/* now, output nonterminal actions */
2548 	j1 = ntokens;
2549 	for(j0 = 1; j0 <= nnonter; j0++) {
2550 		j1++;
2551 		if(temp1[j1])
2552 			Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2553 	}
2554 }
2555 
2556 void
warray(char * s,int * v,int n)2557 warray(char *s, int *v, int n)
2558 {
2559 	int i;
2560 
2561 	Bprint(ftable, "short	%s[] =\n{", s);
2562 	for(i=0;;) {
2563 		if(i%10 == 0)
2564 			Bprint(ftable, "\n");
2565 		Bprint(ftable, "%4d", v[i]);
2566 		i++;
2567 		if(i >= n) {
2568 			Bprint(ftable, "\n};\n");
2569 			break;
2570 		}
2571 		Bprint(ftable, ",");
2572 	}
2573 }
2574 
2575 /*
2576  * in order to free up the mem and amem arrays for the optimizer,
2577  * and still be able to output yyr1, etc., after the sizes of
2578  * the action array is known, we hide the nonterminals
2579  * derived by productions in levprd.
2580  */
2581 
2582 void
hideprod(void)2583 hideprod(void)
2584 {
2585 	int i, j;
2586 
2587 	j = 0;
2588 	levprd[0] = 0;
2589 	PLOOP(1, i) {
2590 		if(!(levprd[i] & REDFLAG)) {
2591 			j++;
2592 			if(foutput != 0)
2593 				Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2594 		}
2595 		levprd[i] = *prdptr[i] - NTBASE;
2596 	}
2597 	if(j)
2598 		print("%d rules never reduced\n", j);
2599 }
2600 
2601 void
callopt(void)2602 callopt(void)
2603 {
2604 	int i, *p, j, k, *q;
2605 
2606 	/* read the arrays from tempfile and set parameters */
2607 	finput = Bopen(tempname, OREAD);
2608 	if(finput == 0)
2609 		error("optimizer cannot open tempfile");
2610 	pgo[0] = 0;
2611 	temp1[0] = 0;
2612 	nstate = 0;
2613 	nnonter = 0;
2614 	for(;;) {
2615 		switch(gtnm()) {
2616 		case '\n':
2617 			nstate++;
2618 			pmem--;
2619 			temp1[nstate] = pmem - mem0;
2620 		case ',':
2621 			continue;
2622 		case '$':
2623 			break;
2624 		default:
2625 			error("bad tempfile");
2626 		}
2627 		break;
2628 	}
2629 
2630 	pmem--;
2631 	temp1[nstate] = yypgo[0] = pmem - mem0;
2632 	for(;;) {
2633 		switch(gtnm()) {
2634 		case '\n':
2635 			nnonter++;
2636 			yypgo[nnonter] = pmem-mem0;
2637 		case ',':
2638 			continue;
2639 		case -1:
2640 			break;
2641 		default:
2642 			error("bad tempfile");
2643 		}
2644 		break;
2645 	}
2646 	pmem--;
2647 	yypgo[nnonter--] = pmem - mem0;
2648 	for(i = 0; i < nstate; i++) {
2649 		k = 32000;
2650 		j = 0;
2651 		q = mem0 + temp1[i+1];
2652 		for(p = mem0 + temp1[i]; p < q ; p += 2) {
2653 			if(*p > j)
2654 				j = *p;
2655 			if(*p < k)
2656 				k = *p;
2657 		}
2658 		/* nontrivial situation */
2659 		if(k <= j) {
2660 			/* j is now the range */
2661 /*			j -= k;			/* call scj */
2662 			if(k > maxoff)
2663 				maxoff = k;
2664 		}
2665 		tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2666 		if(j > maxspr)
2667 			maxspr = j;
2668 	}
2669 
2670 	/* initialize ggreed table */
2671 	for(i = 1; i <= nnonter; i++) {
2672 		ggreed[i] = 1;
2673 		j = 0;
2674 
2675 		/* minimum entry index is always 0 */
2676 		q = mem0 + yypgo[i+1] - 1;
2677 		for(p = mem0+yypgo[i]; p < q ; p += 2) {
2678 			ggreed[i] += 2;
2679 			if(*p > j)
2680 				j = *p;
2681 		}
2682 		ggreed[i] = ggreed[i] + 2*j;
2683 		if(j > maxoff)
2684 			maxoff = j;
2685 	}
2686 
2687 	/* now, prepare to put the shift actions into the amem array */
2688 	for(i = 0; i < ACTSIZE; i++)
2689 		amem[i] = 0;
2690 	maxa = amem;
2691 	for(i = 0; i < nstate; i++) {
2692 		if(tystate[i] == 0 && adb > 1)
2693 			Bprint(ftable, "State %d: null\n", i);
2694 		indgo[i] = YYFLAG1;
2695 	}
2696 	while((i = nxti()) != NOMORE)
2697 		if(i >= 0)
2698 			stin(i);
2699 		else
2700 			gin(-i);
2701 
2702 	/* print amem array */
2703 	if(adb > 2 )
2704 		for(p = amem; p <= maxa; p += 10) {
2705 			Bprint(ftable, "%4d  ", (int)(p-amem));
2706 			for(i = 0; i < 10; ++i)
2707 				Bprint(ftable, "%4d  ", p[i]);
2708 			Bprint(ftable, "\n");
2709 		}
2710 
2711 	/* write out the output appropriate to the language */
2712 	aoutput();
2713 	osummary();
2714 	Bterm(finput);
2715 	ZAPFILE(tempname);
2716 }
2717 
2718 void
gin(int i)2719 gin(int i)
2720 {
2721 	int *p, *r, *s, *q1, *q2;
2722 
2723 	/* enter gotos on nonterminal i into array amem */
2724 	ggreed[i] = 0;
2725 
2726 	q2 = mem0+ yypgo[i+1] - 1;
2727 	q1 = mem0 + yypgo[i];
2728 
2729 	/* now, find amem place for it */
2730 	for(p = amem; p < &amem[ACTSIZE]; p++) {
2731 		if(*p)
2732 			continue;
2733 		for(r = q1; r < q2; r += 2) {
2734 			s = p + *r + 1;
2735 			if(*s)
2736 				goto nextgp;
2737 			if(s > maxa)
2738 				if((maxa = s) > &amem[ACTSIZE])
2739 					error("a array overflow");
2740 		}
2741 		/* we have found amem spot */
2742 		*p = *q2;
2743 		if(p > maxa)
2744 			if((maxa = p) > &amem[ACTSIZE])
2745 				error("a array overflow");
2746 		for(r = q1; r < q2; r += 2) {
2747 			s = p + *r + 1;
2748 			*s = r[1];
2749 		}
2750 		pgo[i] = p-amem;
2751 		if(adb > 1)
2752 			Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2753 		return;
2754 
2755 	nextgp:;
2756 	}
2757 	error("cannot place goto %d\n", i);
2758 }
2759 
2760 void
stin(int i)2761 stin(int i)
2762 {
2763 	int *r, *s, n, flag, j, *q1, *q2;
2764 
2765 	tystate[i] = 0;
2766 
2767 	/* enter state i into the amem array */
2768 	q2 = mem0+temp1[i+1];
2769 	q1 = mem0+temp1[i];
2770 	/* find an acceptable place */
2771 	for(n = -maxoff; n < ACTSIZE; n++) {
2772 		flag = 0;
2773 		for(r = q1; r < q2; r += 2) {
2774 			if((s = *r + n + amem) < amem)
2775 				goto nextn;
2776 			if(*s == 0)
2777 				flag++;
2778 			else
2779 				if(*s != r[1])
2780 					goto nextn;
2781 		}
2782 
2783 		/* check that the position equals another only if the states are identical */
2784 		for(j=0; j<nstate; j++) {
2785 			if(indgo[j] == n) {
2786 
2787 				/* we have some disagreement */
2788 				if(flag)
2789 					goto nextn;
2790 				if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2791 
2792 					/* states are equal */
2793 					indgo[i] = n;
2794 					if(adb > 1)
2795 						Bprint(ftable,
2796 						"State %d: entry at %d equals state %d\n",
2797 						i, n, j);
2798 					return;
2799 				}
2800 
2801 				/* we have some disagreement */
2802 				goto nextn;
2803 			}
2804 		}
2805 
2806 		for(r = q1; r < q2; r += 2) {
2807 			if((s = *r+n+amem) >= &amem[ACTSIZE])
2808 				error("out of space in optimizer a array");
2809 			if(s > maxa)
2810 				maxa = s;
2811 			if(*s != 0 && *s != r[1])
2812 				error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2813 			*s = r[1];
2814 		}
2815 		indgo[i] = n;
2816 		if(adb > 1)
2817 			Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2818 		return;
2819 	nextn:;
2820 	}
2821 	error("Error; failure to place state %d\n", i);
2822 }
2823 
2824 /*
2825  * finds the next i
2826  */
2827 int
nxti(void)2828 nxti(void)
2829 {
2830 	int i, max, maxi;
2831 
2832 	max = 0;
2833 	maxi = 0;
2834 	for(i = 1; i <= nnonter; i++)
2835 		if(ggreed[i] >= max) {
2836 			max = ggreed[i];
2837 			maxi = -i;
2838 		}
2839 	for(i = 0; i < nstate; ++i)
2840 		if(tystate[i] >= max) {
2841 			max = tystate[i];
2842 			maxi = i;
2843 		}
2844 	if(nxdb)
2845 		Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2846 	if(max == 0)
2847 		return NOMORE;
2848 	return maxi;
2849 }
2850 
2851 /*
2852  * write summary
2853  */
2854 void
osummary(void)2855 osummary(void)
2856 {
2857 
2858 	int i, *p;
2859 
2860 	if(foutput == 0)
2861 		return;
2862 	i = 0;
2863 	for(p = maxa; p >= amem; p--)
2864 		if(*p == 0)
2865 			i++;
2866 
2867 	Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2868 		(int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2869 	Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2870 	Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2871 }
2872 
2873 /*
2874  * this version is for C
2875  * write out the optimized parser
2876  */
2877 void
aoutput(void)2878 aoutput(void)
2879 {
2880 	Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2881 	arout("yyact", amem, (maxa-amem)+1);
2882 	arout("yypact", indgo, nstate);
2883 	arout("yypgo", pgo, nnonter+1);
2884 }
2885 
2886 void
arout(char * s,int * v,int n)2887 arout(char *s, int *v, int n)
2888 {
2889 	int i;
2890 
2891 	Bprint(ftable, "short	%s[] =\n{", s);
2892 	for(i = 0; i < n;) {
2893 		if(i%10 == 0)
2894 			Bprint(ftable, "\n");
2895 		Bprint(ftable, "%4d", v[i]);
2896 		i++;
2897 		if(i == n)
2898 			Bprint(ftable, "\n};\n");
2899 		else
2900 			Bprint(ftable, ",");
2901 	}
2902 }
2903 
2904 /*
2905  * read and convert an integer from the standard input
2906  * return the terminating character
2907  * blanks, tabs, and newlines are ignored
2908  */
2909 int
gtnm(void)2910 gtnm(void)
2911 {
2912 	int sign, val, c;
2913 
2914 	sign = 0;
2915 	val = 0;
2916 	while((c=Bgetrune(finput)) != Beof) {
2917 		if(isdigit(c)) {
2918 			val = val*10 + c-'0';
2919 			continue;
2920 		}
2921 		if(c == '-') {
2922 			sign = 1;
2923 			continue;
2924 		}
2925 		break;
2926 	}
2927 	if(sign)
2928 		val = -val;
2929 	*pmem++ = val;
2930 	if(pmem >= &mem0[MEMSIZE])
2931 		error("out of space");
2932 	return c;
2933 }
2934