xref: /plan9-contrib/sys/src/cmd/yacc.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 
6 #define	Bungetrune	Bungetc		/* ok for now. */
7 
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET		((32+NTERMS)/32)	/* BOTCH?? +31 */
12 #define BIT(a,i)	((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)	((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)	(((n)+32)/32)
15 
16 #define PARSER		"/sys/lib/yaccpar"
17 #define PARSERS		"/sys/lib/yaccpars"
18 #define TEMPNAME	"y.tmp.XXXXXX"
19 #define ACTNAME		"y.acts.XXXXXX"
20 #define OFILE		"tab.c"
21 #define FILEU		"output"
22 #define FILED		"tab.h"
23 #define FILEDEBUG	"debug"
24 
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31 	ACTSIZE		= 30000,
32 	MEMSIZE		= 30000,
33 	NSTATES		= 2000,
34 	NTERMS		= 255,
35 	NPROD		= 800,
36 	NNONTERM	= 300,
37 	TEMPSIZE	= 2000,
38 	CNAMSZ		= 5000,
39 	LSETSIZE	= 600,
40 	WSETSIZE	= 350,
41 
42 	NAMESIZE	= 50,
43 	NTYPES		= 63,
44 	ISIZE		= 400,
45 
46 	PRIVATE		= 0xE000,	/* unicode private use */
47 
48 	/* relationships which must hold:
49 		TBITSET ints must hold NTERMS+1 bits...
50 		WSETSIZE >= NNONTERM
51 		LSETSIZE >= NNONTERM
52 		TEMPSIZE >= NTERMS + NNONTERM + 1
53 		TEMPSIZE >= NSTATES
54 	*/
55 
56 	NTBASE		= 010000,
57 	ERRCODE		= 8190,
58 	ACCEPTCODE	= 8191,
59 
60 	NOASC		= 0,	/* no assoc. */
61 	LASC		= 1,	/* left assoc. */
62 	RASC		= 2,	/* right assoc. */
63 	BASC		= 3,	/* binary assoc. */
64 
65 	/* flags for state generation */
66 
67 	DONE		= 0,
68 	MUSTDO		= 1,
69 	MUSTLOOKAHEAD	= 2,
70 
71 	/* flags for a rule having an action, and being reduced */
72 
73 	ACTFLAG		= 04,
74 	REDFLAG		= 010,
75 
76 	/* output parser flags */
77 	YYFLAG1		= -1000,
78 
79 	/* parse tokens */
80 	IDENTIFIER	= PRIVATE,
81 	MARK,
82 	TERM,
83 	LEFT,
84 	RIGHT,
85 	BINARY,
86 	PREC,
87 	LCURLY,
88 	IDENTCOLON,
89 	NUMBER,
90 	START,
91 	TYPEDEF,
92 	TYPENAME,
93 	UNION,
94 
95 	ENDFILE		= 0,
96 
97 	EMPTY		= 1,
98 	WHOKNOWS	= 0,
99 	OK		= 1,
100 	NOMORE		= -1000,
101 };
102 
103 	/* macros for getting associativity and precedence levels */
104 
105 #define ASSOC(i)	((i)&03)
106 #define PLEVEL(i)	(((i)>>4)&077)
107 #define TYPE(i)		(((i)>>10)&077)
108 
109 	/* macros for setting associativity and precedence levels */
110 
111 #define SETASC(i,j)	i |= j
112 #define SETPLEV(i,j)	i |= (j<<4)
113 #define SETTYPE(i,j)	i |= (j<<10)
114 
115 	/* looping macros */
116 
117 #define TLOOP(i)	for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)	for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)	for(i=s; i<nprod; i++)
120 #define SLOOP(i)	for(i=0; i<nstate; i++)
121 #define WSBUMP(x)	x++
122 #define WSLOOP(s,j)	for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)	for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)	for(i=0; i<tbitset; i++)
125 
126 	/* command to clobber tempfiles after use */
127 
128 #define	ZAPFILE(x)	if(x) remove(x)
129 
130 	/* I/O descriptors */
131 
132 Biobuf*	faction;	/* file for saving actions */
133 Biobuf*	fdefine;	/* file for #defines */
134 Biobuf*	fdebug;		/* y.debug for strings for debugging */
135 Biobuf*	ftable;		/* y.tab.c file */
136 Biobuf*	ftemp;		/* tempfile to pass 2 */
137 Biobuf*	finput;		/* input file */
138 Biobuf*	foutput;	/* y.output file */
139 
140 	/* communication variables between various I/O routines */
141 
142 char*	infile;			/* input file name */
143 int	numbval;		/* value of an input number */
144 char	tokname[NAMESIZE+4];	/* input token name, slop for runes and 0 */
145 
146 	/* structure declarations */
147 
148 typedef
149 struct
150 {
151 	int	lset[TBITSET];
152 } Lkset;
153 
154 typedef
155 struct
156 {
157 	int*	pitem;
158 	Lkset*	look;
159 } Item;
160 
161 typedef
162 struct
163 {
164 	char*	name;
165 	int	value;
166 } Symb;
167 
168 typedef
169 struct
170 {
171 	int*	pitem;
172 	int	flag;
173 	Lkset	ws;
174 } Wset;
175 
176 	/* storage of names */
177 
178 char	cnames[CNAMSZ];		/* place where token and nonterminal names are stored */
179 int	cnamsz = CNAMSZ;	/* size of cnames */
180 char*	cnamp = cnames;		/* place where next name is to be put in */
181 int	ndefout = 4;		/* number of defined symbols output */
182 char*	tempname;
183 char*	actname;
184 char	ttempname[] = TEMPNAME;
185 char	tactname[] = ACTNAME;
186 char*	parser = PARSER;
187 char*	yydebug;
188 
189 	/* storage of types */
190 int	ntypes;			/* number of types defined */
191 char*	typeset[NTYPES];	/* pointers to type tags */
192 
193 	/* token information */
194 
195 int	ntokens = 0 ;		/* number of tokens */
196 Symb	tokset[NTERMS];
197 int	toklev[NTERMS];		/* vector with the precedence of the terminals */
198 
199 	/* nonterminal information */
200 
201 int	nnonter = -1;		/* the number of nonterminals */
202 Symb	nontrst[NNONTERM];
203 int	start;			/* start symbol */
204 
205 	/* assigned token type values */
206 int	extval = 0;
207 
208 char*	ytabc = OFILE;	/* name of y.tab.c */
209 
210 	/* grammar rule information */
211 
212 int	mem0[MEMSIZE] ;		/* production storage */
213 int*	mem = mem0;
214 int	nprod = 1;		/* number of productions */
215 int*	prdptr[NPROD];		/* pointers to descriptions of productions */
216 int	levprd[NPROD];		/* precedence levels for the productions */
217 int	rlines[NPROD];		/* line number for this rule */
218 
219 	/* state information */
220 
221 int	nstate = 0;		/* number of states */
222 Item*	pstate[NSTATES+2];	/* pointers to the descriptions of the states */
223 int	tystate[NSTATES];	/* contains type information about the states */
224 int	defact[NSTATES];	/* the default actions of states */
225 int	tstates[NTERMS];	/* states generated by terminal gotos */
226 int	ntstates[NNONTERM]; 	/* states generated by nonterminal gotos */
227 int	mstates[NSTATES];	/* chain of overflows of term/nonterm generation lists  */
228 int	lastred; 		/* the number of the last reduction of a state */
229 
230 	/* lookahead set information */
231 
232 Lkset	lkst[LSETSIZE];
233 int	nolook;			/* flag to turn off lookahead computations */
234 int	tbitset;		/* size of lookahead sets */
235 int	nlset = 0;		/* next lookahead set index */
236 int	nolook = 0;		/* flag to suppress lookahead computations */
237 Lkset	clset;  		/* temporary storage for lookahead computations */
238 
239 	/* working set information */
240 
241 Wset	wsets[WSETSIZE];
242 Wset*	cwp;
243 
244 	/* storage for action table */
245 
246 int	amem[ACTSIZE];		/* action table storage */
247 int*	memp = amem;		/* next free action table position */
248 int	indgo[NSTATES];		/* index to the stored goto table */
249 
250 	/* temporary vector, indexable by states, terms, or ntokens */
251 
252 int	temp1[TEMPSIZE];	/* temporary storage, indexed by terms + ntokens or states */
253 int	lineno = 1;		/* current input line number */
254 int	fatfl = 1;  		/* if on, error is fatal */
255 int	nerrors = 0;		/* number of errors */
256 
257 	/* statistics collection variables */
258 
259 int	zzgoent;
260 int	zzgobest;
261 int	zzacent;
262 int	zzexcp;
263 int	zzclose;
264 int	zzrrconf;
265 int	zzsrconf;
266 
267 int*	ggreed = lkst[0].lset;
268 int*	pgo = wsets[0].ws.lset;
269 int*	yypgo = &nontrst[0].value;
270 
271 int	maxspr = 0;  		/* maximum spread of any entry */
272 int	maxoff = 0;  		/* maximum offset into a array */
273 int*	pmem = mem0;
274 int*	maxa;
275 int	nxdb = 0;
276 int	adb = 0;
277 
278 
279 	/* storage for information about the nonterminals */
280 
281 int**	pres[NNONTERM+2];  	/* vector of pointers to productions yielding each nonterminal */
282 Lkset*	pfirst[NNONTERM+2];	/* vector of pointers to first sets for each nonterminal */
283 int	pempty[NNONTERM+1];	/* vector of nonterminals nontrivially deriving e */
284 
285 	/* random stuff picked out from between functions */
286 
287 int	indebug = 0;
288 Wset*	zzcwp = wsets;
289 int	zzgoent = 0;
290 int	zzgobest = 0;
291 int	zzacent = 0;
292 int	zzexcp = 0;
293 int	zzclose = 0;
294 int	zzsrconf = 0;
295 int*	zzmemsz = mem0;
296 int	zzrrconf = 0;
297 int	pidebug = 0;		/* debugging flag for putitem */
298 int	gsdebug = 0;
299 int	cldebug = 0;		/* debugging flag for closure */
300 int	pkdebug = 0;
301 int	g2debug = 0;
302 
303 struct
304 {
305 	char*	name;
306 	long	value;
307 } resrv[] =
308 {
309 	"binary",	BINARY,
310 	"left",		LEFT,
311 	"nonassoc",	BINARY,
312 	"prec",		PREC,
313 	"right",	RIGHT,
314 	"start",	START,
315 	"term",		TERM,
316 	"token",	TERM,
317 	"type",		TYPEDEF,
318 	"union",	UNION,
319 	0,
320 };
321 
322 	/* define functions */
323 
324 void	main(int, char**);
325 void	others(void);
326 char*	chcopy(char*, char*);
327 char*	writem(int*);
328 char*	symnam(int);
329 void	summary(void);
330 void	error(char*, ...);
331 void	aryfil(int*, int, int);
332 int	setunion(int*, int*);
333 void	prlook(Lkset*);
334 void	cpres(void);
335 void	cpfir(void);
336 int	state(int);
337 void	putitem(int*, Lkset*);
338 void	cempty(void);
339 void	stagen(void);
340 void	closure(int);
341 Lkset*	flset(Lkset*);
342 void	cleantmp(void);
343 void	intr(void);
344 void	setup(int, char**);
345 void	finact(void);
346 int	defin(int, char*);
347 void	defout(int);
348 char*	cstash(char*);
349 long	gettok(void);
350 int	fdtype(int);
351 int	chfind(int, char*);
352 void	cpyunion(void);
353 void	cpycode(void);
354 int	skipcom(void);
355 void	cpyact(int);
356 void	openup(char*, int, int, int, char*);
357 void	output(void);
358 int	apack(int*, int);
359 void	go2out(void);
360 void	go2gen(int);
361 void	precftn(int, int, int);
362 void	wract(int);
363 void	wrstate(int);
364 void	warray(char*, int*, int);
365 void	hideprod(void);
366 void	callopt(void);
367 void	gin(int);
368 void	stin(int);
369 int	nxti(void);
370 void	osummary(void);
371 void	aoutput(void);
372 void	arout(char*, int*, int);
373 int	gtnm(void);
374 
375 void
376 main(int argc, char *argv[])
377 {
378 
379 	setup(argc, argv);	/* initialize and read productions */
380 	tbitset = NWORDS(ntokens);
381 	cpres();		/* make table of which productions yield a given nonterminal */
382 	cempty();		/* make a table of which nonterminals can match the empty string */
383 	cpfir();		/* make a table of firsts of nonterminals */
384 	stagen();		/* generate the states */
385 	output();		/* write the states and the tables */
386 	go2out();
387 	hideprod();
388 	summary();
389 	callopt();
390 	others();
391 	exits(0);
392 }
393 
394 /*
395  * put out other arrays, copy the parsers
396  */
397 void
398 others(void)
399 {
400 	int c, i, j;
401 
402 	finput = Bopen(parser, OREAD);
403 	if(finput == 0)
404 		error("cannot find parser %s", parser);
405 	warray("yyr1", levprd, nprod);
406 	aryfil(temp1, nprod, 0);
407 	PLOOP(1, i)
408 		temp1[i] = prdptr[i+1]-prdptr[i]-2;
409 	warray("yyr2", temp1, nprod);
410 
411 	aryfil(temp1, nstate, -1000);
412 	TLOOP(i)
413 		for(j=tstates[i]; j!=0; j=mstates[j])
414 			temp1[j] = i;
415 	NTLOOP(i)
416 		for(j=ntstates[i]; j!=0; j=mstates[j])
417 			temp1[j] = -i;
418 	warray("yychk", temp1, nstate);
419 	warray("yydef", defact, nstate);
420 
421 	/* put out token translation tables */
422 	/* table 1 has 0-256 */
423 	aryfil(temp1, 256, 0);
424 	c = 0;
425 	TLOOP(i) {
426 		j = tokset[i].value;
427 		if(j >= 0 && j < 256) {
428 			if(temp1[j]) {
429 				print("yacc bug -- cant have 2 different Ts with same value\n");
430 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431 				nerrors++;
432 			}
433 			temp1[j] = i;
434 			if(j > c)
435 				c = j;
436 		}
437 	}
438 	warray("yytok1", temp1, c+1);
439 
440 	/* table 2 has PRIVATE-PRIVATE+256 */
441 	aryfil(temp1, 256, 0);
442 	c = 0;
443 	TLOOP(i) {
444 		j = tokset[i].value - PRIVATE;
445 		if(j >= 0 && j < 256) {
446 			if(temp1[j]) {
447 				print("yacc bug -- cant have 2 different Ts with same value\n");
448 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449 				nerrors++;
450 			}
451 			temp1[j] = i;
452 			if(j > c)
453 				c = j;
454 		}
455 	}
456 	warray("yytok2", temp1, c+1);
457 
458 	/* table 3 has everything else */
459 	Bprint(ftable, "long	yytok3[] =\n{\n");
460 	c = 0;
461 	TLOOP(i) {
462 		j = tokset[i].value;
463 		if(j >= 0 && j < 256)
464 			continue;
465 		if(j >= PRIVATE && j < 256+PRIVATE)
466 			continue;
467 
468 		Bprint(ftable, "%4d,%4d,", j, i);
469 		c++;
470 		if(c%5 == 0)
471 			Bprint(ftable, "\n");
472 	}
473 	Bprint(ftable, "%4d\n};\n", 0);
474 
475 	/* copy parser text */
476 	while((c=Bgetrune(finput)) != Beof) {
477 		if(c == '$') {
478 			if((c = Bgetrune(finput)) != 'A')
479 				Bputrune(ftable, '$');
480 			else { /* copy actions */
481 				faction = Bopen(actname, OREAD);
482 				if(faction == 0)
483 					error("cannot reopen action tempfile");
484 				while((c=Bgetrune(faction)) != Beof)
485 					Bputrune(ftable, c);
486 				Bterm(faction);
487 				ZAPFILE(actname);
488 				c = Bgetrune(finput);
489 			}
490 		}
491 		Bputrune(ftable, c);
492 	}
493 	Bterm(ftable);
494 }
495 
496 /*
497  * copies string q into p, returning next free char ptr
498  */
499 char*
500 chcopy(char* p, char* q)
501 {
502 	int c;
503 
504 	while(c = *q) {
505 		if(c == '"')
506 			*p++ = '\\';
507 		*p++ = c;
508 		q++;
509 	}
510 	*p = 0;
511 	return p;
512 }
513 
514 /*
515  * creates output string for item pointed to by pp
516  */
517 char*
518 writem(int *pp)
519 {
520 	int i,*p;
521 	static char sarr[ISIZE];
522 	char* q;
523 
524 	for(p=pp; *p>0; p++)
525 		;
526 	p = prdptr[-*p];
527 	q = chcopy(sarr, nontrst[*p-NTBASE].name);
528 	q = chcopy(q, ": ");
529 	for(;;) {
530 		*q = ' ';
531 		p++;
532 		if(p == pp)
533 			*q = '.';
534 		q++;
535 		*q = '\0';
536 		i = *p;
537 		if(i <= 0)
538 			break;
539 		q = chcopy(q, symnam(i));
540 		if(q > &sarr[ISIZE-30])
541 			error("item too big");
542 	}
543 
544 	/* an item calling for a reduction */
545 	i = *pp;
546 	if(i < 0 ) {
547 		q = chcopy(q, "    (");
548 		sprint(q, "%d)", -i);
549 	}
550 	return sarr;
551 }
552 
553 /*
554  * return a pointer to the name of symbol i
555  */
556 char*
557 symnam(int i)
558 {
559 	char* cp;
560 
561 	cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562 	if(*cp == ' ')
563 		cp++;
564 	return cp;
565 }
566 
567 /*
568  * output the summary on y.output
569  */
570 void
571 summary(void)
572 {
573 
574 	if(foutput != 0) {
575 		Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576 			ntokens, NTERMS, nnonter, NNONTERM);
577 		Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578 			nprod, NPROD, nstate, NSTATES);
579 		Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580 			zzsrconf, zzrrconf);
581 		Bprint(foutput, "%d/%d working sets used\n",
582 			zzcwp-wsets, WSETSIZE);
583 		Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584 			zzmemsz-mem0, MEMSIZE, memp-amem, ACTSIZE);
585 		Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586 		Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587 		Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588 		Bprint(foutput, "%d goto entries\n", zzgoent);
589 		Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
590 	}
591 	if(zzsrconf != 0 || zzrrconf != 0) {
592 		print("\nconflicts: ");
593 		if(zzsrconf)
594 			print("%d shift/reduce", zzsrconf);
595 		if(zzsrconf && zzrrconf)
596 			print(", ");
597 		if(zzrrconf)
598 			print("%d reduce/reduce", zzrrconf);
599 		print("\n");
600 	}
601 	if(ftemp != 0)
602 		Bterm(ftemp);
603 	if(fdefine != 0)
604 		Bterm(fdefine);
605 }
606 
607 /*
608  * write out error comment -- NEEDS WORK
609  */
610 void
611 error(char *s, ...)
612 {
613 
614 	nerrors++;
615 	fprint(2, "\n fatal error:");
616 	fprint(2, s, (&s)[1]);
617 	fprint(2, ", %s:%d\n", infile, lineno);
618 	if(!fatfl)
619 		return;
620 	summary();
621 	cleantmp();
622 	exits("error");
623 }
624 
625 /*
626  * set elements 0 through n-1 to c
627  */
628 void
629 aryfil(int *v, int n, int c)
630 {
631 	int i;
632 
633 	for(i=0; i<n; i++)
634 		v[i] = c;
635 }
636 
637 /*
638  * set a to the union of a and b
639  * return 1 if b is not a subset of a, 0 otherwise
640  */
641 int
642 setunion(int *a, int *b)
643 {
644 	int i, x, sub;
645 
646 	sub = 0;
647 	SETLOOP(i) {
648 		x = *a;
649 		*a |= *b;
650 		if(*a != x)
651 			sub = 1;
652 		a++;
653 		b++;
654 	}
655 	return sub;
656 }
657 
658 void
659 prlook(Lkset* p)
660 {
661 	int j, *pp;
662 
663 	pp = p->lset;
664 	if(pp == 0)
665 		Bprint(foutput, "\tNULL");
666 	else {
667 		Bprint(foutput, " { ");
668 		TLOOP(j)
669 			if(BIT(pp,j))
670 				Bprint(foutput, "%s ", symnam(j));
671 		Bprint(foutput, "}");
672 	}
673 }
674 
675 /*
676  * compute an array with the beginnings of  productions yielding given nonterminals
677  * The array pres points to these lists
678  * the array pyield has the lists: the total size is only NPROD+1
679  */
680 void
681 cpres(void)
682 {
683 	int c, j, i, **pmem;
684 	static int *pyield[NPROD];
685 
686 	pmem = pyield;
687 	NTLOOP(i) {
688 		c = i+NTBASE;
689 		pres[i] = pmem;
690 		fatfl = 0;  	/* make undefined  symbols  nonfatal */
691 		PLOOP(0, j)
692 			if(*prdptr[j] == c)
693 				*pmem++ =  prdptr[j]+1;
694 		if(pres[i] == pmem)
695 			error("nonterminal %s not defined!", nontrst[i].name);
696 	}
697 	pres[i] = pmem;
698 	fatfl = 1;
699 	if(nerrors) {
700 		summary();
701 		cleantmp();
702 		exits("error");
703 	}
704 	if(pmem != &pyield[nprod])
705 		error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
706 }
707 
708 /*
709  * compute an array with the first of nonterminals
710  */
711 void
712 cpfir(void)
713 {
714 	int *p, **s, i, **t, ch, changes;
715 
716 	zzcwp = &wsets[nnonter];
717 	NTLOOP(i) {
718 		aryfil(wsets[i].ws.lset, tbitset, 0);
719 		t = pres[i+1];
720 		/* initially fill the sets */
721 		for(s=pres[i]; s<t; ++s)
722 			for(p = *s; (ch = *p) > 0; ++p) {
723 				if(ch < NTBASE) {
724 					SETBIT(wsets[i].ws.lset, ch);
725 					break;
726 				}
727 				if(!pempty[ch-NTBASE])
728 					break;
729 			}
730 	}
731 
732 	/* now, reflect transitivity */
733 	changes = 1;
734 	while(changes) {
735 		changes = 0;
736 		NTLOOP(i) {
737 			t = pres[i+1];
738 			for(s = pres[i]; s < t; ++s)
739 				for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
740 					changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
741 					if(!pempty[ch])
742 						break;
743 				}
744 		}
745 	}
746 
747 	NTLOOP(i)
748 		pfirst[i] = flset(&wsets[i].ws);
749 	if(!indebug)
750 		return;
751 	if(foutput != 0)
752 		NTLOOP(i) {
753 			Bprint(foutput, "\n%s: ", nontrst[i].name);
754 			prlook(pfirst[i]);
755 			Bprint(foutput, " %d\n", pempty[i]);
756 		}
757 }
758 
759 /*
760  * sorts last state,and sees if it equals earlier ones. returns state number
761  */
762 int
763 state(int c)
764 {
765 	Item *p1, *p2, *k, *l, *q1, *q2;
766 	int size1, size2, i;
767 
768 	p1 = pstate[nstate];
769 	p2 = pstate[nstate+1];
770 	if(p1 == p2)
771 		return 0;	/* null state */
772 	/* sort the items */
773 	for(k = p2-1; k > p1; k--)	/* make k the biggest */
774 		for(l = k-1; l >= p1; --l)
775 			if(l->pitem > k->pitem) {
776 				int *s;
777 				Lkset *ss;
778 
779 				s = k->pitem;
780 				k->pitem = l->pitem;
781 				l->pitem = s;
782 				ss = k->look;
783 				k->look = l->look;
784 				l->look = ss;
785 			}
786 	size1 = p2 - p1;	/* size of state */
787 
788 	for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
789 		/* get ith state */
790 		q1 = pstate[i];
791 		q2 = pstate[i+1];
792 		size2 = q2 - q1;
793 		if(size1 != size2)
794 			continue;
795 		k = p1;
796 		for(l = q1; l < q2; l++) {
797 			if(l->pitem != k->pitem)
798 				break;
799 			k++;
800 		}
801 		if(l != q2)
802 			continue;
803 		/* found it */
804 		pstate[nstate+1] = pstate[nstate];	/* delete last state */
805 		/* fix up lookaheads */
806 		if(nolook)
807 			return i;
808 		for(l = q1, k = p1; l < q2; ++l, ++k ) {
809 			int s;
810 
811 			SETLOOP(s)
812 				clset.lset[s] = l->look->lset[s];
813 			if(setunion(clset.lset, k->look->lset)) {
814 				tystate[i] = MUSTDO;
815 				/* register the new set */
816 				l->look = flset( &clset );
817 			}
818 		}
819 		return i;
820 	}
821 	/* state is new */
822 	if(nolook)
823 		error("yacc state/nolook error");
824 	pstate[nstate+2] = p2;
825 	if(nstate+1 >= NSTATES)
826 		error("too many states");
827 	if(c >= NTBASE) {
828 		mstates[nstate] = ntstates[c-NTBASE];
829 		ntstates[c-NTBASE] = nstate;
830 	} else {
831 		mstates[nstate] = tstates[c];
832 		tstates[c] = nstate;
833 	}
834 	tystate[nstate] = MUSTDO;
835 	return nstate++;
836 }
837 
838 void
839 putitem(int *ptr, Lkset *lptr)
840 {
841 	Item *j;
842 
843 	if(pidebug && foutput != 0)
844 		Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
845 	j = pstate[nstate+1];
846 	j->pitem = ptr;
847 	if(!nolook)
848 		j->look = flset(lptr);
849 	pstate[nstate+1] = ++j;
850 	if((int*)j > zzmemsz) {
851 		zzmemsz = (int*)j;
852 		if(zzmemsz >=  &mem0[MEMSIZE])
853 			error("out of state space");
854 	}
855 }
856 
857 /*
858  * mark nonterminals which derive the empty string
859  * also, look for nonterminals which don't derive any token strings
860  */
861 void
862 cempty(void)
863 {
864 
865 	int i, *p;
866 
867 	/* first, use the array pempty to detect productions that can never be reduced */
868 	/* set pempty to WHONOWS */
869 	aryfil(pempty, nnonter+1, WHOKNOWS);
870 
871 	/* now, look at productions, marking nonterminals which derive something */
872 more:
873 	PLOOP(0, i) {
874 		if(pempty[*prdptr[i] - NTBASE])
875 			continue;
876 		for(p = prdptr[i]+1; *p >= 0; ++p)
877 			if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
878 				break;
879 		/* production can be derived */
880 		if(*p < 0) {
881 			pempty[*prdptr[i]-NTBASE] = OK;
882 			goto more;
883 		}
884 	}
885 
886 	/* now, look at the nonterminals, to see if they are all OK */
887 	NTLOOP(i) {
888 		/* the added production rises or falls as the start symbol ... */
889 		if(i == 0)
890 			continue;
891 		if(pempty[i] != OK) {
892 			fatfl = 0;
893 			error("nonterminal %s never derives any token string", nontrst[i].name);
894 		}
895 	}
896 
897 	if(nerrors) {
898 		summary();
899 		cleantmp();
900 		exits("error");
901 	}
902 
903 	/* now, compute the pempty array, to see which nonterminals derive the empty string */
904 	/* set pempty to WHOKNOWS */
905 	aryfil( pempty, nnonter+1, WHOKNOWS);
906 
907 	/* loop as long as we keep finding empty nonterminals */
908 
909 again:
910 	PLOOP(1, i) {
911 		/* not known to be empty */
912 		if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
913 			for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
914 				;
915 			/* we have a nontrivially empty nonterminal */
916 			if(*p < 0) {
917 				pempty[*prdptr[i]-NTBASE] = EMPTY;
918 				/* got one ... try for another */
919 				goto again;
920 			}
921 		}
922 	}
923 }
924 
925 /*
926  * generate the states
927  */
928 void
929 stagen(void)
930 {
931 
932 	int c, i, j;
933 	Wset *p, *q;
934 
935 	/* initialize */
936 	nstate = 0;
937 
938 	/* THIS IS FUNNY from the standpoint of portability
939 	 * it represents the magic moment when the mem0 array, which has
940 	 * been holding the productions, starts to hold item pointers, of a
941 	 * different type...
942 	 * someday, alloc should be used to allocate all this stuff... for now, we
943 	 * accept that if pointers don't fit in integers, there is a problem...
944 	 */
945 
946 	pstate[0] = pstate[1] = (Item*)mem;
947 	aryfil(clset.lset, tbitset, 0);
948 	putitem(prdptr[0]+1, &clset);
949 	tystate[0] = MUSTDO;
950 	nstate = 1;
951 	pstate[2] = pstate[1];
952 
953 	aryfil(amem, ACTSIZE, 0);
954 
955 	/* now, the main state generation loop */
956 more:
957 	SLOOP(i) {
958 		if(tystate[i] != MUSTDO)
959 			continue;
960 		tystate[i] = DONE;
961 		aryfil(temp1, nnonter+1, 0);
962 		/* take state i, close it, and do gotos */
963 		closure(i);
964 		/* generate goto's */
965 		WSLOOP(wsets, p) {
966 			if(p->flag)
967 				continue;
968 			p->flag = 1;
969 			c = *(p->pitem);
970 			if(c <= 1) {
971 				if(pstate[i+1]-pstate[i] <= p-wsets)
972 					tystate[i] = MUSTLOOKAHEAD;
973 				continue;
974 			}
975 			/* do a goto on c */
976 			WSLOOP(p, q)
977 				/* this item contributes to the goto */
978 				if(c == *(q->pitem)) {
979 					putitem(q->pitem+1, &q->ws);
980 					q->flag = 1;
981 				}
982 			if(c < NTBASE)
983 				state(c);	/* register new state */
984 			else
985 				temp1[c-NTBASE] = state(c);
986 		}
987 		if(gsdebug && foutput != 0) {
988 			Bprint(foutput, "%d: ", i);
989 			NTLOOP(j)
990 				if(temp1[j])
991 					Bprint(foutput, "%s %d, ", nontrst[j].name, temp1[j]);
992 			Bprint(foutput, "\n");
993 		}
994 		indgo[i] = apack(&temp1[1], nnonter-1) - 1;
995 		/* we have done one goto; do some more */
996 		goto more;
997 	}
998 	/* no more to do... stop */
999 }
1000 
1001 /*
1002  * generate the closure of state i
1003  */
1004 void
1005 closure(int i)
1006 {
1007 
1008 	Wset *u, *v;
1009 	Item *p, *q;
1010 	int c, ch, work, k, *pi, **s, **t;
1011 
1012 	zzclose++;
1013 
1014 	/* first, copy kernel of state i to wsets */
1015 	cwp = wsets;
1016 	ITMLOOP(i, p, q) {
1017 		cwp->pitem = p->pitem;
1018 		cwp->flag = 1;			/* this item must get closed */
1019 		SETLOOP(k)
1020 			cwp->ws.lset[k] = p->look->lset[k];
1021 		WSBUMP(cwp);
1022 	}
1023 
1024 	/* now, go through the loop, closing each item */
1025 	work = 1;
1026 	while(work) {
1027 		work = 0;
1028 		WSLOOP(wsets, u) {
1029 			if(u->flag == 0)
1030 				continue;
1031 			/* dot is before c */
1032 			c = *(u->pitem);
1033 			if(c < NTBASE) {
1034 				u->flag = 0;
1035 				/* only interesting case is where . is before nonterminal */
1036 				continue;
1037 			}
1038 
1039 			/* compute the lookahead */
1040 			aryfil(clset.lset, tbitset, 0);
1041 
1042 			/* find items involving c */
1043 			WSLOOP(u, v)
1044 				if(v->flag == 1 && *(pi=v->pitem) == c) {
1045 					v->flag = 0;
1046 					if(nolook)
1047 						continue;
1048 					while((ch = *++pi) > 0) {
1049 						/* terminal symbol */
1050 						if(ch < NTBASE) {
1051 							SETBIT(clset.lset, ch);
1052 							break;
1053 						}
1054 						/* nonterminal symbol */
1055 						setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1056 						if(!pempty[ch-NTBASE])
1057 							break;
1058 					}
1059 					if(ch <= 0)
1060 						setunion(clset.lset, v->ws.lset);
1061 				}
1062 
1063 			/*
1064 			 * now loop over productions derived from c
1065 			 * c is now nonterminal number
1066 			 */
1067 			c -= NTBASE;
1068 			t = pres[c+1];
1069 			for(s = pres[c]; s < t; ++s) {
1070 				/*
1071 				 * put these items into the closure
1072 				 * is the item there
1073 				 */
1074 				WSLOOP(wsets, v)
1075 					/* yes, it is there */
1076 					if(v->pitem == *s) {
1077 						if(nolook)
1078 							goto nexts;
1079 						if(setunion(v->ws.lset, clset.lset))
1080 							v->flag = work = 1;
1081 						goto nexts;
1082 					}
1083 
1084 				/*  not there; make a new entry */
1085 				if(cwp-wsets+1 >= WSETSIZE)
1086 					error( "working set overflow");
1087 				cwp->pitem = *s;
1088 				cwp->flag = 1;
1089 				if(!nolook) {
1090 					work = 1;
1091 					SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1092 				}
1093 				WSBUMP(cwp);
1094 
1095 			nexts:;
1096 			}
1097 		}
1098 	}
1099 
1100 	/* have computed closure; flags are reset; return */
1101 	if(cwp > zzcwp)
1102 		zzcwp = cwp;
1103 	if(cldebug && foutput != 0) {
1104 		Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1105 		WSLOOP(wsets, u) {
1106 			if(u->flag)
1107 				Bprint(foutput, "flag set!\n");
1108 			u->flag = 0;
1109 			Bprint(foutput, "\t%s", writem(u->pitem));
1110 			prlook(&u->ws);
1111 			Bprint(foutput, "\n");
1112 		}
1113 	}
1114 }
1115 
1116 /*
1117  * decide if the lookahead set pointed to by p is known
1118  * return pointer to a perminent location for the set
1119  */
1120 Lkset*
1121 flset(Lkset *p)
1122 {
1123 	Lkset *q;
1124 	int *u, *v, *w, j;
1125 
1126 	for(q = &lkst[nlset]; q-- > lkst;) {
1127 		u = p->lset;
1128 		v = q->lset;
1129 		w = &v[tbitset];
1130 		while(v < w)
1131 			if(*u++ != *v++)
1132 				goto more;
1133 		/* we have matched */
1134 		return q;
1135 	more:;
1136 	}
1137 	/* add a new one */
1138 	q = &lkst[nlset++];
1139 	if(nlset >= LSETSIZE)
1140 		error("too many lookahead sets");
1141 	SETLOOP(j)
1142 		q->lset[j] = p->lset[j];
1143 	return q;
1144 }
1145 
1146 void
1147 cleantmp(void)
1148 {
1149 	ZAPFILE(actname);
1150 	ZAPFILE(tempname);
1151 }
1152 
1153 void
1154 intr(void)
1155 {
1156 	cleantmp();
1157 	exits("interrupted");
1158 }
1159 
1160 void
1161 setup(int argc, char *argv[])
1162 {
1163 	long c, t;
1164 	int i, j, lev, ty, ytab, *p;
1165 	int vflag, dflag, stem;
1166 	char actnm[8], *stemc;
1167 
1168 	ytab = 0;
1169 	vflag = 0;
1170 	dflag = 0;
1171 	stem = 0;
1172 	stemc = "y";
1173 	foutput = 0;
1174 	fdefine = 0;
1175 	fdebug = 0;
1176 	ARGBEGIN{
1177 	case 'v':
1178 	case 'V':
1179 		vflag++;
1180 		break;
1181 	case 'D':
1182 		yydebug = ARGF();
1183 		break;
1184 	case 'd':
1185 		dflag++;
1186 		break;
1187 	case 'o':
1188 		ytab++;
1189 		ytabc = ARGF();
1190 		break;
1191 	case 's':
1192 		stem++;
1193 		stemc = ARGF();
1194 		break;
1195 	case 'S':
1196 		parser = PARSERS;
1197 		break;
1198 	default:
1199 		error("illegal option: %c", ARGC());
1200 	}ARGEND
1201 	openup(stemc, dflag, vflag, ytab, ytabc);
1202 
1203 	ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1204 	faction = Bopen(actname = mktemp(tactname), OWRITE);
1205 	if(ftemp == 0 || faction == 0)
1206 		error("cannot open temp file");
1207 	if(argc < 1)
1208 		error("no input file");
1209 	infile = argv[0];
1210 	finput = Bopen(infile, OREAD);
1211 	if(finput == 0)
1212 		error("cannot open '%s'", argv[0]);
1213 	cnamp = cnames;
1214 
1215 	defin(0, "$end");
1216 	extval = PRIVATE;	/* tokens start in unicode 'private use' */
1217 	defin(0, "error");
1218 	defin(1, "$accept");
1219 	defin(0, "$unk");
1220 	mem = mem0;
1221 	i = 0;
1222 
1223 	for(t = gettok(); t != MARK && t != ENDFILE;)
1224 	switch(t) {
1225 	case ';':
1226 		t = gettok();
1227 		break;
1228 
1229 	case START:
1230 		if(gettok() != IDENTIFIER)
1231 			error("bad %%start construction");
1232 		start = chfind(1, tokname);
1233 		t = gettok();
1234 		continue;
1235 
1236 	case TYPEDEF:
1237 		if(gettok() != TYPENAME)
1238 			error("bad syntax in %%type");
1239 		ty = numbval;
1240 		for(;;) {
1241 			t = gettok();
1242 			switch(t) {
1243 			case IDENTIFIER:
1244 				if((t=chfind(1, tokname)) < NTBASE) {
1245 					j = TYPE(toklev[t]);
1246 					if(j != 0 && j != ty)
1247 						error("type redeclaration of token %s",
1248 							tokset[t].name);
1249 					else
1250 						SETTYPE(toklev[t], ty);
1251 				} else {
1252 					j = nontrst[t-NTBASE].value;
1253 					if(j != 0 && j != ty)
1254 						error("type redeclaration of nonterminal %s",
1255 							nontrst[t-NTBASE].name );
1256 					else
1257 						nontrst[t-NTBASE].value = ty;
1258 				}
1259 			case ',':
1260 				continue;
1261 			case ';':
1262 				t = gettok();
1263 			default:
1264 				break;
1265 			}
1266 			break;
1267 		}
1268 		continue;
1269 
1270 	case UNION:
1271 		/* copy the union declaration to the output */
1272 		cpyunion();
1273 		t = gettok();
1274 		continue;
1275 
1276 	case LEFT:
1277 	case BINARY:
1278 	case RIGHT:
1279 		i++;
1280 
1281 	case TERM:
1282 		/* nonzero means new prec. and assoc. */
1283 		lev = t-TERM;
1284 		ty = 0;
1285 
1286 		/* get identifiers so defined */
1287 		t = gettok();
1288 
1289 		/* there is a type defined */
1290 		if(t == TYPENAME) {
1291 			ty = numbval;
1292 			t = gettok();
1293 		}
1294 		for(;;) {
1295 			switch(t) {
1296 			case ',':
1297 				t = gettok();
1298 				continue;
1299 
1300 			case ';':
1301 				break;
1302 
1303 			case IDENTIFIER:
1304 				j = chfind(0, tokname);
1305 				if(j >= NTBASE)
1306 					error("%s defined earlier as nonterminal", tokname);
1307 				if(lev) {
1308 					if(ASSOC(toklev[j]))
1309 						error("redeclaration of precedence of %s", tokname);
1310 					SETASC(toklev[j], lev);
1311 					SETPLEV(toklev[j], i);
1312 				}
1313 				if(ty) {
1314 					if(TYPE(toklev[j]))
1315 						error("redeclaration of type of %s", tokname);
1316 					SETTYPE(toklev[j],ty);
1317 				}
1318 				t = gettok();
1319 				if(t == NUMBER) {
1320 					tokset[j].value = numbval;
1321 					if(j < ndefout && j > 3)
1322 						error("please define type number of %s earlier",
1323 							tokset[j].name);
1324 					t = gettok();
1325 				}
1326 				continue;
1327 			}
1328 			break;
1329 		}
1330 		continue;
1331 
1332 	case LCURLY:
1333 		defout(0);
1334 		cpycode();
1335 		t = gettok();
1336 		continue;
1337 
1338 	default:
1339 		error("syntax error");
1340 	}
1341 	if(t == ENDFILE)
1342 		error("unexpected EOF before %%");
1343 
1344 	/* t is MARK */
1345 	Bprint(ftable, "extern	int	yyerrflag;\n");
1346 	Bprint(ftable, "#ifndef	YYMAXDEPTH\n");
1347 	Bprint(ftable, "#define	YYMAXDEPTH	150\n");
1348 	Bprint(ftable, "#endif\n" );
1349 	if(!ntypes) {
1350 		Bprint(ftable, "#ifndef	YYSTYPE\n");
1351 		Bprint(ftable, "#define	YYSTYPE	int\n");
1352 		Bprint(ftable, "#endif\n");
1353 	}
1354 	Bprint(ftable, "YYSTYPE	yylval;\n");
1355 	Bprint(ftable, "YYSTYPE	yyval;\n");
1356 
1357 	prdptr[0] = mem;
1358 
1359 	/* added production */
1360 	*mem++ = NTBASE;
1361 
1362 	/* if start is 0, we will overwrite with the lhs of the first rule */
1363 	*mem++ = start;
1364 	*mem++ = 1;
1365 	*mem++ = 0;
1366 	prdptr[1] = mem;
1367 	while((t=gettok()) == LCURLY)
1368 		cpycode();
1369 	if(t != IDENTCOLON)
1370 		error("bad syntax on first rule");
1371 
1372 	if(!start)
1373 		prdptr[0][1] = chfind(1, tokname);
1374 
1375 	/* read rules */
1376 	while(t != MARK && t != ENDFILE) {
1377 		/* process a rule */
1378 		rlines[nprod] = lineno;
1379 		if(t == '|')
1380 			*mem++ = *prdptr[nprod-1];
1381 		else
1382 			if(t == IDENTCOLON) {
1383 				*mem = chfind(1, tokname);
1384 				if(*mem < NTBASE)
1385 					error("token illegal on LHS of grammar rule");
1386 				mem++;
1387 			} else
1388 				error("illegal rule: missing semicolon or | ?");
1389 		/* read rule body */
1390 		t = gettok();
1391 
1392 	more_rule:
1393 		while(t == IDENTIFIER) {
1394 			*mem = chfind(1, tokname);
1395 			if(*mem < NTBASE)
1396 				levprd[nprod] = toklev[*mem];
1397 			mem++;
1398 			t = gettok();
1399 		}
1400 		if(t == PREC) {
1401 			if(gettok() != IDENTIFIER)
1402 				error("illegal %%prec syntax");
1403 			j = chfind(2, tokname);
1404 			if(j >= NTBASE)
1405 				error("nonterminal %s illegal after %%prec",
1406 					nontrst[j-NTBASE].name);
1407 			levprd[nprod] = toklev[j];
1408 			t = gettok();
1409 		}
1410 		if(t == '=') {
1411 			levprd[nprod] |= ACTFLAG;
1412 			Bprint(faction, "\ncase %d:", nprod);
1413 			cpyact(mem-prdptr[nprod]-1);
1414 			Bprint(faction, " break;");
1415 			if((t=gettok()) == IDENTIFIER) {
1416 
1417 				/* action within rule... */
1418 				sprint(actnm, "$$%d", nprod);
1419 
1420 				/* make it a nonterminal */
1421 				j = chfind(1, actnm);
1422 
1423 				/*
1424 				 * the current rule will become rule number nprod+1
1425 				 * move the contents down, and make room for the null
1426 				 */
1427 				for(p = mem; p >= prdptr[nprod]; --p)
1428 					p[2] = *p;
1429 				mem += 2;
1430 
1431 				/* enter null production for action */
1432 				p = prdptr[nprod];
1433 				*p++ = j;
1434 				*p++ = -nprod;
1435 
1436 				/* update the production information */
1437 				levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1438 				levprd[nprod] = ACTFLAG;
1439 				if(++nprod >= NPROD)
1440 					error("more than %d rules", NPROD);
1441 				prdptr[nprod] = p;
1442 
1443 				/* make the action appear in the original rule */
1444 				*mem++ = j;
1445 
1446 				/* get some more of the rule */
1447 				goto more_rule;
1448 			}
1449 		}
1450 
1451 		while(t == ';')
1452 			t = gettok();
1453 		*mem++ = -nprod;
1454 
1455 		/* check that default action is reasonable */
1456 		if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1457 
1458 			/* no explicit action, LHS has value */
1459 			int tempty;
1460 
1461 			tempty = prdptr[nprod][1];
1462 			if(tempty < 0)
1463 				error("must return a value, since LHS has a type");
1464 			else
1465 				if(tempty >= NTBASE)
1466 					tempty = nontrst[tempty-NTBASE].value;
1467 				else
1468 					tempty = TYPE(toklev[tempty]);
1469 			if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1470 				error("default action causes potential type clash");
1471 		}
1472 		nprod++;
1473 		if(nprod >= NPROD)
1474 			error("more than %d rules", NPROD);
1475 		prdptr[nprod] = mem;
1476 		levprd[nprod] = 0;
1477 	}
1478 
1479 	/* end of all rules */
1480 	defout(1);
1481 
1482 	finact();
1483 	if(t == MARK) {
1484 		Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1485 		while((c=Bgetrune(finput)) != Beof)
1486 			Bputrune(ftable, c);
1487 	}
1488 	Bterm(finput);
1489 }
1490 
1491 /*
1492  * finish action routine
1493  */
1494 void
1495 finact(void)
1496 {
1497 
1498 	Bterm(faction);
1499 	Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1500 	Bprint(ftable, "#define YYERRCODE %d\n", 2);
1501 }
1502 
1503 /*
1504  * define s to be a terminal if t=0
1505  * or a nonterminal if t=1
1506  */
1507 int
1508 defin(int nt, char *s)
1509 {
1510 	int val;
1511 	Rune rune;
1512 
1513 	val = 0;
1514 	if(nt) {
1515 		nnonter++;
1516 		if(nnonter >= NNONTERM)
1517 			error("too many nonterminals, limit %d",NNONTERM);
1518 		nontrst[nnonter].name = cstash(s);
1519 		return NTBASE + nnonter;
1520 	}
1521 
1522 	/* must be a token */
1523 	ntokens++;
1524 	if(ntokens >= NTERMS)
1525 		error("too many terminals, limit %d", NTERMS);
1526 	tokset[ntokens].name = cstash(s);
1527 
1528 	/* establish value for token */
1529 	/* single character literal */
1530 	if(s[0] == ' ') {
1531 		val = chartorune(&rune, &s[1]);
1532 		if(s[val+1] == 0) {
1533 			val = rune;
1534 			goto out;
1535 		}
1536 	}
1537 
1538 	/* escape sequence */
1539 	if(s[0] == ' ' && s[1] == '\\') {
1540 		if(s[3] == 0) {
1541 			/* single character escape sequence */
1542 			switch(s[2]) {
1543 			case 'n':	val = '\n'; break;
1544 			case 'r':	val = '\r'; break;
1545 			case 'b':	val = '\b'; break;
1546 			case 't':	val = '\t'; break;
1547 			case 'f':	val = '\f'; break;
1548 			case '\'':	val = '\''; break;
1549 			case '"':	val = '"'; break;
1550 			case '\\':	val = '\\'; break;
1551 			default:	error("invalid escape");
1552 			}
1553 			goto out;
1554 		}
1555 
1556 		/* \nnn sequence */
1557 		if(s[2] >= '0' && s[2] <= '7') {
1558 			if(s[3] < '0' ||
1559 			   s[3] > '7' ||
1560 			   s[4] < '0' ||
1561 			   s[4] > '7' ||
1562 			   s[5] != 0)
1563 				error("illegal \\nnn construction");
1564 			val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1565 			if(val == 0)
1566 				error("'\\000' is illegal");
1567 			goto out;
1568 		}
1569 		error("unknown escape");
1570 	}
1571 	val = extval++;
1572 
1573 out:
1574 	tokset[ntokens].value = val;
1575 	toklev[ntokens] = 0;
1576 	return ntokens;
1577 }
1578 
1579 /*
1580  * write out the defines (at the end of the declaration section)
1581  */
1582 void
1583 defout(int last)
1584 {
1585 	int i, c;
1586 	char sar[NAMESIZE+10];
1587 
1588 	for(i=ndefout; i<=ntokens; i++) {
1589 		/* non-literals */
1590 		c = tokset[i].name[0];
1591 		if(c != ' ' && c != '$') {
1592 			Bprint(ftable, "#define	%s	%d\n",
1593 				tokset[i].name, tokset[i].value);
1594 			if(fdefine)
1595 				Bprint(fdefine, "#define\t%s\t%d\n",
1596 					tokset[i].name, tokset[i].value);
1597 		}
1598 	}
1599 	ndefout = ntokens+1;
1600 	if(last && fdebug) {
1601 		Bprint(fdebug, "char*	yytoknames[] =\n{\n");
1602 		TLOOP(i) {
1603 			if(tokset[i].name) {
1604 				chcopy(sar, tokset[i].name);
1605 				Bprint(fdebug, "\t\"%s\",\n", sar);
1606 				continue;
1607 			}
1608 			Bprint(fdebug, "\t0,\n");
1609 		}
1610 		Bprint(fdebug, "};\n");
1611 	}
1612 }
1613 
1614 char*
1615 cstash(char *s)
1616 {
1617 	char *temp;
1618 
1619 	temp = cnamp;
1620 	do {
1621 		if(cnamp >= &cnames[cnamsz])
1622 			error("too many characters in id's and literals");
1623 		else
1624 			*cnamp++ = *s;
1625 	} while(*s++);
1626 	return temp;
1627 }
1628 
1629 long
1630 gettok(void)
1631 {
1632 	long c;
1633 	Rune rune;
1634 	int i, base, match, reserve;
1635 	static int peekline;
1636 
1637 begin:
1638 	reserve = 0;
1639 	lineno += peekline;
1640 	peekline = 0;
1641 	c = Bgetrune(finput);
1642 	while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1643 		if(c == '\n')
1644 			lineno++;
1645 		c = Bgetrune(finput);
1646 	}
1647 
1648 	/* skip comment */
1649 	if(c == '/') {
1650 		lineno += skipcom();
1651 		goto begin;
1652 	}
1653 	switch(c) {
1654 	case Beof:
1655 		return ENDFILE;
1656 
1657 	case '{':
1658 		Bungetrune(finput);
1659 		return '=';
1660 
1661 	case '<':
1662 		/* get, and look up, a type name (union member name) */
1663 		i = 0;
1664 		while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1665 			rune = c;
1666 			c = runetochar(&tokname[i], &rune);
1667 			if(i < NAMESIZE)
1668 				i += c;
1669 		}
1670 		if(c != '>')
1671 			error("unterminated < ... > clause");
1672 		tokname[i] = 0;
1673 		for(i=1; i<=ntypes; i++)
1674 			if(!strcmp(typeset[i], tokname)) {
1675 				numbval = i;
1676 				return TYPENAME;
1677 			}
1678 		ntypes++;
1679 		numbval = ntypes;
1680 		typeset[numbval] = cstash(tokname);
1681 		return TYPENAME;
1682 
1683 	case '"':
1684 	case '\'':
1685 		match = c;
1686 		tokname[0] = ' ';
1687 		i = 1;
1688 		for(;;) {
1689 			c = Bgetrune(finput);
1690 			if(c == '\n' || c <= 0)
1691 				error("illegal or missing ' or \"" );
1692 			if(c == '\\') {
1693 				tokname[i] = '\\';
1694 				if(i < NAMESIZE)
1695 					i++;
1696 				c = Bgetrune(finput);
1697 			} else
1698 				if(c == match)
1699 					break;
1700 			rune = c;
1701 			c = runetochar(&tokname[i], &rune);
1702 			if(i < NAMESIZE)
1703 				i += c;
1704 		}
1705 		break;
1706 
1707 	case '%':
1708 	case '\\':
1709 		switch(c = Bgetrune(finput)) {
1710 		case '0':	return TERM;
1711 		case '<':	return LEFT;
1712 		case '2':	return BINARY;
1713 		case '>':	return RIGHT;
1714 		case '%':
1715 		case '\\':	return MARK;
1716 		case '=':	return PREC;
1717 		case '{':	return LCURLY;
1718 		default:	reserve = 1;
1719 		}
1720 
1721 	default:
1722 		/* number */
1723 		if(isdigit(c)) {
1724 			numbval = c-'0';
1725 			base = (c=='0')? 8: 10;
1726 			for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1727 				numbval = numbval*base + (c-'0');
1728 			Bungetrune(finput);
1729 			return NUMBER;
1730 		}
1731 		if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1732 			i = 0;
1733 			while(islower(c) || isupper(c) || isdigit(c) ||
1734 			    c == '-' || c=='_' || c=='.' || c=='$') {
1735 				if(reserve && isupper(c))
1736 					c += 'a'-'A';
1737 				rune = c;
1738 				c = runetochar(&tokname[i], &rune);
1739 				if(i < NAMESIZE)
1740 					i += c;
1741 				c = Bgetrune(finput);
1742 			}
1743 		} else
1744 			return c;
1745 		Bungetrune(finput);
1746 	}
1747 	tokname[i] = 0;
1748 
1749 	/* find a reserved word */
1750 	if(reserve) {
1751 		for(c=0; resrv[c].name; c++)
1752 			if(strcmp(tokname, resrv[c].name) == 0)
1753 				return resrv[c].value;
1754 		error("invalid escape, or illegal reserved word: %s", tokname);
1755 	}
1756 
1757 	/* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1758 	c = Bgetrune(finput);
1759 	while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1760 		if(c == '\n')
1761 			peekline++;
1762 		/* look for comments */
1763 		if(c == '/')
1764 			peekline += skipcom();
1765 		c = Bgetrune(finput);
1766 	}
1767 	if(c == ':')
1768 		return IDENTCOLON;
1769 	Bungetrune(finput);
1770 	return IDENTIFIER;
1771 }
1772 
1773 /*
1774  * determine the type of a symbol
1775  */
1776 int
1777 fdtype(int t)
1778 {
1779 	int v;
1780 
1781 	if(t >= NTBASE)
1782 		v = nontrst[t-NTBASE].value;
1783 	else
1784 		v = TYPE(toklev[t]);
1785 	if(v <= 0)
1786 		error("must specify type for %s", (t>=NTBASE)?
1787 			nontrst[t-NTBASE].name: tokset[t].name);
1788 	return v;
1789 }
1790 
1791 int
1792 chfind(int t, char *s)
1793 {
1794 	int i;
1795 
1796 	if(s[0] == ' ')
1797 		t = 0;
1798 	TLOOP(i)
1799 		if(!strcmp(s, tokset[i].name))
1800 			return i;
1801 	NTLOOP(i)
1802 		if(!strcmp(s, nontrst[i].name))
1803 			return NTBASE+i;
1804 
1805 	/* cannot find name */
1806 	if(t > 1)
1807 		error("%s should have been defined earlier", s);
1808 	return defin(t, s);
1809 }
1810 
1811 /*
1812  * copy the union declaration to the output, and the define file if present
1813  */
1814 void
1815 cpyunion(void)
1816 {
1817 	long c;
1818 	int level;
1819 
1820 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1821 	Bprint(ftable, "typedef union ");
1822 	if(fdefine != 0)
1823 		Bprint(fdefine, "\ntypedef union ");
1824 
1825 	level = 0;
1826 	for(;;) {
1827 		if((c=Bgetrune(finput)) == Beof)
1828 			error("EOF encountered while processing %%union");
1829 		Bputrune(ftable, c);
1830 		if(fdefine != 0)
1831 			Bputrune(fdefine, c);
1832 		switch(c) {
1833 		case '\n':
1834 			lineno++;
1835 			break;
1836 		case '{':
1837 			level++;
1838 			break;
1839 		case '}':
1840 			level--;
1841 
1842 			/* we are finished copying */
1843 			if(level == 0) {
1844 				Bprint(ftable, " YYSTYPE;\n");
1845 				if(fdefine != 0)
1846 					Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1847 				return;
1848 			}
1849 		}
1850 	}
1851 }
1852 
1853 /*
1854  * copies code between \{ and \}
1855  */
1856 void
1857 cpycode(void)
1858 {
1859 
1860 	long c;
1861 
1862 	c = Bgetrune(finput);
1863 	if(c == '\n') {
1864 		c = Bgetrune(finput);
1865 		lineno++;
1866 	}
1867 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1868 	while(c != Beof) {
1869 		if(c == '\\') {
1870 			if((c=Bgetrune(finput)) == '}')
1871 				return;
1872 			Bputc(ftable, '\\');
1873 		}
1874 		if(c == '%') {
1875 			if((c=Bgetrune(finput)) == '}')
1876 				return;
1877 			Bputc(ftable, '%');
1878 		}
1879 		Bputrune(ftable, c);
1880 		if(c == '\n')
1881 			lineno++;
1882 		c = Bgetrune(finput);
1883 	}
1884 	error("eof before %%}");
1885 }
1886 
1887 /*
1888  * skip over comments
1889  * skipcom is called after reading a '/'
1890  */
1891 int
1892 skipcom(void)
1893 {
1894 	long c;
1895 	int i;
1896 
1897 	/* i is the number of lines skipped */
1898 	i = 0;
1899 	if(Bgetrune(finput) != '*')
1900 		error("illegal comment");
1901 	c = Bgetrune(finput);
1902 	while(c != Beof) {
1903 		while(c == '*')
1904 			if((c=Bgetrune(finput)) == '/')
1905 				return i;
1906 		if(c == '\n')
1907 			i++;
1908 		c = Bgetrune(finput);
1909 	}
1910 	error("EOF inside comment");
1911 	return 0;
1912 }
1913 
1914 /*
1915  * copy C action to the next ; or closing }
1916  */
1917 void
1918 cpyact(int offset)
1919 {
1920 	long c;
1921 	int brac, match, j, s, fnd, tok;
1922 
1923 	Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1924 	brac = 0;
1925 
1926 loop:
1927 	c = Bgetrune(finput);
1928 swt:
1929 	switch(c) {
1930 	case ';':
1931 		if(brac == 0) {
1932 			Bputrune(faction, c);
1933 			return;
1934 		}
1935 		goto lcopy;
1936 
1937 	case '{':
1938 		brac++;
1939 		goto lcopy;
1940 
1941 	case '$':
1942 		s = 1;
1943 		tok = -1;
1944 		c = Bgetrune(finput);
1945 
1946 		/* type description */
1947 		if(c == '<') {
1948 			Bungetrune(finput);
1949 			if(gettok() != TYPENAME)
1950 				error("bad syntax on $<ident> clause");
1951 			tok = numbval;
1952 			c = Bgetrune(finput);
1953 		}
1954 		if(c == '$') {
1955 			Bprint(faction, "yyval");
1956 
1957 			/* put out the proper tag... */
1958 			if(ntypes) {
1959 				if(tok < 0)
1960 					tok = fdtype(*prdptr[nprod]);
1961 				Bprint(faction, ".%s", typeset[tok]);
1962 			}
1963 			goto loop;
1964 		}
1965 		if(c == '-') {
1966 			s = -s;
1967 			c = Bgetrune(finput);
1968 		}
1969 		if(isdigit(c)) {
1970 			j = 0;
1971 			while(isdigit(c)) {
1972 				j = j*10 + (c-'0');
1973 				c = Bgetrune(finput);
1974 			}
1975 			Bungetrune(finput);
1976 			j = j*s - offset;
1977 			if(j > 0)
1978 				error("Illegal use of $%d", j+offset);
1979 
1980 		dollar:
1981 			Bprint(faction, "yypt[-%d].yyv", -j);
1982 
1983 			/* put out the proper tag */
1984 			if(ntypes) {
1985 				if(j+offset <= 0 && tok < 0)
1986 					error("must specify type of $%d", j+offset);
1987 				if(tok < 0)
1988 					tok = fdtype(prdptr[nprod][j+offset]);
1989 				Bprint(faction, ".%s", typeset[tok]);
1990 			}
1991 			goto loop;
1992 		}
1993 		if(isupper(c) || islower(c) || c == '_' || c == '.') {
1994 			int tok; /* tok used oustide for type info */
1995 
1996 			/* look for $name */
1997 			Bungetrune(finput);
1998 			if(gettok() != IDENTIFIER)
1999 				error("$ must be followed by an identifier");
2000 			tok = chfind(2, tokname);
2001 			if((c = Bgetrune(finput)) != '#') {
2002 				Bungetrune(finput);
2003 				fnd = -1;
2004 			} else
2005 				if(gettok() != NUMBER) {
2006 					error("# must be followed by number");
2007 					fnd = -1;
2008 				} else
2009 					fnd = numbval;
2010 			for(j=1; j<=offset; ++j)
2011 				if(tok == prdptr[nprod][j]) {
2012 					if(--fnd <= 0) {
2013 						j -= offset;
2014 						goto dollar;
2015 					}
2016 				}
2017 			error("$name or $name#number not found");
2018 		}
2019 		Bputc(faction, '$');
2020 		if(s < 0 )
2021 			Bputc(faction, '-');
2022 		goto swt;
2023 
2024 	case '}':
2025 		brac--;
2026 		if(brac)
2027 			goto lcopy;
2028 		Bputrune(faction, c);
2029 		return;
2030 
2031 	case '/':
2032 		/* look for comments */
2033 		Bputrune(faction, c);
2034 		c = Bgetrune(finput);
2035 		if(c != '*')
2036 			goto swt;
2037 
2038 		/* it really is a comment */
2039 		Bputrune(faction, c);
2040 		c = Bgetrune(finput);
2041 		while(c >= 0) {
2042 			while(c == '*') {
2043 				Bputrune(faction, c);
2044 				if((c=Bgetrune(finput)) == '/')
2045 					goto lcopy;
2046 			}
2047 			Bputrune(faction, c);
2048 			if(c == '\n')
2049 				lineno++;
2050 			c = Bgetrune(finput);
2051 		}
2052 		error("EOF inside comment");
2053 
2054 	case '\'':
2055 		/* character constant */
2056 		match = '\'';
2057 		goto string;
2058 
2059 	case '"':
2060 		/* character string */
2061 		match = '"';
2062 
2063 	string:
2064 		Bputrune(faction, c);
2065 		while(c = Bgetrune(finput)) {
2066 			if(c == '\\') {
2067 				Bputrune(faction, c);
2068 				c = Bgetrune(finput);
2069 				if(c == '\n')
2070 					lineno++;
2071 			} else
2072 				if(c == match)
2073 					goto lcopy;
2074 				if(c == '\n')
2075 					error("newline in string or char. const.");
2076 			Bputrune(faction, c);
2077 		}
2078 		error("EOF in string or character constant");
2079 
2080 	case Beof:
2081 		error("action does not terminate");
2082 
2083 	case '\n':
2084 		lineno++;
2085 		goto lcopy;
2086 	}
2087 
2088 lcopy:
2089 	Bputrune(faction, c);
2090 	goto loop;
2091 }
2092 
2093 void
2094 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2095 {
2096 	char buf[256];
2097 
2098 	if(vflag) {
2099 		sprint(buf, "%s.%s", stem, FILEU);
2100 		foutput = Bopen(buf, OWRITE);
2101 		if(foutput == 0)
2102 			error("cannot open %s", buf);
2103 	}
2104 	if(yydebug) {
2105 		sprint(buf, "%s.%s", stem, FILEDEBUG);
2106 		if((fdebug = Bopen(buf, OWRITE)) == 0)
2107 			error("can't open %s", buf);
2108 	}
2109 	if(dflag) {
2110 		sprint(buf, "%s.%s", stem, FILED);
2111 		fdefine = Bopen(buf, OWRITE);
2112 		if(fdefine == 0)
2113 			error("can't create %s", buf);
2114 	}
2115 	if(ytab == 0)
2116 		sprint(buf, "%s.%s", stem, OFILE);
2117 	else
2118 		strcpy(buf, ytabc);
2119 	ftable = Bopen(buf, OWRITE);
2120 	if(ftable == 0)
2121 		error("cannot open table file %s", buf);
2122 }
2123 
2124 /*
2125  * print the output for the states
2126  */
2127 void
2128 output(void)
2129 {
2130 	int i, k, c;
2131 	Wset *u, *v;
2132 
2133 	Bprint(ftable, "short	yyexca[] =\n{");
2134 	if(fdebug)
2135 		Bprint(fdebug, "char*	yystates[] =\n{\n");
2136 
2137 	/* output the stuff for state i */
2138 	SLOOP(i) {
2139 		nolook = tystate[i]!=MUSTLOOKAHEAD;
2140 		closure(i);
2141 
2142 		/* output actions */
2143 		nolook = 1;
2144 		aryfil(temp1, ntokens+nnonter+1, 0);
2145 		WSLOOP(wsets, u) {
2146 			c = *(u->pitem);
2147 			if(c > 1 && c < NTBASE && temp1[c] == 0) {
2148 				WSLOOP(u, v)
2149 					if(c == *(v->pitem))
2150 						putitem(v->pitem+1, (Lkset*)0);
2151 				temp1[c] = state(c);
2152 			} else
2153 				if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2154 					temp1[c+ntokens] = amem[indgo[i]+c];
2155 		}
2156 		if(i == 1)
2157 			temp1[1] = ACCEPTCODE;
2158 
2159 		/* now, we have the shifts; look at the reductions */
2160 		lastred = 0;
2161 		WSLOOP(wsets, u) {
2162 			c = *u->pitem;
2163 
2164 			/* reduction */
2165 			if(c <= 0) {
2166 				lastred = -c;
2167 				TLOOP(k)
2168 					if(BIT(u->ws.lset, k)) {
2169 						if(temp1[k] == 0)
2170 							temp1[k] = c;
2171 						else
2172 						if(temp1[k] < 0) { /* reduce/reduce conflict */
2173 							if(foutput)
2174 								Bprint(foutput,
2175 									"\n%d: reduce/reduce conflict"
2176 									" (red'ns %d and %d ) on %s",
2177 									i, -temp1[k], lastred,
2178 									symnam(k));
2179 							if(-temp1[k] > lastred)
2180 								temp1[k] = -lastred;
2181 							zzrrconf++;
2182 						} else
2183 							/* potential shift/reduce conflict */
2184 							precftn( lastred, k, i );
2185 					}
2186 				}
2187 		}
2188 		wract(i);
2189 	}
2190 
2191 	if(fdebug)
2192 		Bprint(fdebug, "};\n");
2193 	Bprint(ftable, "};\n");
2194 	Bprint(ftable, "#define	YYNPROD	%d\n", nprod);
2195 	Bprint(ftable, "#define	YYPRIVATE %d\n", PRIVATE);
2196 	if(yydebug)
2197 		Bprint(ftable, "#define	yydebug	%s\n", yydebug);
2198 }
2199 
2200 /*
2201  * pack state i from temp1 into amem
2202  */
2203 int
2204 apack(int *p, int n)
2205 {
2206 	int *pp, *qq, *rr, off, *q, *r;
2207 
2208 	/* we don't need to worry about checking because
2209 	 * we will only look at entries known to be there...
2210 	 * eliminate leading and trailing 0's
2211 	 */
2212 
2213 	q = p+n;
2214 	for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2215 		;
2216  	/* no actions */
2217 	if(pp > q)
2218 		return 0;
2219 	p = pp;
2220 
2221 	/* now, find a place for the elements from p to q, inclusive */
2222 	r = &amem[ACTSIZE-1];
2223 	for(rr = amem; rr <= r; rr++, off++) {
2224 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2225 			if(*pp != 0)
2226 				if(*pp != *qq && *qq != 0)
2227 					goto nextk;
2228 
2229 		/* we have found an acceptable k */
2230 		if(pkdebug && foutput != 0)
2231 			Bprint(foutput, "off = %d, k = %d\n", off, rr-amem);
2232 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2233 			if(*pp) {
2234 				if(qq > r)
2235 					error("action table overflow");
2236 				if(qq > memp)
2237 					memp = qq;
2238 				*qq = *pp;
2239 			}
2240 		if(pkdebug && foutput != 0)
2241 			for(pp = amem; pp <= memp; pp += 10) {
2242 				Bprint(foutput, "\t");
2243 				for(qq = pp; qq <= pp+9; qq++)
2244 					Bprint(foutput, "%d ", *qq);
2245 				Bprint(foutput, "\n");
2246 			}
2247 		return(off);
2248 	nextk:;
2249 	}
2250 	error("no space in action table");
2251 	return 0;
2252 }
2253 
2254 /*
2255  * output the gotos for the nontermninals
2256  */
2257 void
2258 go2out(void)
2259 {
2260 	int i, j, k, best, count, cbest, times;
2261 
2262 	/* mark begining of gotos */
2263 	Bprint(ftemp, "$\n");
2264 	for(i = 1; i <= nnonter; i++) {
2265 		go2gen(i);
2266 
2267 		/* find the best one to make default */
2268 		best = -1;
2269 		times = 0;
2270 
2271 		/* is j the most frequent */
2272 		for(j = 0; j <= nstate; j++) {
2273 			if(tystate[j] == 0)
2274 				continue;
2275 			if(tystate[j] == best)
2276 				continue;
2277 
2278 			/* is tystate[j] the most frequent */
2279 			count = 0;
2280 			cbest = tystate[j];
2281 			for(k = j; k <= nstate; k++)
2282 				if(tystate[k] == cbest)
2283 					count++;
2284 			if(count > times) {
2285 				best = cbest;
2286 				times = count;
2287 			}
2288 		}
2289 
2290 		/* best is now the default entry */
2291 		zzgobest += times-1;
2292 		for(j = 0; j <= nstate; j++)
2293 			if(tystate[j] != 0 && tystate[j] != best) {
2294 				Bprint(ftemp, "%d,%d,", j, tystate[j]);
2295 				zzgoent++;
2296 			}
2297 
2298 		/* now, the default */
2299 		zzgoent++;
2300 		Bprint(ftemp, "%d\n", best);
2301 	}
2302 }
2303 
2304 /*
2305  * output the gotos for nonterminal c
2306  */
2307 void
2308 go2gen(int c)
2309 {
2310 	int i, work, cc;
2311 	Item *p, *q;
2312 
2313 
2314 	/* first, find nonterminals with gotos on c */
2315 	aryfil(temp1, nnonter+1, 0);
2316 	temp1[c] = 1;
2317 	work = 1;
2318 	while(work) {
2319 		work = 0;
2320 		PLOOP(0, i)
2321 
2322 			/* cc is a nonterminal */
2323 			if((cc=prdptr[i][1]-NTBASE) >= 0)
2324 				/* cc has a goto on c */
2325 				if(temp1[cc] != 0) {
2326 
2327 					/* thus, the left side of production i does too */
2328 					cc = *prdptr[i]-NTBASE;
2329 					if(temp1[cc] == 0) {
2330 						  work = 1;
2331 						  temp1[cc] = 1;
2332 					}
2333 				}
2334 	}
2335 
2336 	/* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2337 	if(g2debug && foutput != 0) {
2338 		Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2339 		NTLOOP(i)
2340 			if(temp1[i])
2341 				Bprint(foutput, "%s ", nontrst[i].name);
2342 		Bprint(foutput, "\n");
2343 	}
2344 
2345 	/* now, go through and put gotos into tystate */
2346 	aryfil(tystate, nstate, 0);
2347 	SLOOP(i)
2348 		ITMLOOP(i, p, q)
2349 			if((cc = *p->pitem) >= NTBASE)
2350 				/* goto on c is possible */
2351 				if(temp1[cc-NTBASE]) {
2352 					tystate[i] = amem[indgo[i]+c];
2353 					break;
2354 				}
2355 }
2356 
2357 /*
2358  * decide a shift/reduce conflict by precedence.
2359  * r is a rule number, t a token number
2360  * the conflict is in state s
2361  * temp1[t] is changed to reflect the action
2362  */
2363 void
2364 precftn(int r, int t, int s)
2365 {
2366 	int lp, lt, action;
2367 
2368 	lp = levprd[r];
2369 	lt = toklev[t];
2370 	if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2371 
2372 		/* conflict */
2373 		if(foutput != 0)
2374 			Bprint(foutput,
2375 				"\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2376 				s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2377 		zzsrconf++;
2378 		return;
2379 	}
2380 	if(PLEVEL(lt) == PLEVEL(lp))
2381 		action = ASSOC(lt);
2382 	else
2383 		if(PLEVEL(lt) > PLEVEL(lp))
2384 			action = RASC;  /* shift */
2385 		else
2386 			action = LASC;  /* reduce */
2387 	switch(action) {
2388 	case BASC:  /* error action */
2389 		temp1[t] = ERRCODE;
2390 		break;
2391 	case LASC:  /* reduce */
2392 		temp1[t] = -r;
2393 		break;
2394 	}
2395 }
2396 
2397 /*
2398  * output state i
2399  * temp1 has the actions, lastred the default
2400  */
2401 void
2402 wract(int i)
2403 {
2404 	int p, p0, p1, ntimes, tred, count, j, flag;
2405 
2406 	/* find the best choice for lastred */
2407 	lastred = 0;
2408 	ntimes = 0;
2409 	TLOOP(j) {
2410 		if(temp1[j] >= 0)
2411 			continue;
2412 		if(temp1[j]+lastred == 0)
2413 			continue;
2414 		/* count the number of appearances of temp1[j] */
2415 		count = 0;
2416 		tred = -temp1[j];
2417 		levprd[tred] |= REDFLAG;
2418 		TLOOP(p)
2419 			if(temp1[p]+tred == 0)
2420 				count++;
2421 		if(count > ntimes) {
2422 			lastred = tred;
2423 			ntimes = count;
2424 		}
2425 	}
2426 
2427 	/*
2428 	 * for error recovery, arrange that, if there is a shift on the
2429 	 * error recovery token, `error', that the default be the error action
2430 	 */
2431 	if(temp1[2] > 0)
2432 		lastred = 0;
2433 
2434 	/* clear out entries in temp1 which equal lastred */
2435 	TLOOP(p)
2436 		if(temp1[p]+lastred == 0)
2437 			temp1[p] = 0;
2438 
2439 	wrstate(i);
2440 	defact[i] = lastred;
2441 	flag = 0;
2442 	TLOOP(p0)
2443 		if((p1=temp1[p0]) != 0) {
2444 			if(p1 < 0) {
2445 				p1 = -p1;
2446 				goto exc;
2447 			}
2448 			if(p1 == ACCEPTCODE) {
2449 				p1 = -1;
2450 				goto exc;
2451 			}
2452 			if(p1 == ERRCODE) {
2453 				p1 = 0;
2454 			exc:
2455 				if(flag++ == 0)
2456 					Bprint(ftable, "-1, %d,\n", i);
2457 				Bprint(ftable, "\t%d, %d,\n", p0, p1);
2458 				zzexcp++;
2459 				continue;
2460 			}
2461 			Bprint(ftemp, "%d,%d,", p0, p1);
2462 			zzacent++;
2463 		}
2464 	if(flag) {
2465 		defact[i] = -2;
2466 		Bprint(ftable, "\t-2, %d,\n", lastred);
2467 	}
2468 	Bprint(ftemp, "\n");
2469 }
2470 
2471 /*
2472  * writes state i
2473  */
2474 void
2475 wrstate(int i)
2476 {
2477 	int j0, j1;
2478 	Item *pp, *qq;
2479 	Wset *u;
2480 
2481 	if(fdebug) {
2482 		if(lastred) {
2483 			Bprint(fdebug, "	0, /*%d*/\n", i);
2484 		} else {
2485 			Bprint(fdebug, "	\"");
2486 			ITMLOOP(i, pp, qq)
2487 				Bprint(fdebug, "%s\\n", writem(pp->pitem));
2488 			if(tystate[i] == MUSTLOOKAHEAD)
2489 				WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2490 					if(*u->pitem < 0)
2491 						Bprint(fdebug, "%s\\n", writem(u->pitem));
2492 			Bprint(fdebug, "\", /*%d*/\n", i);
2493 		}
2494 	}
2495 	if(foutput == 0)
2496 		return;
2497 	Bprint(foutput, "\nstate %d\n", i);
2498 	ITMLOOP(i, pp, qq)
2499 		Bprint(foutput, "\t%s\n", writem(pp->pitem));
2500 	if(tystate[i] == MUSTLOOKAHEAD)
2501 		/* print out empty productions in closure */
2502 		WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2503 			if(*u->pitem < 0)
2504 				Bprint(foutput, "\t%s\n", writem(u->pitem));
2505 
2506 	/* check for state equal to another */
2507 	TLOOP(j0)
2508 		if((j1=temp1[j0]) != 0) {
2509 			Bprint(foutput, "\n\t%s  ", symnam(j0));
2510 			/* shift, error, or accept */
2511 			if(j1 > 0) {
2512 				if(j1 == ACCEPTCODE)
2513 					Bprint(foutput,  "accept");
2514 				else
2515 					if(j1 == ERRCODE)
2516 						Bprint(foutput, "error");
2517 					else
2518 						Bprint(foutput, "shift %d", j1);
2519 			} else
2520 				Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2521 		}
2522 
2523 	/* output the final production */
2524 	if(lastred)
2525 		Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2526 			lastred, rlines[lastred]);
2527 	else
2528 		Bprint(foutput, "\n\t.  error\n\n");
2529 
2530 	/* now, output nonterminal actions */
2531 	j1 = ntokens;
2532 	for(j0 = 1; j0 <= nnonter; j0++) {
2533 		j1++;
2534 		if(temp1[j1])
2535 			Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2536 	}
2537 }
2538 
2539 void
2540 warray(char *s, int *v, int n)
2541 {
2542 	int i;
2543 
2544 	Bprint(ftable, "short	%s[] =\n{", s);
2545 	for(i=0;;) {
2546 		if(i%10 == 0)
2547 			Bprint(ftable, "\n");
2548 		Bprint(ftable, "%4d", v[i]);
2549 		i++;
2550 		if(i >= n) {
2551 			Bprint(ftable, "\n};\n");
2552 			break;
2553 		}
2554 		Bprint(ftable, ",");
2555 	}
2556 }
2557 
2558 /*
2559  * in order to free up the mem and amem arrays for the optimizer,
2560  * and still be able to output yyr1, etc., after the sizes of
2561  * the action array is known, we hide the nonterminals
2562  * derived by productions in levprd.
2563  */
2564 
2565 void
2566 hideprod(void)
2567 {
2568 	int i, j;
2569 
2570 	j = 0;
2571 	levprd[0] = 0;
2572 	PLOOP(1, i) {
2573 		if(!(levprd[i] & REDFLAG)) {
2574 			j++;
2575 			if(foutput != 0)
2576 				Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2577 		}
2578 		levprd[i] = *prdptr[i] - NTBASE;
2579 	}
2580 	if(j)
2581 		print("%d rules never reduced\n", j);
2582 }
2583 
2584 
2585 void
2586 callopt(void)
2587 {
2588 	int i, *p, j, k, *q;
2589 
2590 	/* read the arrays from tempfile and set parameters */
2591 	if((finput=Bopen(tempname, OREAD)) == 0)
2592 		error("optimizer cannot open tempfile");
2593 	pgo[0] = 0;
2594 	temp1[0] = 0;
2595 	nstate = 0;
2596 	nnonter = 0;
2597 	for(;;) {
2598 		switch(gtnm()) {
2599 		case '\n':
2600 			nstate++;
2601 			pmem--;
2602 			temp1[nstate] = pmem - mem0;
2603 		case ',':
2604 			continue;
2605 		case '$':
2606 			break;
2607 		default:
2608 			error("bad tempfile");
2609 		}
2610 		break;
2611 	}
2612 
2613 	pmem--;
2614 	temp1[nstate] = yypgo[0] = pmem - mem0;
2615 	for(;;) {
2616 		switch(gtnm()) {
2617 		case '\n':
2618 			nnonter++;
2619 			yypgo[nnonter] = pmem-mem0;
2620 		case ',':
2621 			continue;
2622 		case -1:
2623 			break;
2624 		default:
2625 			error("bad tempfile");
2626 		}
2627 		break;
2628 	}
2629 	pmem--;
2630 	yypgo[nnonter--] = pmem - mem0;
2631 	for(i = 0; i < nstate; i++) {
2632 		k = 32000;
2633 		j = 0;
2634 		q = mem0 + temp1[i+1];
2635 		for(p = mem0 + temp1[i]; p < q ; p += 2) {
2636 			if(*p > j)
2637 				j = *p;
2638 			if(*p < k)
2639 				k = *p;
2640 		}
2641 		/* nontrivial situation */
2642 		if(k <= j) {
2643 			/* j is now the range */
2644 /*			j -= k;			/* call scj */
2645 			if(k > maxoff)
2646 				maxoff = k;
2647 		}
2648 		tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2649 		if(j > maxspr)
2650 			maxspr = j;
2651 	}
2652 
2653 	/* initialize ggreed table */
2654 	for(i = 1; i <= nnonter; i++) {
2655 		ggreed[i] = 1;
2656 		j = 0;
2657 
2658 		/* minimum entry index is always 0 */
2659 		q = mem0 + yypgo[i+1] - 1;
2660 		for(p = mem0+yypgo[i]; p < q ; p += 2) {
2661 			ggreed[i] += 2;
2662 			if(*p > j)
2663 				j = *p;
2664 		}
2665 		ggreed[i] = ggreed[i] + 2*j;
2666 		if(j > maxoff)
2667 			maxoff = j;
2668 	}
2669 
2670 	/* now, prepare to put the shift actions into the amem array */
2671 	for(i = 0; i < ACTSIZE; i++)
2672 		amem[i] = 0;
2673 	maxa = amem;
2674 	for(i = 0; i < nstate; i++) {
2675 		if(tystate[i] == 0 && adb > 1)
2676 			Bprint(ftable, "State %d: null\n", i);
2677 		indgo[i] = YYFLAG1;
2678 	}
2679 	while((i = nxti()) != NOMORE)
2680 		if(i >= 0)
2681 			stin(i);
2682 		else
2683 			gin(-i);
2684 
2685 	/* print amem array */
2686 	if(adb > 2 )
2687 		for(p = amem; p <= maxa; p += 10) {
2688 			Bprint(ftable, "%4d  ", p-amem);
2689 			for(i = 0; i < 10; ++i)
2690 				Bprint(ftable, "%4d  ", p[i]);
2691 			Bprint(ftable, "\n");
2692 		}
2693 
2694 	/* write out the output appropriate to the language */
2695 	aoutput();
2696 	osummary();
2697 	ZAPFILE(tempname);
2698 }
2699 
2700 void
2701 gin(int i)
2702 {
2703 	int *p, *r, *s, *q1, *q2;
2704 
2705 	/* enter gotos on nonterminal i into array amem */
2706 	ggreed[i] = 0;
2707 
2708 	q2 = mem0+ yypgo[i+1] - 1;
2709 	q1 = mem0 + yypgo[i];
2710 
2711 	/* now, find amem place for it */
2712 	for(p = amem; p < &amem[ACTSIZE]; p++) {
2713 		if(*p)
2714 			continue;
2715 		for(r = q1; r < q2; r += 2) {
2716 			s = p + *r + 1;
2717 			if(*s)
2718 				goto nextgp;
2719 			if(s > maxa)
2720 				if((maxa = s) > &amem[ACTSIZE])
2721 					error("a array overflow");
2722 		}
2723 		/* we have found amem spot */
2724 		*p = *q2;
2725 		if(p > maxa)
2726 			if((maxa = p) > &amem[ACTSIZE])
2727 				error("a array overflow");
2728 		for(r = q1; r < q2; r += 2) {
2729 			s = p + *r + 1;
2730 			*s = r[1];
2731 		}
2732 		pgo[i] = p-amem;
2733 		if(adb > 1)
2734 			Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2735 		return;
2736 
2737 	nextgp:;
2738 	}
2739 	error("cannot place goto %d\n", i);
2740 }
2741 
2742 void
2743 stin(int i)
2744 {
2745 	int *r, *s, n, flag, j, *q1, *q2;
2746 
2747 	tystate[i] = 0;
2748 
2749 	/* enter state i into the amem array */
2750 	q2 = mem0+temp1[i+1];
2751 	q1 = mem0+temp1[i];
2752 	/* find an acceptable place */
2753 	for(n = -maxoff; n < ACTSIZE; n++) {
2754 		flag = 0;
2755 		for(r = q1; r < q2; r += 2) {
2756 			if((s = *r + n + amem) < amem)
2757 				goto nextn;
2758 			if(*s == 0)
2759 				flag++;
2760 			else
2761 				if(*s != r[1])
2762 					goto nextn;
2763 		}
2764 
2765 		/* check that the position equals another only if the states are identical */
2766 		for(j=0; j<nstate; j++) {
2767 			if(indgo[j] == n) {
2768 
2769 				/* we have some disagreement */
2770 				if(flag)
2771 					goto nextn;
2772 				if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2773 
2774 					/* states are equal */
2775 					indgo[i] = n;
2776 					if(adb > 1)
2777 						Bprint(ftable,
2778 						"State %d: entry at %d equals state %d\n",
2779 						i, n, j);
2780 					return;
2781 				}
2782 
2783 				/* we have some disagreement */
2784 				goto nextn;
2785 			}
2786 		}
2787 
2788 		for(r = q1; r < q2; r += 2) {
2789 			if((s = *r+n+amem) >= &amem[ACTSIZE])
2790 				error("out of space in optimizer a array");
2791 			if(s > maxa)
2792 				maxa = s;
2793 			if(*s != 0 && *s != r[1])
2794 				error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2795 			*s = r[1];
2796 		}
2797 		indgo[i] = n;
2798 		if(adb > 1)
2799 			Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2800 		return;
2801 	nextn:;
2802 	}
2803 	error("Error; failure to place state %d\n", i);
2804 }
2805 
2806 /*
2807  * finds the next i
2808  */
2809 int
2810 nxti(void)
2811 {
2812 	int i, max, maxi;
2813 
2814 	max = 0;
2815 	maxi = 0;
2816 	for(i = 1; i <= nnonter; i++)
2817 		if(ggreed[i] >= max) {
2818 			max = ggreed[i];
2819 			maxi = -i;
2820 		}
2821 	for(i = 0; i < nstate; ++i)
2822 		if(tystate[i] >= max) {
2823 			max = tystate[i];
2824 			maxi = i;
2825 		}
2826 	if(nxdb)
2827 		Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2828 	if(max == 0)
2829 		return NOMORE;
2830 	return maxi;
2831 }
2832 
2833 /*
2834  * write summary
2835  */
2836 void
2837 osummary(void)
2838 {
2839 
2840 	int i, *p;
2841 
2842 	if(foutput == 0)
2843 		return;
2844 	i = 0;
2845 	for(p = maxa; p >= amem; p--)
2846 		if(*p == 0)
2847 			i++;
2848 
2849 	Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2850 		pmem-mem0+1, MEMSIZE, maxa-amem+1, ACTSIZE);
2851 	Bprint(foutput, "%d table entries, %d zero\n", (maxa-amem)+1, i);
2852 	Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2853 }
2854 
2855 /*
2856  * this version is for C
2857  * write out the optimized parser
2858  */
2859 void
2860 aoutput(void)
2861 {
2862 	Bprint(ftable, "#define\tYYLAST\t%d\n", maxa-amem+1);
2863 	arout("yyact", amem, (maxa-amem)+1);
2864 	arout("yypact", indgo, nstate);
2865 	arout("yypgo", pgo, nnonter+1);
2866 }
2867 
2868 void
2869 arout(char *s, int *v, int n)
2870 {
2871 	int i;
2872 
2873 	Bprint(ftable, "short	%s[] =\n{", s);
2874 	for(i = 0; i < n;) {
2875 		if(i%10 == 0)
2876 			Bprint(ftable, "\n");
2877 		Bprint(ftable, "%4d", v[i]);
2878 		i++;
2879 		if(i == n)
2880 			Bprint(ftable, "\n};\n");
2881 		else
2882 			Bprint(ftable, ",");
2883 	}
2884 }
2885 
2886 /*
2887  * read and convert an integer from the standard input
2888  * return the terminating character
2889  * blanks, tabs, and newlines are ignored
2890  */
2891 int
2892 gtnm(void)
2893 {
2894 	int s, val, c;
2895 
2896 	s = 1;
2897 	val = 0;
2898 	while((c=Bgetrune(finput)) != Beof) {
2899 		if(isdigit(c))
2900 			val = val*10 + c-'0';
2901 		else
2902 			if(c == '-')
2903 				s = -1;
2904 			else
2905 				break;
2906 	}
2907 	*pmem++ = s*val;
2908 	if(pmem > &mem0[MEMSIZE])
2909 		error("out of space");
2910 	return c;
2911 }
2912