xref: /plan9/sys/src/cmd/yacc.c (revision ec59a3ddbfceee0efe34584c2c9981a5e5ff1ec4)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 
6 #define	Bungetrune	Bungetc		/* ok for now. */
7 
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET		((32+NTERMS)/32)	/* BOTCH?? +31 */
12 #define BIT(a,i)	((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)	((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)	(((n)+32)/32)
15 
16 #define PARSER		"/sys/lib/yaccpar"
17 #define PARSERS		"/sys/lib/yaccpars"
18 #define TEMPNAME	"y.tmp.XXXXXX"
19 #define ACTNAME		"y.acts.XXXXXX"
20 #define OFILE		"tab.c"
21 #define FILEU		"output"
22 #define FILED		"tab.h"
23 #define FILEDEBUG	"debug"
24 
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31 	ACTSIZE		= 40000,
32 	MEMSIZE		= 40000,
33 	NSTATES		= 2000,
34 	NTERMS		= 511,
35 	NPROD		= 1600,
36 	NNONTERM	= 600,
37 	TEMPSIZE	= 2000,
38 	CNAMSZ		= 10000,
39 	LSETSIZE	= 2400,
40 	WSETSIZE	= 350,
41 
42 	NAMESIZE	= 50,
43 	NTYPES		= 63,
44 	ISIZE		= 400,
45 
46 	PRIVATE		= 0xE000,	/* unicode private use */
47 
48 	/* relationships which must hold:
49 		TBITSET ints must hold NTERMS+1 bits...
50 		WSETSIZE >= NNONTERM
51 		LSETSIZE >= NNONTERM
52 		TEMPSIZE >= NTERMS + NNONTERM + 1
53 		TEMPSIZE >= NSTATES
54 	*/
55 
56 	NTBASE		= 010000,
57 	ERRCODE		= 8190,
58 	ACCEPTCODE	= 8191,
59 
60 	NOASC		= 0,	/* no assoc. */
61 	LASC		= 1,	/* left assoc. */
62 	RASC		= 2,	/* right assoc. */
63 	BASC		= 3,	/* binary assoc. */
64 
65 	/* flags for state generation */
66 
67 	DONE		= 0,
68 	MUSTDO		= 1,
69 	MUSTLOOKAHEAD	= 2,
70 
71 	/* flags for a rule having an action, and being reduced */
72 
73 	ACTFLAG		= 04,
74 	REDFLAG		= 010,
75 
76 	/* output parser flags */
77 	YYFLAG1		= -1000,
78 
79 	/* parse tokens */
80 	IDENTIFIER	= PRIVATE,
81 	MARK,
82 	TERM,
83 	LEFT,
84 	RIGHT,
85 	BINARY,
86 	PREC,
87 	LCURLY,
88 	IDENTCOLON,
89 	NUMBER,
90 	START,
91 	TYPEDEF,
92 	TYPENAME,
93 	UNION,
94 
95 	ENDFILE		= 0,
96 
97 	EMPTY		= 1,
98 	WHOKNOWS	= 0,
99 	OK		= 1,
100 	NOMORE		= -1000,
101 };
102 
103 	/* macros for getting associativity and precedence levels */
104 
105 #define ASSOC(i)	((i)&03)
106 #define PLEVEL(i)	(((i)>>4)&077)
107 #define TYPE(i)		(((i)>>10)&077)
108 
109 	/* macros for setting associativity and precedence levels */
110 
111 #define SETASC(i,j)	i |= j
112 #define SETPLEV(i,j)	i |= (j<<4)
113 #define SETTYPE(i,j)	i |= (j<<10)
114 
115 	/* looping macros */
116 
117 #define TLOOP(i)	for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)	for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)	for(i=s; i<nprod; i++)
120 #define SLOOP(i)	for(i=0; i<nstate; i++)
121 #define WSBUMP(x)	x++
122 #define WSLOOP(s,j)	for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)	for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)	for(i=0; i<tbitset; i++)
125 
126 	/* command to clobber tempfiles after use */
127 
128 #define	ZAPFILE(x)	if(x) remove(x)
129 
130 	/* I/O descriptors */
131 
132 Biobuf*	faction;	/* file for saving actions */
133 Biobuf*	fdefine;	/* file for #defines */
134 Biobuf*	fdebug;		/* y.debug for strings for debugging */
135 Biobuf*	ftable;		/* y.tab.c file */
136 Biobuf*	ftemp;		/* tempfile to pass 2 */
137 Biobuf*	finput;		/* input file */
138 Biobuf*	foutput;	/* y.output file */
139 
140 	/* communication variables between various I/O routines */
141 
142 char*	infile;			/* input file name */
143 int	numbval;		/* value of an input number */
144 char	tokname[NAMESIZE+4];	/* input token name, slop for runes and 0 */
145 
146 	/* structure declarations */
147 
148 typedef
149 struct
150 {
151 	int	lset[TBITSET];
152 } Lkset;
153 
154 typedef
155 struct
156 {
157 	int*	pitem;
158 	Lkset*	look;
159 } Item;
160 
161 typedef
162 struct
163 {
164 	char*	name;
165 	int	value;
166 } Symb;
167 
168 typedef
169 struct
170 {
171 	int*	pitem;
172 	int	flag;
173 	Lkset	ws;
174 } Wset;
175 
176 	/* storage of names */
177 
178 char	cnames[CNAMSZ];		/* place where token and nonterminal names are stored */
179 int	cnamsz = CNAMSZ;	/* size of cnames */
180 char*	cnamp = cnames;		/* place where next name is to be put in */
181 int	ndefout = 4;		/* number of defined symbols output */
182 char*	tempname;
183 char*	actname;
184 char	ttempname[] = TEMPNAME;
185 char	tactname[] = ACTNAME;
186 char*	parser = PARSER;
187 char*	yydebug;
188 
189 	/* storage of types */
190 int	ntypes;			/* number of types defined */
191 char*	typeset[NTYPES];	/* pointers to type tags */
192 
193 	/* token information */
194 
195 int	ntokens = 0 ;		/* number of tokens */
196 Symb	tokset[NTERMS];
197 int	toklev[NTERMS];		/* vector with the precedence of the terminals */
198 
199 	/* nonterminal information */
200 
201 int	nnonter = -1;		/* the number of nonterminals */
202 Symb	nontrst[NNONTERM];
203 int	start;			/* start symbol */
204 
205 	/* assigned token type values */
206 int	extval = 0;
207 
208 char*	ytabc = OFILE;	/* name of y.tab.c */
209 
210 	/* grammar rule information */
211 
212 int	mem0[MEMSIZE] ;		/* production storage */
213 int*	mem = mem0;
214 int	nprod = 1;		/* number of productions */
215 int*	prdptr[NPROD];		/* pointers to descriptions of productions */
216 int	levprd[NPROD];		/* precedence levels for the productions */
217 int	rlines[NPROD];		/* line number for this rule */
218 
219 	/* state information */
220 
221 int	nstate = 0;		/* number of states */
222 Item*	pstate[NSTATES+2];	/* pointers to the descriptions of the states */
223 int	tystate[NSTATES];	/* contains type information about the states */
224 int	defact[NSTATES];	/* the default actions of states */
225 int	tstates[NTERMS];	/* states generated by terminal gotos */
226 int	ntstates[NNONTERM]; 	/* states generated by nonterminal gotos */
227 int	mstates[NSTATES];	/* chain of overflows of term/nonterm generation lists  */
228 int	lastred; 		/* the number of the last reduction of a state */
229 
230 	/* lookahead set information */
231 
232 Lkset	lkst[LSETSIZE];
233 int	nolook;			/* flag to turn off lookahead computations */
234 int	tbitset;		/* size of lookahead sets */
235 int	nlset = 0;		/* next lookahead set index */
236 int	nolook = 0;		/* flag to suppress lookahead computations */
237 Lkset	clset;  		/* temporary storage for lookahead computations */
238 
239 	/* working set information */
240 
241 Wset	wsets[WSETSIZE];
242 Wset*	cwp;
243 
244 	/* storage for action table */
245 
246 int	amem[ACTSIZE];		/* action table storage */
247 int*	memp = amem;		/* next free action table position */
248 int	indgo[NSTATES];		/* index to the stored goto table */
249 
250 	/* temporary vector, indexable by states, terms, or ntokens */
251 
252 int	temp1[TEMPSIZE];	/* temporary storage, indexed by terms + ntokens or states */
253 int	lineno = 1;		/* current input line number */
254 int	fatfl = 1;  		/* if on, error is fatal */
255 int	nerrors = 0;		/* number of errors */
256 
257 	/* statistics collection variables */
258 
259 int	zzgoent;
260 int	zzgobest;
261 int	zzacent;
262 int	zzexcp;
263 int	zzclose;
264 int	zzrrconf;
265 int	zzsrconf;
266 
267 int*	ggreed = lkst[0].lset;
268 int*	pgo = wsets[0].ws.lset;
269 int*	yypgo = &nontrst[0].value;
270 
271 int	maxspr = 0;  		/* maximum spread of any entry */
272 int	maxoff = 0;  		/* maximum offset into a array */
273 int*	pmem = mem0;
274 int*	maxa;
275 int	nxdb = 0;
276 int	adb = 0;
277 
278 
279 	/* storage for information about the nonterminals */
280 
281 int**	pres[NNONTERM+2];  	/* vector of pointers to productions yielding each nonterminal */
282 Lkset*	pfirst[NNONTERM+2];	/* vector of pointers to first sets for each nonterminal */
283 int	pempty[NNONTERM+1];	/* vector of nonterminals nontrivially deriving e */
284 
285 	/* random stuff picked out from between functions */
286 
287 int	indebug = 0;
288 Wset*	zzcwp = wsets;
289 int	zzgoent = 0;
290 int	zzgobest = 0;
291 int	zzacent = 0;
292 int	zzexcp = 0;
293 int	zzclose = 0;
294 int	zzsrconf = 0;
295 int*	zzmemsz = mem0;
296 int	zzrrconf = 0;
297 int	pidebug = 0;		/* debugging flag for putitem */
298 int	gsdebug = 0;
299 int	cldebug = 0;		/* debugging flag for closure */
300 int	pkdebug = 0;
301 int	g2debug = 0;
302 
303 struct
304 {
305 	char*	name;
306 	long	value;
307 } resrv[] =
308 {
309 	"binary",	BINARY,
310 	"left",		LEFT,
311 	"nonassoc",	BINARY,
312 	"prec",		PREC,
313 	"right",	RIGHT,
314 	"start",	START,
315 	"term",		TERM,
316 	"token",	TERM,
317 	"type",		TYPEDEF,
318 	"union",	UNION,
319 	0,
320 };
321 
322 	/* define functions */
323 
324 void	main(int, char**);
325 void	others(void);
326 char*	chcopy(char*, char*);
327 char*	writem(int*);
328 char*	symnam(int);
329 void	summary(void);
330 void	error(char*, ...);
331 void	aryfil(int*, int, int);
332 int	setunion(int*, int*);
333 void	prlook(Lkset*);
334 void	cpres(void);
335 void	cpfir(void);
336 int	state(int);
337 void	putitem(int*, Lkset*);
338 void	cempty(void);
339 void	stagen(void);
340 void	closure(int);
341 Lkset*	flset(Lkset*);
342 void	cleantmp(void);
343 void	intr(void);
344 void	setup(int, char**);
345 void	finact(void);
346 int	defin(int, char*);
347 void	defout(int);
348 char*	cstash(char*);
349 long	gettok(void);
350 int	fdtype(int);
351 int	chfind(int, char*);
352 void	cpyunion(void);
353 void	cpycode(void);
354 int	skipcom(void);
355 void	cpyact(int);
356 void	openup(char*, int, int, int, char*);
357 void	output(void);
358 int	apack(int*, int);
359 void	go2out(void);
360 void	go2gen(int);
361 void	precftn(int, int, int);
362 void	wract(int);
363 void	wrstate(int);
364 void	warray(char*, int*, int);
365 void	hideprod(void);
366 void	callopt(void);
367 void	gin(int);
368 void	stin(int);
369 int	nxti(void);
370 void	osummary(void);
371 void	aoutput(void);
372 void	arout(char*, int*, int);
373 int	gtnm(void);
374 
375 void
376 main(int argc, char *argv[])
377 {
378 
379 	setup(argc, argv);	/* initialize and read productions */
380 	tbitset = NWORDS(ntokens);
381 	cpres();		/* make table of which productions yield a given nonterminal */
382 	cempty();		/* make a table of which nonterminals can match the empty string */
383 	cpfir();		/* make a table of firsts of nonterminals */
384 	stagen();		/* generate the states */
385 	output();		/* write the states and the tables */
386 	go2out();
387 	hideprod();
388 	summary();
389 	callopt();
390 	others();
391 	exits(0);
392 }
393 
394 /*
395  * put out other arrays, copy the parsers
396  */
397 void
398 others(void)
399 {
400 	int c, i, j;
401 
402 	finput = Bopen(parser, OREAD);
403 	if(finput == 0)
404 		error("cannot find parser %s", parser);
405 	warray("yyr1", levprd, nprod);
406 	aryfil(temp1, nprod, 0);
407 	PLOOP(1, i)
408 		temp1[i] = prdptr[i+1]-prdptr[i]-2;
409 	warray("yyr2", temp1, nprod);
410 
411 	aryfil(temp1, nstate, -1000);
412 	TLOOP(i)
413 		for(j=tstates[i]; j!=0; j=mstates[j])
414 			temp1[j] = i;
415 	NTLOOP(i)
416 		for(j=ntstates[i]; j!=0; j=mstates[j])
417 			temp1[j] = -i;
418 	warray("yychk", temp1, nstate);
419 	warray("yydef", defact, nstate);
420 
421 	/* put out token translation tables */
422 	/* table 1 has 0-256 */
423 	aryfil(temp1, 256, 0);
424 	c = 0;
425 	TLOOP(i) {
426 		j = tokset[i].value;
427 		if(j >= 0 && j < 256) {
428 			if(temp1[j]) {
429 				print("yacc bug -- cant have 2 different Ts with same value\n");
430 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431 				nerrors++;
432 			}
433 			temp1[j] = i;
434 			if(j > c)
435 				c = j;
436 		}
437 	}
438 	warray("yytok1", temp1, c+1);
439 
440 	/* table 2 has PRIVATE-PRIVATE+256 */
441 	aryfil(temp1, 256, 0);
442 	c = 0;
443 	TLOOP(i) {
444 		j = tokset[i].value - PRIVATE;
445 		if(j >= 0 && j < 256) {
446 			if(temp1[j]) {
447 				print("yacc bug -- cant have 2 different Ts with same value\n");
448 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449 				nerrors++;
450 			}
451 			temp1[j] = i;
452 			if(j > c)
453 				c = j;
454 		}
455 	}
456 	warray("yytok2", temp1, c+1);
457 
458 	/* table 3 has everything else */
459 	Bprint(ftable, "long	yytok3[] =\n{\n");
460 	c = 0;
461 	TLOOP(i) {
462 		j = tokset[i].value;
463 		if(j >= 0 && j < 256)
464 			continue;
465 		if(j >= PRIVATE && j < 256+PRIVATE)
466 			continue;
467 
468 		Bprint(ftable, "%4d,%4d,", j, i);
469 		c++;
470 		if(c%5 == 0)
471 			Bprint(ftable, "\n");
472 	}
473 	Bprint(ftable, "%4d\n};\n", 0);
474 
475 	/* copy parser text */
476 	while((c=Bgetrune(finput)) != Beof) {
477 		if(c == '$') {
478 			if((c = Bgetrune(finput)) != 'A')
479 				Bputrune(ftable, '$');
480 			else { /* copy actions */
481 				faction = Bopen(actname, OREAD);
482 				if(faction == 0)
483 					error("cannot reopen action tempfile");
484 				while((c=Bgetrune(faction)) != Beof)
485 					Bputrune(ftable, c);
486 				Bterm(faction);
487 				ZAPFILE(actname);
488 				c = Bgetrune(finput);
489 			}
490 		}
491 		Bputrune(ftable, c);
492 	}
493 	Bterm(ftable);
494 }
495 
496 /*
497  * copies string q into p, returning next free char ptr
498  */
499 char*
500 chcopy(char* p, char* q)
501 {
502 	int c;
503 
504 	while(c = *q) {
505 		if(c == '"')
506 			*p++ = '\\';
507 		*p++ = c;
508 		q++;
509 	}
510 	*p = 0;
511 	return p;
512 }
513 
514 /*
515  * creates output string for item pointed to by pp
516  */
517 char*
518 writem(int *pp)
519 {
520 	int i,*p;
521 	static char sarr[ISIZE];
522 	char* q;
523 
524 	for(p=pp; *p>0; p++)
525 		;
526 	p = prdptr[-*p];
527 	q = chcopy(sarr, nontrst[*p-NTBASE].name);
528 	q = chcopy(q, ": ");
529 	for(;;) {
530 		*q = ' ';
531 		p++;
532 		if(p == pp)
533 			*q = '.';
534 		q++;
535 		*q = '\0';
536 		i = *p;
537 		if(i <= 0)
538 			break;
539 		q = chcopy(q, symnam(i));
540 		if(q > &sarr[ISIZE-30])
541 			error("item too big");
542 	}
543 
544 	/* an item calling for a reduction */
545 	i = *pp;
546 	if(i < 0 ) {
547 		q = chcopy(q, "    (");
548 		sprint(q, "%d)", -i);
549 	}
550 	return sarr;
551 }
552 
553 /*
554  * return a pointer to the name of symbol i
555  */
556 char*
557 symnam(int i)
558 {
559 	char* cp;
560 
561 	cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562 	if(*cp == ' ')
563 		cp++;
564 	return cp;
565 }
566 
567 /*
568  * output the summary on y.output
569  */
570 void
571 summary(void)
572 {
573 
574 	if(foutput != 0) {
575 		Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576 			ntokens, NTERMS, nnonter, NNONTERM);
577 		Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578 			nprod, NPROD, nstate, NSTATES);
579 		Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580 			zzsrconf, zzrrconf);
581 		Bprint(foutput, "%d/%d working sets used\n",
582 			(int)(zzcwp-wsets), WSETSIZE);
583 		Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584 			(int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
585 		Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586 		Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587 		Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588 		Bprint(foutput, "%d goto entries\n", zzgoent);
589 		Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
590 	}
591 	if(zzsrconf != 0 || zzrrconf != 0) {
592 		print("\nconflicts: ");
593 		if(zzsrconf)
594 			print("%d shift/reduce", zzsrconf);
595 		if(zzsrconf && zzrrconf)
596 			print(", ");
597 		if(zzrrconf)
598 			print("%d reduce/reduce", zzrrconf);
599 		print("\n");
600 	}
601 	if(ftemp != 0) {
602 		Bterm(ftemp);
603 		ftemp = 0;
604 	}
605 	if(fdefine != 0) {
606 		Bterm(fdefine);
607 		fdefine = 0;
608 	}
609 }
610 
611 /*
612  * write out error comment -- NEEDS WORK
613  */
614 void
615 error(char *s, ...)
616 {
617 
618 	nerrors++;
619 	fprint(2, "\n fatal error:");
620 	fprint(2, s, (&s)[1]);
621 	fprint(2, ", %s:%d\n", infile, lineno);
622 	if(!fatfl)
623 		return;
624 	summary();
625 	cleantmp();
626 	exits("error");
627 }
628 
629 /*
630  * set elements 0 through n-1 to c
631  */
632 void
633 aryfil(int *v, int n, int c)
634 {
635 	int i;
636 
637 	for(i=0; i<n; i++)
638 		v[i] = c;
639 }
640 
641 /*
642  * set a to the union of a and b
643  * return 1 if b is not a subset of a, 0 otherwise
644  */
645 int
646 setunion(int *a, int *b)
647 {
648 	int i, x, sub;
649 
650 	sub = 0;
651 	SETLOOP(i) {
652 		x = *a;
653 		*a |= *b;
654 		if(*a != x)
655 			sub = 1;
656 		a++;
657 		b++;
658 	}
659 	return sub;
660 }
661 
662 void
663 prlook(Lkset* p)
664 {
665 	int j, *pp;
666 
667 	pp = p->lset;
668 	if(pp == 0)
669 		Bprint(foutput, "\tNULL");
670 	else {
671 		Bprint(foutput, " { ");
672 		TLOOP(j)
673 			if(BIT(pp,j))
674 				Bprint(foutput, "%s ", symnam(j));
675 		Bprint(foutput, "}");
676 	}
677 }
678 
679 /*
680  * compute an array with the beginnings of  productions yielding given nonterminals
681  * The array pres points to these lists
682  * the array pyield has the lists: the total size is only NPROD+1
683  */
684 void
685 cpres(void)
686 {
687 	int c, j, i, **pmem;
688 	static int *pyield[NPROD];
689 
690 	pmem = pyield;
691 	NTLOOP(i) {
692 		c = i+NTBASE;
693 		pres[i] = pmem;
694 		fatfl = 0;  	/* make undefined  symbols  nonfatal */
695 		PLOOP(0, j)
696 			if(*prdptr[j] == c)
697 				*pmem++ =  prdptr[j]+1;
698 		if(pres[i] == pmem)
699 			error("nonterminal %s not defined!", nontrst[i].name);
700 	}
701 	pres[i] = pmem;
702 	fatfl = 1;
703 	if(nerrors) {
704 		summary();
705 		cleantmp();
706 		exits("error");
707 	}
708 	if(pmem != &pyield[nprod])
709 		error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
710 }
711 
712 /*
713  * compute an array with the first of nonterminals
714  */
715 void
716 cpfir(void)
717 {
718 	int *p, **s, i, **t, ch, changes;
719 
720 	zzcwp = &wsets[nnonter];
721 	NTLOOP(i) {
722 		aryfil(wsets[i].ws.lset, tbitset, 0);
723 		t = pres[i+1];
724 		/* initially fill the sets */
725 		for(s=pres[i]; s<t; ++s)
726 			for(p = *s; (ch = *p) > 0; ++p) {
727 				if(ch < NTBASE) {
728 					SETBIT(wsets[i].ws.lset, ch);
729 					break;
730 				}
731 				if(!pempty[ch-NTBASE])
732 					break;
733 			}
734 	}
735 
736 	/* now, reflect transitivity */
737 	changes = 1;
738 	while(changes) {
739 		changes = 0;
740 		NTLOOP(i) {
741 			t = pres[i+1];
742 			for(s = pres[i]; s < t; ++s)
743 				for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
744 					changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
745 					if(!pempty[ch])
746 						break;
747 				}
748 		}
749 	}
750 
751 	NTLOOP(i)
752 		pfirst[i] = flset(&wsets[i].ws);
753 	if(!indebug)
754 		return;
755 	if(foutput != 0)
756 		NTLOOP(i) {
757 			Bprint(foutput, "\n%s: ", nontrst[i].name);
758 			prlook(pfirst[i]);
759 			Bprint(foutput, " %d\n", pempty[i]);
760 		}
761 }
762 
763 /*
764  * sorts last state,and sees if it equals earlier ones. returns state number
765  */
766 int
767 state(int c)
768 {
769 	Item *p1, *p2, *k, *l, *q1, *q2;
770 	int size1, size2, i;
771 
772 	p1 = pstate[nstate];
773 	p2 = pstate[nstate+1];
774 	if(p1 == p2)
775 		return 0;	/* null state */
776 	/* sort the items */
777 	for(k = p2-1; k > p1; k--)	/* make k the biggest */
778 		for(l = k-1; l >= p1; --l)
779 			if(l->pitem > k->pitem) {
780 				int *s;
781 				Lkset *ss;
782 
783 				s = k->pitem;
784 				k->pitem = l->pitem;
785 				l->pitem = s;
786 				ss = k->look;
787 				k->look = l->look;
788 				l->look = ss;
789 			}
790 	size1 = p2 - p1;	/* size of state */
791 
792 	for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
793 		/* get ith state */
794 		q1 = pstate[i];
795 		q2 = pstate[i+1];
796 		size2 = q2 - q1;
797 		if(size1 != size2)
798 			continue;
799 		k = p1;
800 		for(l = q1; l < q2; l++) {
801 			if(l->pitem != k->pitem)
802 				break;
803 			k++;
804 		}
805 		if(l != q2)
806 			continue;
807 		/* found it */
808 		pstate[nstate+1] = pstate[nstate];	/* delete last state */
809 		/* fix up lookaheads */
810 		if(nolook)
811 			return i;
812 		for(l = q1, k = p1; l < q2; ++l, ++k ) {
813 			int s;
814 
815 			SETLOOP(s)
816 				clset.lset[s] = l->look->lset[s];
817 			if(setunion(clset.lset, k->look->lset)) {
818 				tystate[i] = MUSTDO;
819 				/* register the new set */
820 				l->look = flset( &clset );
821 			}
822 		}
823 		return i;
824 	}
825 	/* state is new */
826 	if(nolook)
827 		error("yacc state/nolook error");
828 	pstate[nstate+2] = p2;
829 	if(nstate+1 >= NSTATES)
830 		error("too many states");
831 	if(c >= NTBASE) {
832 		mstates[nstate] = ntstates[c-NTBASE];
833 		ntstates[c-NTBASE] = nstate;
834 	} else {
835 		mstates[nstate] = tstates[c];
836 		tstates[c] = nstate;
837 	}
838 	tystate[nstate] = MUSTDO;
839 	return nstate++;
840 }
841 
842 void
843 putitem(int *ptr, Lkset *lptr)
844 {
845 	Item *j;
846 
847 	if(pidebug && foutput != 0)
848 		Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
849 	j = pstate[nstate+1];
850 	j->pitem = ptr;
851 	if(!nolook)
852 		j->look = flset(lptr);
853 	pstate[nstate+1] = ++j;
854 	if((int*)j > zzmemsz) {
855 		zzmemsz = (int*)j;
856 		if(zzmemsz >=  &mem0[MEMSIZE])
857 			error("out of state space");
858 	}
859 }
860 
861 /*
862  * mark nonterminals which derive the empty string
863  * also, look for nonterminals which don't derive any token strings
864  */
865 void
866 cempty(void)
867 {
868 
869 	int i, *p;
870 
871 	/* first, use the array pempty to detect productions that can never be reduced */
872 	/* set pempty to WHONOWS */
873 	aryfil(pempty, nnonter+1, WHOKNOWS);
874 
875 	/* now, look at productions, marking nonterminals which derive something */
876 more:
877 	PLOOP(0, i) {
878 		if(pempty[*prdptr[i] - NTBASE])
879 			continue;
880 		for(p = prdptr[i]+1; *p >= 0; ++p)
881 			if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
882 				break;
883 		/* production can be derived */
884 		if(*p < 0) {
885 			pempty[*prdptr[i]-NTBASE] = OK;
886 			goto more;
887 		}
888 	}
889 
890 	/* now, look at the nonterminals, to see if they are all OK */
891 	NTLOOP(i) {
892 		/* the added production rises or falls as the start symbol ... */
893 		if(i == 0)
894 			continue;
895 		if(pempty[i] != OK) {
896 			fatfl = 0;
897 			error("nonterminal %s never derives any token string", nontrst[i].name);
898 		}
899 	}
900 
901 	if(nerrors) {
902 		summary();
903 		cleantmp();
904 		exits("error");
905 	}
906 
907 	/* now, compute the pempty array, to see which nonterminals derive the empty string */
908 	/* set pempty to WHOKNOWS */
909 	aryfil( pempty, nnonter+1, WHOKNOWS);
910 
911 	/* loop as long as we keep finding empty nonterminals */
912 
913 again:
914 	PLOOP(1, i) {
915 		/* not known to be empty */
916 		if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
917 			for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
918 				;
919 			/* we have a nontrivially empty nonterminal */
920 			if(*p < 0) {
921 				pempty[*prdptr[i]-NTBASE] = EMPTY;
922 				/* got one ... try for another */
923 				goto again;
924 			}
925 		}
926 	}
927 }
928 
929 /*
930  * generate the states
931  */
932 void
933 stagen(void)
934 {
935 
936 	int c, i, j, more;
937 	Wset *p, *q;
938 
939 	/* initialize */
940 	nstate = 0;
941 
942 	/* THIS IS FUNNY from the standpoint of portability
943 	 * it represents the magic moment when the mem0 array, which has
944 	 * been holding the productions, starts to hold item pointers, of a
945 	 * different type...
946 	 * someday, alloc should be used to allocate all this stuff... for now, we
947 	 * accept that if pointers don't fit in integers, there is a problem...
948 	 */
949 
950 	pstate[0] = pstate[1] = (Item*)mem;
951 	aryfil(clset.lset, tbitset, 0);
952 	putitem(prdptr[0]+1, &clset);
953 	tystate[0] = MUSTDO;
954 	nstate = 1;
955 	pstate[2] = pstate[1];
956 
957 	aryfil(amem, ACTSIZE, 0);
958 
959 	/* now, the main state generation loop */
960 	for(more=1; more;) {
961 		more = 0;
962 		SLOOP(i) {
963 			if(tystate[i] != MUSTDO)
964 				continue;
965 			tystate[i] = DONE;
966 			aryfil(temp1, nnonter+1, 0);
967 			/* take state i, close it, and do gotos */
968 			closure(i);
969 			/* generate goto's */
970 			WSLOOP(wsets, p) {
971 				if(p->flag)
972 					continue;
973 				p->flag = 1;
974 				c = *(p->pitem);
975 				if(c <= 1) {
976 					if(pstate[i+1]-pstate[i] <= p-wsets)
977 						tystate[i] = MUSTLOOKAHEAD;
978 					continue;
979 				}
980 				/* do a goto on c */
981 				WSLOOP(p, q)
982 					/* this item contributes to the goto */
983 					if(c == *(q->pitem)) {
984 						putitem(q->pitem+1, &q->ws);
985 						q->flag = 1;
986 					}
987 				if(c < NTBASE)
988 					state(c);	/* register new state */
989 				else
990 					temp1[c-NTBASE] = state(c);
991 			}
992 			if(gsdebug && foutput != 0) {
993 				Bprint(foutput, "%d: ", i);
994 				NTLOOP(j)
995 					if(temp1[j])
996 						Bprint(foutput, "%s %d, ",
997 						nontrst[j].name, temp1[j]);
998 				Bprint(foutput, "\n");
999 			}
1000 			indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1001 			/* do some more */
1002 			more = 1;
1003 		}
1004 	}
1005 }
1006 
1007 /*
1008  * generate the closure of state i
1009  */
1010 void
1011 closure(int i)
1012 {
1013 
1014 	Wset *u, *v;
1015 	Item *p, *q;
1016 	int c, ch, work, k, *pi, **s, **t;
1017 
1018 	zzclose++;
1019 
1020 	/* first, copy kernel of state i to wsets */
1021 	cwp = wsets;
1022 	ITMLOOP(i, p, q) {
1023 		cwp->pitem = p->pitem;
1024 		cwp->flag = 1;			/* this item must get closed */
1025 		SETLOOP(k)
1026 			cwp->ws.lset[k] = p->look->lset[k];
1027 		WSBUMP(cwp);
1028 	}
1029 
1030 	/* now, go through the loop, closing each item */
1031 	work = 1;
1032 	while(work) {
1033 		work = 0;
1034 		WSLOOP(wsets, u) {
1035 			if(u->flag == 0)
1036 				continue;
1037 			/* dot is before c */
1038 			c = *(u->pitem);
1039 			if(c < NTBASE) {
1040 				u->flag = 0;
1041 				/* only interesting case is where . is before nonterminal */
1042 				continue;
1043 			}
1044 
1045 			/* compute the lookahead */
1046 			aryfil(clset.lset, tbitset, 0);
1047 
1048 			/* find items involving c */
1049 			WSLOOP(u, v)
1050 				if(v->flag == 1 && *(pi=v->pitem) == c) {
1051 					v->flag = 0;
1052 					if(nolook)
1053 						continue;
1054 					while((ch = *++pi) > 0) {
1055 						/* terminal symbol */
1056 						if(ch < NTBASE) {
1057 							SETBIT(clset.lset, ch);
1058 							break;
1059 						}
1060 						/* nonterminal symbol */
1061 						setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1062 						if(!pempty[ch-NTBASE])
1063 							break;
1064 					}
1065 					if(ch <= 0)
1066 						setunion(clset.lset, v->ws.lset);
1067 				}
1068 
1069 			/*
1070 			 * now loop over productions derived from c
1071 			 * c is now nonterminal number
1072 			 */
1073 			c -= NTBASE;
1074 			t = pres[c+1];
1075 			for(s = pres[c]; s < t; ++s) {
1076 				/*
1077 				 * put these items into the closure
1078 				 * is the item there
1079 				 */
1080 				WSLOOP(wsets, v)
1081 					/* yes, it is there */
1082 					if(v->pitem == *s) {
1083 						if(nolook)
1084 							goto nexts;
1085 						if(setunion(v->ws.lset, clset.lset))
1086 							v->flag = work = 1;
1087 						goto nexts;
1088 					}
1089 
1090 				/*  not there; make a new entry */
1091 				if(cwp-wsets+1 >= WSETSIZE)
1092 					error( "working set overflow");
1093 				cwp->pitem = *s;
1094 				cwp->flag = 1;
1095 				if(!nolook) {
1096 					work = 1;
1097 					SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1098 				}
1099 				WSBUMP(cwp);
1100 
1101 			nexts:;
1102 			}
1103 		}
1104 	}
1105 
1106 	/* have computed closure; flags are reset; return */
1107 	if(cwp > zzcwp)
1108 		zzcwp = cwp;
1109 	if(cldebug && foutput != 0) {
1110 		Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1111 		WSLOOP(wsets, u) {
1112 			if(u->flag)
1113 				Bprint(foutput, "flag set!\n");
1114 			u->flag = 0;
1115 			Bprint(foutput, "\t%s", writem(u->pitem));
1116 			prlook(&u->ws);
1117 			Bprint(foutput, "\n");
1118 		}
1119 	}
1120 }
1121 
1122 /*
1123  * decide if the lookahead set pointed to by p is known
1124  * return pointer to a perminent location for the set
1125  */
1126 Lkset*
1127 flset(Lkset *p)
1128 {
1129 	Lkset *q;
1130 	int *u, *v, *w, j;
1131 
1132 	for(q = &lkst[nlset]; q-- > lkst;) {
1133 		u = p->lset;
1134 		v = q->lset;
1135 		w = &v[tbitset];
1136 		while(v < w)
1137 			if(*u++ != *v++)
1138 				goto more;
1139 		/* we have matched */
1140 		return q;
1141 	more:;
1142 	}
1143 	/* add a new one */
1144 	q = &lkst[nlset++];
1145 	if(nlset >= LSETSIZE)
1146 		error("too many lookahead sets");
1147 	SETLOOP(j)
1148 		q->lset[j] = p->lset[j];
1149 	return q;
1150 }
1151 
1152 void
1153 cleantmp(void)
1154 {
1155 	ZAPFILE(actname);
1156 	ZAPFILE(tempname);
1157 }
1158 
1159 void
1160 intr(void)
1161 {
1162 	cleantmp();
1163 	exits("interrupted");
1164 }
1165 
1166 void
1167 setup(int argc, char *argv[])
1168 {
1169 	long c, t;
1170 	int i, j, lev, ty, ytab, *p;
1171 	int vflag, dflag, stem;
1172 	char actnm[8], *stemc, *s, dirbuf[128];
1173 
1174 	ytab = 0;
1175 	vflag = 0;
1176 	dflag = 0;
1177 	stem = 0;
1178 	stemc = "y";
1179 	foutput = 0;
1180 	fdefine = 0;
1181 	fdebug = 0;
1182 	ARGBEGIN{
1183 	case 'v':
1184 	case 'V':
1185 		vflag++;
1186 		break;
1187 	case 'D':
1188 		yydebug = ARGF();
1189 		break;
1190 	case 'd':
1191 		dflag++;
1192 		break;
1193 	case 'o':
1194 		ytab++;
1195 		ytabc = ARGF();
1196 		break;
1197 	case 's':
1198 		stem++;
1199 		stemc = ARGF();
1200 		break;
1201 	case 'S':
1202 		parser = PARSERS;
1203 		break;
1204 	default:
1205 		error("illegal option: %c", ARGC());
1206 	}ARGEND
1207 	openup(stemc, dflag, vflag, ytab, ytabc);
1208 
1209 	ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1210 	faction = Bopen(actname = mktemp(tactname), OWRITE);
1211 	if(ftemp == 0 || faction == 0)
1212 		error("cannot open temp file");
1213 	if(argc < 1)
1214 		error("no input file");
1215 	infile = argv[0];
1216 	if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1217 		i = strlen(infile)+1+strlen(dirbuf)+1+10;
1218 		s = malloc(i);
1219 		if(s != nil){
1220 			snprint(s, i, "%s/%s", dirbuf, infile);
1221 			cleanname(s);
1222 			infile = s;
1223 		}
1224 	}
1225 	finput = Bopen(infile, OREAD);
1226 	if(finput == 0)
1227 		error("cannot open '%s'", argv[0]);
1228 	cnamp = cnames;
1229 
1230 	defin(0, "$end");
1231 	extval = PRIVATE;	/* tokens start in unicode 'private use' */
1232 	defin(0, "error");
1233 	defin(1, "$accept");
1234 	defin(0, "$unk");
1235 	mem = mem0;
1236 	i = 0;
1237 
1238 	for(t = gettok(); t != MARK && t != ENDFILE;)
1239 	switch(t) {
1240 	case ';':
1241 		t = gettok();
1242 		break;
1243 
1244 	case START:
1245 		if(gettok() != IDENTIFIER)
1246 			error("bad %%start construction");
1247 		start = chfind(1, tokname);
1248 		t = gettok();
1249 		continue;
1250 
1251 	case TYPEDEF:
1252 		if(gettok() != TYPENAME)
1253 			error("bad syntax in %%type");
1254 		ty = numbval;
1255 		for(;;) {
1256 			t = gettok();
1257 			switch(t) {
1258 			case IDENTIFIER:
1259 				if((t=chfind(1, tokname)) < NTBASE) {
1260 					j = TYPE(toklev[t]);
1261 					if(j != 0 && j != ty)
1262 						error("type redeclaration of token %s",
1263 							tokset[t].name);
1264 					else
1265 						SETTYPE(toklev[t], ty);
1266 				} else {
1267 					j = nontrst[t-NTBASE].value;
1268 					if(j != 0 && j != ty)
1269 						error("type redeclaration of nonterminal %s",
1270 							nontrst[t-NTBASE].name );
1271 					else
1272 						nontrst[t-NTBASE].value = ty;
1273 				}
1274 			case ',':
1275 				continue;
1276 			case ';':
1277 				t = gettok();
1278 			default:
1279 				break;
1280 			}
1281 			break;
1282 		}
1283 		continue;
1284 
1285 	case UNION:
1286 		/* copy the union declaration to the output */
1287 		cpyunion();
1288 		t = gettok();
1289 		continue;
1290 
1291 	case LEFT:
1292 	case BINARY:
1293 	case RIGHT:
1294 		i++;
1295 
1296 	case TERM:
1297 		/* nonzero means new prec. and assoc. */
1298 		lev = t-TERM;
1299 		ty = 0;
1300 
1301 		/* get identifiers so defined */
1302 		t = gettok();
1303 
1304 		/* there is a type defined */
1305 		if(t == TYPENAME) {
1306 			ty = numbval;
1307 			t = gettok();
1308 		}
1309 		for(;;) {
1310 			switch(t) {
1311 			case ',':
1312 				t = gettok();
1313 				continue;
1314 
1315 			case ';':
1316 				break;
1317 
1318 			case IDENTIFIER:
1319 				j = chfind(0, tokname);
1320 				if(j >= NTBASE)
1321 					error("%s defined earlier as nonterminal", tokname);
1322 				if(lev) {
1323 					if(ASSOC(toklev[j]))
1324 						error("redeclaration of precedence of %s", tokname);
1325 					SETASC(toklev[j], lev);
1326 					SETPLEV(toklev[j], i);
1327 				}
1328 				if(ty) {
1329 					if(TYPE(toklev[j]))
1330 						error("redeclaration of type of %s", tokname);
1331 					SETTYPE(toklev[j],ty);
1332 				}
1333 				t = gettok();
1334 				if(t == NUMBER) {
1335 					tokset[j].value = numbval;
1336 					if(j < ndefout && j > 3)
1337 						error("please define type number of %s earlier",
1338 							tokset[j].name);
1339 					t = gettok();
1340 				}
1341 				continue;
1342 			}
1343 			break;
1344 		}
1345 		continue;
1346 
1347 	case LCURLY:
1348 		defout(0);
1349 		cpycode();
1350 		t = gettok();
1351 		continue;
1352 
1353 	default:
1354 		error("syntax error");
1355 	}
1356 	if(t == ENDFILE)
1357 		error("unexpected EOF before %%");
1358 
1359 	/* t is MARK */
1360 	Bprint(ftable, "extern	int	yyerrflag;\n");
1361 	Bprint(ftable, "#ifndef	YYMAXDEPTH\n");
1362 	Bprint(ftable, "#define	YYMAXDEPTH	150\n");
1363 	Bprint(ftable, "#endif\n" );
1364 	if(!ntypes) {
1365 		Bprint(ftable, "#ifndef	YYSTYPE\n");
1366 		Bprint(ftable, "#define	YYSTYPE	int\n");
1367 		Bprint(ftable, "#endif\n");
1368 	}
1369 	Bprint(ftable, "YYSTYPE	yylval;\n");
1370 	Bprint(ftable, "YYSTYPE	yyval;\n");
1371 
1372 	prdptr[0] = mem;
1373 
1374 	/* added production */
1375 	*mem++ = NTBASE;
1376 
1377 	/* if start is 0, we will overwrite with the lhs of the first rule */
1378 	*mem++ = start;
1379 	*mem++ = 1;
1380 	*mem++ = 0;
1381 	prdptr[1] = mem;
1382 	while((t=gettok()) == LCURLY)
1383 		cpycode();
1384 	if(t != IDENTCOLON)
1385 		error("bad syntax on first rule");
1386 
1387 	if(!start)
1388 		prdptr[0][1] = chfind(1, tokname);
1389 
1390 	/* read rules */
1391 	while(t != MARK && t != ENDFILE) {
1392 		/* process a rule */
1393 		rlines[nprod] = lineno;
1394 		if(t == '|')
1395 			*mem++ = *prdptr[nprod-1];
1396 		else
1397 			if(t == IDENTCOLON) {
1398 				*mem = chfind(1, tokname);
1399 				if(*mem < NTBASE)
1400 					error("token illegal on LHS of grammar rule");
1401 				mem++;
1402 			} else
1403 				error("illegal rule: missing semicolon or | ?");
1404 		/* read rule body */
1405 		t = gettok();
1406 
1407 	more_rule:
1408 		while(t == IDENTIFIER) {
1409 			*mem = chfind(1, tokname);
1410 			if(*mem < NTBASE)
1411 				levprd[nprod] = toklev[*mem];
1412 			mem++;
1413 			t = gettok();
1414 		}
1415 		if(t == PREC) {
1416 			if(gettok() != IDENTIFIER)
1417 				error("illegal %%prec syntax");
1418 			j = chfind(2, tokname);
1419 			if(j >= NTBASE)
1420 				error("nonterminal %s illegal after %%prec",
1421 					nontrst[j-NTBASE].name);
1422 			levprd[nprod] = toklev[j];
1423 			t = gettok();
1424 		}
1425 		if(t == '=') {
1426 			levprd[nprod] |= ACTFLAG;
1427 			Bprint(faction, "\ncase %d:", nprod);
1428 			cpyact(mem-prdptr[nprod]-1);
1429 			Bprint(faction, " break;");
1430 			if((t=gettok()) == IDENTIFIER) {
1431 
1432 				/* action within rule... */
1433 				sprint(actnm, "$$%d", nprod);
1434 
1435 				/* make it a nonterminal */
1436 				j = chfind(1, actnm);
1437 
1438 				/*
1439 				 * the current rule will become rule number nprod+1
1440 				 * move the contents down, and make room for the null
1441 				 */
1442 				for(p = mem; p >= prdptr[nprod]; --p)
1443 					p[2] = *p;
1444 				mem += 2;
1445 
1446 				/* enter null production for action */
1447 				p = prdptr[nprod];
1448 				*p++ = j;
1449 				*p++ = -nprod;
1450 
1451 				/* update the production information */
1452 				levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1453 				levprd[nprod] = ACTFLAG;
1454 				if(++nprod >= NPROD)
1455 					error("more than %d rules", NPROD);
1456 				prdptr[nprod] = p;
1457 
1458 				/* make the action appear in the original rule */
1459 				*mem++ = j;
1460 
1461 				/* get some more of the rule */
1462 				goto more_rule;
1463 			}
1464 		}
1465 
1466 		while(t == ';')
1467 			t = gettok();
1468 		*mem++ = -nprod;
1469 
1470 		/* check that default action is reasonable */
1471 		if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1472 
1473 			/* no explicit action, LHS has value */
1474 			int tempty;
1475 
1476 			tempty = prdptr[nprod][1];
1477 			if(tempty < 0)
1478 				error("must return a value, since LHS has a type");
1479 			else
1480 				if(tempty >= NTBASE)
1481 					tempty = nontrst[tempty-NTBASE].value;
1482 				else
1483 					tempty = TYPE(toklev[tempty]);
1484 			if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1485 				error("default action causes potential type clash");
1486 		}
1487 		nprod++;
1488 		if(nprod >= NPROD)
1489 			error("more than %d rules", NPROD);
1490 		prdptr[nprod] = mem;
1491 		levprd[nprod] = 0;
1492 	}
1493 
1494 	/* end of all rules */
1495 	defout(1);
1496 
1497 	finact();
1498 	if(t == MARK) {
1499 		Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1500 		while((c=Bgetrune(finput)) != Beof)
1501 			Bputrune(ftable, c);
1502 	}
1503 	Bterm(finput);
1504 }
1505 
1506 /*
1507  * finish action routine
1508  */
1509 void
1510 finact(void)
1511 {
1512 
1513 	Bterm(faction);
1514 	Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1515 	Bprint(ftable, "#define YYERRCODE %d\n", 2);
1516 }
1517 
1518 /*
1519  * define s to be a terminal if t=0
1520  * or a nonterminal if t=1
1521  */
1522 int
1523 defin(int nt, char *s)
1524 {
1525 	int val;
1526 	Rune rune;
1527 
1528 	val = 0;
1529 	if(nt) {
1530 		nnonter++;
1531 		if(nnonter >= NNONTERM)
1532 			error("too many nonterminals, limit %d",NNONTERM);
1533 		nontrst[nnonter].name = cstash(s);
1534 		return NTBASE + nnonter;
1535 	}
1536 
1537 	/* must be a token */
1538 	ntokens++;
1539 	if(ntokens >= NTERMS)
1540 		error("too many terminals, limit %d", NTERMS);
1541 	tokset[ntokens].name = cstash(s);
1542 
1543 	/* establish value for token */
1544 	/* single character literal */
1545 	if(s[0] == ' ') {
1546 		val = chartorune(&rune, &s[1]);
1547 		if(s[val+1] == 0) {
1548 			val = rune;
1549 			goto out;
1550 		}
1551 	}
1552 
1553 	/* escape sequence */
1554 	if(s[0] == ' ' && s[1] == '\\') {
1555 		if(s[3] == 0) {
1556 			/* single character escape sequence */
1557 			switch(s[2]) {
1558 			case 'n':	val = '\n'; break;
1559 			case 'r':	val = '\r'; break;
1560 			case 'b':	val = '\b'; break;
1561 			case 't':	val = '\t'; break;
1562 			case 'f':	val = '\f'; break;
1563 			case '\'':	val = '\''; break;
1564 			case '"':	val = '"'; break;
1565 			case '\\':	val = '\\'; break;
1566 			default:	error("invalid escape");
1567 			}
1568 			goto out;
1569 		}
1570 
1571 		/* \nnn sequence */
1572 		if(s[2] >= '0' && s[2] <= '7') {
1573 			if(s[3] < '0' ||
1574 			   s[3] > '7' ||
1575 			   s[4] < '0' ||
1576 			   s[4] > '7' ||
1577 			   s[5] != 0)
1578 				error("illegal \\nnn construction");
1579 			val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1580 			if(val == 0)
1581 				error("'\\000' is illegal");
1582 			goto out;
1583 		}
1584 		error("unknown escape");
1585 	}
1586 	val = extval++;
1587 
1588 out:
1589 	tokset[ntokens].value = val;
1590 	toklev[ntokens] = 0;
1591 	return ntokens;
1592 }
1593 
1594 /*
1595  * write out the defines (at the end of the declaration section)
1596  */
1597 void
1598 defout(int last)
1599 {
1600 	int i, c;
1601 	char sar[NAMESIZE+10];
1602 
1603 	for(i=ndefout; i<=ntokens; i++) {
1604 		/* non-literals */
1605 		c = tokset[i].name[0];
1606 		if(c != ' ' && c != '$') {
1607 			Bprint(ftable, "#define	%s	%d\n",
1608 				tokset[i].name, tokset[i].value);
1609 			if(fdefine)
1610 				Bprint(fdefine, "#define\t%s\t%d\n",
1611 					tokset[i].name, tokset[i].value);
1612 		}
1613 	}
1614 	ndefout = ntokens+1;
1615 	if(last && fdebug) {
1616 		Bprint(fdebug, "char*	yytoknames[] =\n{\n");
1617 		TLOOP(i) {
1618 			if(tokset[i].name) {
1619 				chcopy(sar, tokset[i].name);
1620 				Bprint(fdebug, "\t\"%s\",\n", sar);
1621 				continue;
1622 			}
1623 			Bprint(fdebug, "\t0,\n");
1624 		}
1625 		Bprint(fdebug, "};\n");
1626 	}
1627 }
1628 
1629 char*
1630 cstash(char *s)
1631 {
1632 	char *temp;
1633 
1634 	temp = cnamp;
1635 	do {
1636 		if(cnamp >= &cnames[cnamsz])
1637 			error("too many characters in id's and literals");
1638 		else
1639 			*cnamp++ = *s;
1640 	} while(*s++);
1641 	return temp;
1642 }
1643 
1644 long
1645 gettok(void)
1646 {
1647 	long c;
1648 	Rune rune;
1649 	int i, base, match, reserve;
1650 	static int peekline;
1651 
1652 begin:
1653 	reserve = 0;
1654 	lineno += peekline;
1655 	peekline = 0;
1656 	c = Bgetrune(finput);
1657 	while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1658 		if(c == '\n')
1659 			lineno++;
1660 		c = Bgetrune(finput);
1661 	}
1662 
1663 	/* skip comment */
1664 	if(c == '/') {
1665 		lineno += skipcom();
1666 		goto begin;
1667 	}
1668 	switch(c) {
1669 	case Beof:
1670 		return ENDFILE;
1671 
1672 	case '{':
1673 		Bungetrune(finput);
1674 		return '=';
1675 
1676 	case '<':
1677 		/* get, and look up, a type name (union member name) */
1678 		i = 0;
1679 		while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1680 			rune = c;
1681 			c = runetochar(&tokname[i], &rune);
1682 			if(i < NAMESIZE)
1683 				i += c;
1684 		}
1685 		if(c != '>')
1686 			error("unterminated < ... > clause");
1687 		tokname[i] = 0;
1688 		for(i=1; i<=ntypes; i++)
1689 			if(!strcmp(typeset[i], tokname)) {
1690 				numbval = i;
1691 				return TYPENAME;
1692 			}
1693 		ntypes++;
1694 		numbval = ntypes;
1695 		typeset[numbval] = cstash(tokname);
1696 		return TYPENAME;
1697 
1698 	case '"':
1699 	case '\'':
1700 		match = c;
1701 		tokname[0] = ' ';
1702 		i = 1;
1703 		for(;;) {
1704 			c = Bgetrune(finput);
1705 			if(c == '\n' || c <= 0)
1706 				error("illegal or missing ' or \"" );
1707 			if(c == '\\') {
1708 				tokname[i] = '\\';
1709 				if(i < NAMESIZE)
1710 					i++;
1711 				c = Bgetrune(finput);
1712 			} else
1713 				if(c == match)
1714 					break;
1715 			rune = c;
1716 			c = runetochar(&tokname[i], &rune);
1717 			if(i < NAMESIZE)
1718 				i += c;
1719 		}
1720 		break;
1721 
1722 	case '%':
1723 	case '\\':
1724 		switch(c = Bgetrune(finput)) {
1725 		case '0':	return TERM;
1726 		case '<':	return LEFT;
1727 		case '2':	return BINARY;
1728 		case '>':	return RIGHT;
1729 		case '%':
1730 		case '\\':	return MARK;
1731 		case '=':	return PREC;
1732 		case '{':	return LCURLY;
1733 		default:	reserve = 1;
1734 		}
1735 
1736 	default:
1737 		/* number */
1738 		if(isdigit(c)) {
1739 			numbval = c-'0';
1740 			base = (c=='0')? 8: 10;
1741 			for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1742 				numbval = numbval*base + (c-'0');
1743 			Bungetrune(finput);
1744 			return NUMBER;
1745 		}
1746 		if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1747 			i = 0;
1748 			while(islower(c) || isupper(c) || isdigit(c) ||
1749 			    c == '-' || c=='_' || c=='.' || c=='$') {
1750 				if(reserve && isupper(c))
1751 					c += 'a'-'A';
1752 				rune = c;
1753 				c = runetochar(&tokname[i], &rune);
1754 				if(i < NAMESIZE)
1755 					i += c;
1756 				c = Bgetrune(finput);
1757 			}
1758 		} else
1759 			return c;
1760 		Bungetrune(finput);
1761 	}
1762 	tokname[i] = 0;
1763 
1764 	/* find a reserved word */
1765 	if(reserve) {
1766 		for(c=0; resrv[c].name; c++)
1767 			if(strcmp(tokname, resrv[c].name) == 0)
1768 				return resrv[c].value;
1769 		error("invalid escape, or illegal reserved word: %s", tokname);
1770 	}
1771 
1772 	/* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1773 	c = Bgetrune(finput);
1774 	while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1775 		if(c == '\n')
1776 			peekline++;
1777 		/* look for comments */
1778 		if(c == '/')
1779 			peekline += skipcom();
1780 		c = Bgetrune(finput);
1781 	}
1782 	if(c == ':')
1783 		return IDENTCOLON;
1784 	Bungetrune(finput);
1785 	return IDENTIFIER;
1786 }
1787 
1788 /*
1789  * determine the type of a symbol
1790  */
1791 int
1792 fdtype(int t)
1793 {
1794 	int v;
1795 
1796 	if(t >= NTBASE)
1797 		v = nontrst[t-NTBASE].value;
1798 	else
1799 		v = TYPE(toklev[t]);
1800 	if(v <= 0)
1801 		error("must specify type for %s", (t>=NTBASE)?
1802 			nontrst[t-NTBASE].name: tokset[t].name);
1803 	return v;
1804 }
1805 
1806 int
1807 chfind(int t, char *s)
1808 {
1809 	int i;
1810 
1811 	if(s[0] == ' ')
1812 		t = 0;
1813 	TLOOP(i)
1814 		if(!strcmp(s, tokset[i].name))
1815 			return i;
1816 	NTLOOP(i)
1817 		if(!strcmp(s, nontrst[i].name))
1818 			return NTBASE+i;
1819 
1820 	/* cannot find name */
1821 	if(t > 1)
1822 		error("%s should have been defined earlier", s);
1823 	return defin(t, s);
1824 }
1825 
1826 /*
1827  * copy the union declaration to the output, and the define file if present
1828  */
1829 void
1830 cpyunion(void)
1831 {
1832 	long c;
1833 	int level;
1834 
1835 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1836 	Bprint(ftable, "typedef union ");
1837 	if(fdefine != 0)
1838 		Bprint(fdefine, "\ntypedef union ");
1839 
1840 	level = 0;
1841 	for(;;) {
1842 		if((c=Bgetrune(finput)) == Beof)
1843 			error("EOF encountered while processing %%union");
1844 		Bputrune(ftable, c);
1845 		if(fdefine != 0)
1846 			Bputrune(fdefine, c);
1847 		switch(c) {
1848 		case '\n':
1849 			lineno++;
1850 			break;
1851 		case '{':
1852 			level++;
1853 			break;
1854 		case '}':
1855 			level--;
1856 
1857 			/* we are finished copying */
1858 			if(level == 0) {
1859 				Bprint(ftable, " YYSTYPE;\n");
1860 				if(fdefine != 0)
1861 					Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1862 				return;
1863 			}
1864 		}
1865 	}
1866 }
1867 
1868 /*
1869  * copies code between \{ and \}
1870  */
1871 void
1872 cpycode(void)
1873 {
1874 
1875 	long c;
1876 
1877 	c = Bgetrune(finput);
1878 	if(c == '\n') {
1879 		c = Bgetrune(finput);
1880 		lineno++;
1881 	}
1882 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1883 	while(c != Beof) {
1884 		if(c == '\\') {
1885 			if((c=Bgetrune(finput)) == '}')
1886 				return;
1887 			Bputc(ftable, '\\');
1888 		}
1889 		if(c == '%') {
1890 			if((c=Bgetrune(finput)) == '}')
1891 				return;
1892 			Bputc(ftable, '%');
1893 		}
1894 		Bputrune(ftable, c);
1895 		if(c == '\n')
1896 			lineno++;
1897 		c = Bgetrune(finput);
1898 	}
1899 	error("eof before %%}");
1900 }
1901 
1902 /*
1903  * skip over comments
1904  * skipcom is called after reading a '/'
1905  */
1906 int
1907 skipcom(void)
1908 {
1909 	long c;
1910 	int i;
1911 
1912 	/* i is the number of lines skipped */
1913 	i = 0;
1914 	if(Bgetrune(finput) != '*')
1915 		error("illegal comment");
1916 	c = Bgetrune(finput);
1917 	while(c != Beof) {
1918 		while(c == '*')
1919 			if((c=Bgetrune(finput)) == '/')
1920 				return i;
1921 		if(c == '\n')
1922 			i++;
1923 		c = Bgetrune(finput);
1924 	}
1925 	error("EOF inside comment");
1926 	return 0;
1927 }
1928 
1929 /*
1930  * copy C action to the next ; or closing }
1931  */
1932 void
1933 cpyact(int offset)
1934 {
1935 	long c;
1936 	int brac, match, j, s, fnd, tok;
1937 
1938 	Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1939 	brac = 0;
1940 
1941 loop:
1942 	c = Bgetrune(finput);
1943 swt:
1944 	switch(c) {
1945 	case ';':
1946 		if(brac == 0) {
1947 			Bputrune(faction, c);
1948 			return;
1949 		}
1950 		goto lcopy;
1951 
1952 	case '{':
1953 		brac++;
1954 		goto lcopy;
1955 
1956 	case '$':
1957 		s = 1;
1958 		tok = -1;
1959 		c = Bgetrune(finput);
1960 
1961 		/* type description */
1962 		if(c == '<') {
1963 			Bungetrune(finput);
1964 			if(gettok() != TYPENAME)
1965 				error("bad syntax on $<ident> clause");
1966 			tok = numbval;
1967 			c = Bgetrune(finput);
1968 		}
1969 		if(c == '$') {
1970 			Bprint(faction, "yyval");
1971 
1972 			/* put out the proper tag... */
1973 			if(ntypes) {
1974 				if(tok < 0)
1975 					tok = fdtype(*prdptr[nprod]);
1976 				Bprint(faction, ".%s", typeset[tok]);
1977 			}
1978 			goto loop;
1979 		}
1980 		if(c == '-') {
1981 			s = -s;
1982 			c = Bgetrune(finput);
1983 		}
1984 		if(isdigit(c)) {
1985 			j = 0;
1986 			while(isdigit(c)) {
1987 				j = j*10 + (c-'0');
1988 				c = Bgetrune(finput);
1989 			}
1990 			Bungetrune(finput);
1991 			j = j*s - offset;
1992 			if(j > 0)
1993 				error("Illegal use of $%d", j+offset);
1994 
1995 		dollar:
1996 			Bprint(faction, "yypt[-%d].yyv", -j);
1997 
1998 			/* put out the proper tag */
1999 			if(ntypes) {
2000 				if(j+offset <= 0 && tok < 0)
2001 					error("must specify type of $%d", j+offset);
2002 				if(tok < 0)
2003 					tok = fdtype(prdptr[nprod][j+offset]);
2004 				Bprint(faction, ".%s", typeset[tok]);
2005 			}
2006 			goto loop;
2007 		}
2008 		if(isupper(c) || islower(c) || c == '_' || c == '.') {
2009 			int tok; /* tok used oustide for type info */
2010 
2011 			/* look for $name */
2012 			Bungetrune(finput);
2013 			if(gettok() != IDENTIFIER)
2014 				error("$ must be followed by an identifier");
2015 			tok = chfind(2, tokname);
2016 			if((c = Bgetrune(finput)) != '#') {
2017 				Bungetrune(finput);
2018 				fnd = -1;
2019 			} else
2020 				if(gettok() != NUMBER) {
2021 					error("# must be followed by number");
2022 					fnd = -1;
2023 				} else
2024 					fnd = numbval;
2025 			for(j=1; j<=offset; ++j)
2026 				if(tok == prdptr[nprod][j]) {
2027 					if(--fnd <= 0) {
2028 						j -= offset;
2029 						goto dollar;
2030 					}
2031 				}
2032 			error("$name or $name#number not found");
2033 		}
2034 		Bputc(faction, '$');
2035 		if(s < 0 )
2036 			Bputc(faction, '-');
2037 		goto swt;
2038 
2039 	case '}':
2040 		brac--;
2041 		if(brac)
2042 			goto lcopy;
2043 		Bputrune(faction, c);
2044 		return;
2045 
2046 	case '/':
2047 		/* look for comments */
2048 		Bputrune(faction, c);
2049 		c = Bgetrune(finput);
2050 		if(c != '*')
2051 			goto swt;
2052 
2053 		/* it really is a comment */
2054 		Bputrune(faction, c);
2055 		c = Bgetrune(finput);
2056 		while(c >= 0) {
2057 			while(c == '*') {
2058 				Bputrune(faction, c);
2059 				if((c=Bgetrune(finput)) == '/')
2060 					goto lcopy;
2061 			}
2062 			Bputrune(faction, c);
2063 			if(c == '\n')
2064 				lineno++;
2065 			c = Bgetrune(finput);
2066 		}
2067 		error("EOF inside comment");
2068 
2069 	case '\'':
2070 		/* character constant */
2071 		match = '\'';
2072 		goto string;
2073 
2074 	case '"':
2075 		/* character string */
2076 		match = '"';
2077 
2078 	string:
2079 		Bputrune(faction, c);
2080 		while(c = Bgetrune(finput)) {
2081 			if(c == '\\') {
2082 				Bputrune(faction, c);
2083 				c = Bgetrune(finput);
2084 				if(c == '\n')
2085 					lineno++;
2086 			} else
2087 				if(c == match)
2088 					goto lcopy;
2089 				if(c == '\n')
2090 					error("newline in string or char. const.");
2091 			Bputrune(faction, c);
2092 		}
2093 		error("EOF in string or character constant");
2094 
2095 	case Beof:
2096 		error("action does not terminate");
2097 
2098 	case '\n':
2099 		lineno++;
2100 		goto lcopy;
2101 	}
2102 
2103 lcopy:
2104 	Bputrune(faction, c);
2105 	goto loop;
2106 }
2107 
2108 void
2109 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2110 {
2111 	char buf[256];
2112 
2113 	if(vflag) {
2114 		snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2115 		foutput = Bopen(buf, OWRITE);
2116 		if(foutput == 0)
2117 			error("cannot open %s", buf);
2118 	}
2119 	if(yydebug) {
2120 		snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2121 		if((fdebug = Bopen(buf, OWRITE)) == 0)
2122 			error("can't open %s", buf);
2123 	}
2124 	if(dflag) {
2125 		snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2126 		fdefine = Bopen(buf, OWRITE);
2127 		if(fdefine == 0)
2128 			error("can't create %s", buf);
2129 	}
2130 	if(ytab == 0)
2131 		snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2132 	else
2133 		strecpy(buf, buf+sizeof buf, ytabc);
2134 	ftable = Bopen(buf, OWRITE);
2135 	if(ftable == 0)
2136 		error("cannot open table file %s", buf);
2137 }
2138 
2139 /*
2140  * print the output for the states
2141  */
2142 void
2143 output(void)
2144 {
2145 	int i, k, c;
2146 	Wset *u, *v;
2147 
2148 	Bprint(ftable, "short	yyexca[] =\n{");
2149 	if(fdebug)
2150 		Bprint(fdebug, "char*	yystates[] =\n{\n");
2151 
2152 	/* output the stuff for state i */
2153 	SLOOP(i) {
2154 		nolook = tystate[i]!=MUSTLOOKAHEAD;
2155 		closure(i);
2156 
2157 		/* output actions */
2158 		nolook = 1;
2159 		aryfil(temp1, ntokens+nnonter+1, 0);
2160 		WSLOOP(wsets, u) {
2161 			c = *(u->pitem);
2162 			if(c > 1 && c < NTBASE && temp1[c] == 0) {
2163 				WSLOOP(u, v)
2164 					if(c == *(v->pitem))
2165 						putitem(v->pitem+1, (Lkset*)0);
2166 				temp1[c] = state(c);
2167 			} else
2168 				if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2169 					temp1[c+ntokens] = amem[indgo[i]+c];
2170 		}
2171 		if(i == 1)
2172 			temp1[1] = ACCEPTCODE;
2173 
2174 		/* now, we have the shifts; look at the reductions */
2175 		lastred = 0;
2176 		WSLOOP(wsets, u) {
2177 			c = *u->pitem;
2178 
2179 			/* reduction */
2180 			if(c <= 0) {
2181 				lastred = -c;
2182 				TLOOP(k)
2183 					if(BIT(u->ws.lset, k)) {
2184 						if(temp1[k] == 0)
2185 							temp1[k] = c;
2186 						else
2187 						if(temp1[k] < 0) { /* reduce/reduce conflict */
2188 							if(foutput)
2189 								Bprint(foutput,
2190 									"\n%d: reduce/reduce conflict"
2191 									" (red'ns %d and %d ) on %s",
2192 									i, -temp1[k], lastred,
2193 									symnam(k));
2194 							if(-temp1[k] > lastred)
2195 								temp1[k] = -lastred;
2196 							zzrrconf++;
2197 						} else
2198 							/* potential shift/reduce conflict */
2199 							precftn( lastred, k, i );
2200 					}
2201 				}
2202 		}
2203 		wract(i);
2204 	}
2205 
2206 	if(fdebug)
2207 		Bprint(fdebug, "};\n");
2208 	Bprint(ftable, "};\n");
2209 	Bprint(ftable, "#define	YYNPROD	%d\n", nprod);
2210 	Bprint(ftable, "#define	YYPRIVATE %d\n", PRIVATE);
2211 	if(yydebug)
2212 		Bprint(ftable, "#define	yydebug	%s\n", yydebug);
2213 }
2214 
2215 /*
2216  * pack state i from temp1 into amem
2217  */
2218 int
2219 apack(int *p, int n)
2220 {
2221 	int *pp, *qq, *rr, off, *q, *r;
2222 
2223 	/* we don't need to worry about checking because
2224 	 * we will only look at entries known to be there...
2225 	 * eliminate leading and trailing 0's
2226 	 */
2227 
2228 	q = p+n;
2229 	for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2230 		;
2231  	/* no actions */
2232 	if(pp > q)
2233 		return 0;
2234 	p = pp;
2235 
2236 	/* now, find a place for the elements from p to q, inclusive */
2237 	r = &amem[ACTSIZE-1];
2238 	for(rr = amem; rr <= r; rr++, off++) {
2239 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2240 			if(*pp != 0)
2241 				if(*pp != *qq && *qq != 0)
2242 					goto nextk;
2243 
2244 		/* we have found an acceptable k */
2245 		if(pkdebug && foutput != 0)
2246 			Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2247 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2248 			if(*pp) {
2249 				if(qq > r)
2250 					error("action table overflow");
2251 				if(qq > memp)
2252 					memp = qq;
2253 				*qq = *pp;
2254 			}
2255 		if(pkdebug && foutput != 0)
2256 			for(pp = amem; pp <= memp; pp += 10) {
2257 				Bprint(foutput, "\t");
2258 				for(qq = pp; qq <= pp+9; qq++)
2259 					Bprint(foutput, "%d ", *qq);
2260 				Bprint(foutput, "\n");
2261 			}
2262 		return(off);
2263 	nextk:;
2264 	}
2265 	error("no space in action table");
2266 	return 0;
2267 }
2268 
2269 /*
2270  * output the gotos for the nontermninals
2271  */
2272 void
2273 go2out(void)
2274 {
2275 	int i, j, k, best, count, cbest, times;
2276 
2277 	/* mark begining of gotos */
2278 	Bprint(ftemp, "$\n");
2279 	for(i = 1; i <= nnonter; i++) {
2280 		go2gen(i);
2281 
2282 		/* find the best one to make default */
2283 		best = -1;
2284 		times = 0;
2285 
2286 		/* is j the most frequent */
2287 		for(j = 0; j <= nstate; j++) {
2288 			if(tystate[j] == 0)
2289 				continue;
2290 			if(tystate[j] == best)
2291 				continue;
2292 
2293 			/* is tystate[j] the most frequent */
2294 			count = 0;
2295 			cbest = tystate[j];
2296 			for(k = j; k <= nstate; k++)
2297 				if(tystate[k] == cbest)
2298 					count++;
2299 			if(count > times) {
2300 				best = cbest;
2301 				times = count;
2302 			}
2303 		}
2304 
2305 		/* best is now the default entry */
2306 		zzgobest += times-1;
2307 		for(j = 0; j <= nstate; j++)
2308 			if(tystate[j] != 0 && tystate[j] != best) {
2309 				Bprint(ftemp, "%d,%d,", j, tystate[j]);
2310 				zzgoent++;
2311 			}
2312 
2313 		/* now, the default */
2314 		if(best == -1)
2315 			best = 0;
2316 		zzgoent++;
2317 		Bprint(ftemp, "%d\n", best);
2318 	}
2319 }
2320 
2321 /*
2322  * output the gotos for nonterminal c
2323  */
2324 void
2325 go2gen(int c)
2326 {
2327 	int i, work, cc;
2328 	Item *p, *q;
2329 
2330 
2331 	/* first, find nonterminals with gotos on c */
2332 	aryfil(temp1, nnonter+1, 0);
2333 	temp1[c] = 1;
2334 	work = 1;
2335 	while(work) {
2336 		work = 0;
2337 		PLOOP(0, i)
2338 
2339 			/* cc is a nonterminal */
2340 			if((cc=prdptr[i][1]-NTBASE) >= 0)
2341 				/* cc has a goto on c */
2342 				if(temp1[cc] != 0) {
2343 
2344 					/* thus, the left side of production i does too */
2345 					cc = *prdptr[i]-NTBASE;
2346 					if(temp1[cc] == 0) {
2347 						  work = 1;
2348 						  temp1[cc] = 1;
2349 					}
2350 				}
2351 	}
2352 
2353 	/* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2354 	if(g2debug && foutput != 0) {
2355 		Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2356 		NTLOOP(i)
2357 			if(temp1[i])
2358 				Bprint(foutput, "%s ", nontrst[i].name);
2359 		Bprint(foutput, "\n");
2360 	}
2361 
2362 	/* now, go through and put gotos into tystate */
2363 	aryfil(tystate, nstate, 0);
2364 	SLOOP(i)
2365 		ITMLOOP(i, p, q)
2366 			if((cc = *p->pitem) >= NTBASE)
2367 				/* goto on c is possible */
2368 				if(temp1[cc-NTBASE]) {
2369 					tystate[i] = amem[indgo[i]+c];
2370 					break;
2371 				}
2372 }
2373 
2374 /*
2375  * decide a shift/reduce conflict by precedence.
2376  * r is a rule number, t a token number
2377  * the conflict is in state s
2378  * temp1[t] is changed to reflect the action
2379  */
2380 void
2381 precftn(int r, int t, int s)
2382 {
2383 	int lp, lt, action;
2384 
2385 	lp = levprd[r];
2386 	lt = toklev[t];
2387 	if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2388 
2389 		/* conflict */
2390 		if(foutput != 0)
2391 			Bprint(foutput,
2392 				"\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2393 				s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2394 		zzsrconf++;
2395 		return;
2396 	}
2397 	if(PLEVEL(lt) == PLEVEL(lp))
2398 		action = ASSOC(lt);
2399 	else
2400 		if(PLEVEL(lt) > PLEVEL(lp))
2401 			action = RASC;  /* shift */
2402 		else
2403 			action = LASC;  /* reduce */
2404 	switch(action) {
2405 	case BASC:  /* error action */
2406 		temp1[t] = ERRCODE;
2407 		break;
2408 	case LASC:  /* reduce */
2409 		temp1[t] = -r;
2410 		break;
2411 	}
2412 }
2413 
2414 /*
2415  * output state i
2416  * temp1 has the actions, lastred the default
2417  */
2418 void
2419 wract(int i)
2420 {
2421 	int p, p0, p1, ntimes, tred, count, j, flag;
2422 
2423 	/* find the best choice for lastred */
2424 	lastred = 0;
2425 	ntimes = 0;
2426 	TLOOP(j) {
2427 		if(temp1[j] >= 0)
2428 			continue;
2429 		if(temp1[j]+lastred == 0)
2430 			continue;
2431 		/* count the number of appearances of temp1[j] */
2432 		count = 0;
2433 		tred = -temp1[j];
2434 		levprd[tred] |= REDFLAG;
2435 		TLOOP(p)
2436 			if(temp1[p]+tred == 0)
2437 				count++;
2438 		if(count > ntimes) {
2439 			lastred = tred;
2440 			ntimes = count;
2441 		}
2442 	}
2443 
2444 	/*
2445 	 * for error recovery, arrange that, if there is a shift on the
2446 	 * error recovery token, `error', that the default be the error action
2447 	 */
2448 	if(temp1[2] > 0)
2449 		lastred = 0;
2450 
2451 	/* clear out entries in temp1 which equal lastred */
2452 	TLOOP(p)
2453 		if(temp1[p]+lastred == 0)
2454 			temp1[p] = 0;
2455 
2456 	wrstate(i);
2457 	defact[i] = lastred;
2458 	flag = 0;
2459 	TLOOP(p0)
2460 		if((p1=temp1[p0]) != 0) {
2461 			if(p1 < 0) {
2462 				p1 = -p1;
2463 				goto exc;
2464 			}
2465 			if(p1 == ACCEPTCODE) {
2466 				p1 = -1;
2467 				goto exc;
2468 			}
2469 			if(p1 == ERRCODE) {
2470 				p1 = 0;
2471 			exc:
2472 				if(flag++ == 0)
2473 					Bprint(ftable, "-1, %d,\n", i);
2474 				Bprint(ftable, "\t%d, %d,\n", p0, p1);
2475 				zzexcp++;
2476 				continue;
2477 			}
2478 			Bprint(ftemp, "%d,%d,", p0, p1);
2479 			zzacent++;
2480 		}
2481 	if(flag) {
2482 		defact[i] = -2;
2483 		Bprint(ftable, "\t-2, %d,\n", lastred);
2484 	}
2485 	Bprint(ftemp, "\n");
2486 }
2487 
2488 /*
2489  * writes state i
2490  */
2491 void
2492 wrstate(int i)
2493 {
2494 	int j0, j1;
2495 	Item *pp, *qq;
2496 	Wset *u;
2497 
2498 	if(fdebug) {
2499 		if(lastred) {
2500 			Bprint(fdebug, "	0, /*%d*/\n", i);
2501 		} else {
2502 			Bprint(fdebug, "	\"");
2503 			ITMLOOP(i, pp, qq)
2504 				Bprint(fdebug, "%s\\n", writem(pp->pitem));
2505 			if(tystate[i] == MUSTLOOKAHEAD)
2506 				WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2507 					if(*u->pitem < 0)
2508 						Bprint(fdebug, "%s\\n", writem(u->pitem));
2509 			Bprint(fdebug, "\", /*%d*/\n", i);
2510 		}
2511 	}
2512 	if(foutput == 0)
2513 		return;
2514 	Bprint(foutput, "\nstate %d\n", i);
2515 	ITMLOOP(i, pp, qq)
2516 		Bprint(foutput, "\t%s\n", writem(pp->pitem));
2517 	if(tystate[i] == MUSTLOOKAHEAD)
2518 		/* print out empty productions in closure */
2519 		WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2520 			if(*u->pitem < 0)
2521 				Bprint(foutput, "\t%s\n", writem(u->pitem));
2522 
2523 	/* check for state equal to another */
2524 	TLOOP(j0)
2525 		if((j1=temp1[j0]) != 0) {
2526 			Bprint(foutput, "\n\t%s  ", symnam(j0));
2527 			/* shift, error, or accept */
2528 			if(j1 > 0) {
2529 				if(j1 == ACCEPTCODE)
2530 					Bprint(foutput,  "accept");
2531 				else
2532 					if(j1 == ERRCODE)
2533 						Bprint(foutput, "error");
2534 					else
2535 						Bprint(foutput, "shift %d", j1);
2536 			} else
2537 				Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2538 		}
2539 
2540 	/* output the final production */
2541 	if(lastred)
2542 		Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2543 			lastred, rlines[lastred]);
2544 	else
2545 		Bprint(foutput, "\n\t.  error\n\n");
2546 
2547 	/* now, output nonterminal actions */
2548 	j1 = ntokens;
2549 	for(j0 = 1; j0 <= nnonter; j0++) {
2550 		j1++;
2551 		if(temp1[j1])
2552 			Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2553 	}
2554 }
2555 
2556 void
2557 warray(char *s, int *v, int n)
2558 {
2559 	int i;
2560 
2561 	Bprint(ftable, "short	%s[] =\n{", s);
2562 	for(i=0;;) {
2563 		if(i%10 == 0)
2564 			Bprint(ftable, "\n");
2565 		Bprint(ftable, "%4d", v[i]);
2566 		i++;
2567 		if(i >= n) {
2568 			Bprint(ftable, "\n};\n");
2569 			break;
2570 		}
2571 		Bprint(ftable, ",");
2572 	}
2573 }
2574 
2575 /*
2576  * in order to free up the mem and amem arrays for the optimizer,
2577  * and still be able to output yyr1, etc., after the sizes of
2578  * the action array is known, we hide the nonterminals
2579  * derived by productions in levprd.
2580  */
2581 
2582 void
2583 hideprod(void)
2584 {
2585 	int i, j;
2586 
2587 	j = 0;
2588 	levprd[0] = 0;
2589 	PLOOP(1, i) {
2590 		if(!(levprd[i] & REDFLAG)) {
2591 			j++;
2592 			if(foutput != 0)
2593 				Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2594 		}
2595 		levprd[i] = *prdptr[i] - NTBASE;
2596 	}
2597 	if(j)
2598 		print("%d rules never reduced\n", j);
2599 }
2600 
2601 void
2602 callopt(void)
2603 {
2604 	int i, *p, j, k, *q;
2605 
2606 	/* read the arrays from tempfile and set parameters */
2607 	finput = Bopen(tempname, OREAD);
2608 	if(finput == 0)
2609 		error("optimizer cannot open tempfile");
2610 
2611 	pgo[0] = 0;
2612 	temp1[0] = 0;
2613 	nstate = 0;
2614 	nnonter = 0;
2615 	for(;;) {
2616 		switch(gtnm()) {
2617 		case '\n':
2618 			nstate++;
2619 			pmem--;
2620 			temp1[nstate] = pmem - mem0;
2621 		case ',':
2622 			continue;
2623 		case '$':
2624 			break;
2625 		default:
2626 			error("bad tempfile");
2627 		}
2628 		break;
2629 	}
2630 
2631 	pmem--;
2632 	temp1[nstate] = yypgo[0] = pmem - mem0;
2633 	for(;;) {
2634 		switch(gtnm()) {
2635 		case '\n':
2636 			nnonter++;
2637 			yypgo[nnonter] = pmem-mem0;
2638 		case ',':
2639 			continue;
2640 		case -1:
2641 			break;
2642 		default:
2643 			error("bad tempfile");
2644 		}
2645 		break;
2646 	}
2647 	pmem--;
2648 	yypgo[nnonter--] = pmem - mem0;
2649 	for(i = 0; i < nstate; i++) {
2650 		k = 32000;
2651 		j = 0;
2652 		q = mem0 + temp1[i+1];
2653 		for(p = mem0 + temp1[i]; p < q ; p += 2) {
2654 			if(*p > j)
2655 				j = *p;
2656 			if(*p < k)
2657 				k = *p;
2658 		}
2659 		/* nontrivial situation */
2660 		if(k <= j) {
2661 			/* j is now the range */
2662 /*			j -= k;			/* call scj */
2663 			if(k > maxoff)
2664 				maxoff = k;
2665 		}
2666 		tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2667 		if(j > maxspr)
2668 			maxspr = j;
2669 	}
2670 
2671 	/* initialize ggreed table */
2672 	for(i = 1; i <= nnonter; i++) {
2673 		ggreed[i] = 1;
2674 		j = 0;
2675 
2676 		/* minimum entry index is always 0 */
2677 		q = mem0 + yypgo[i+1] - 1;
2678 		for(p = mem0+yypgo[i]; p < q ; p += 2) {
2679 			ggreed[i] += 2;
2680 			if(*p > j)
2681 				j = *p;
2682 		}
2683 		ggreed[i] = ggreed[i] + 2*j;
2684 		if(j > maxoff)
2685 			maxoff = j;
2686 	}
2687 
2688 	/* now, prepare to put the shift actions into the amem array */
2689 	for(i = 0; i < ACTSIZE; i++)
2690 		amem[i] = 0;
2691 	maxa = amem;
2692 	for(i = 0; i < nstate; i++) {
2693 		if(tystate[i] == 0 && adb > 1)
2694 			Bprint(ftable, "State %d: null\n", i);
2695 		indgo[i] = YYFLAG1;
2696 	}
2697 	while((i = nxti()) != NOMORE)
2698 		if(i >= 0)
2699 			stin(i);
2700 		else
2701 			gin(-i);
2702 
2703 	/* print amem array */
2704 	if(adb > 2 )
2705 		for(p = amem; p <= maxa; p += 10) {
2706 			Bprint(ftable, "%4d  ", (int)(p-amem));
2707 			for(i = 0; i < 10; ++i)
2708 				Bprint(ftable, "%4d  ", p[i]);
2709 			Bprint(ftable, "\n");
2710 		}
2711 
2712 	/* write out the output appropriate to the language */
2713 	aoutput();
2714 	osummary();
2715 	ZAPFILE(tempname);
2716 }
2717 
2718 void
2719 gin(int i)
2720 {
2721 	int *p, *r, *s, *q1, *q2;
2722 
2723 	/* enter gotos on nonterminal i into array amem */
2724 	ggreed[i] = 0;
2725 
2726 	q2 = mem0+ yypgo[i+1] - 1;
2727 	q1 = mem0 + yypgo[i];
2728 
2729 	/* now, find amem place for it */
2730 	for(p = amem; p < &amem[ACTSIZE]; p++) {
2731 		if(*p)
2732 			continue;
2733 		for(r = q1; r < q2; r += 2) {
2734 			s = p + *r + 1;
2735 			if(*s)
2736 				goto nextgp;
2737 			if(s > maxa)
2738 				if((maxa = s) > &amem[ACTSIZE])
2739 					error("a array overflow");
2740 		}
2741 		/* we have found amem spot */
2742 		*p = *q2;
2743 		if(p > maxa)
2744 			if((maxa = p) > &amem[ACTSIZE])
2745 				error("a array overflow");
2746 		for(r = q1; r < q2; r += 2) {
2747 			s = p + *r + 1;
2748 			*s = r[1];
2749 		}
2750 		pgo[i] = p-amem;
2751 		if(adb > 1)
2752 			Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2753 		return;
2754 
2755 	nextgp:;
2756 	}
2757 	error("cannot place goto %d\n", i);
2758 }
2759 
2760 void
2761 stin(int i)
2762 {
2763 	int *r, *s, n, flag, j, *q1, *q2;
2764 
2765 	tystate[i] = 0;
2766 
2767 	/* enter state i into the amem array */
2768 	q2 = mem0+temp1[i+1];
2769 	q1 = mem0+temp1[i];
2770 	/* find an acceptable place */
2771 	for(n = -maxoff; n < ACTSIZE; n++) {
2772 		flag = 0;
2773 		for(r = q1; r < q2; r += 2) {
2774 			if((s = *r + n + amem) < amem)
2775 				goto nextn;
2776 			if(*s == 0)
2777 				flag++;
2778 			else
2779 				if(*s != r[1])
2780 					goto nextn;
2781 		}
2782 
2783 		/* check that the position equals another only if the states are identical */
2784 		for(j=0; j<nstate; j++) {
2785 			if(indgo[j] == n) {
2786 
2787 				/* we have some disagreement */
2788 				if(flag)
2789 					goto nextn;
2790 				if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2791 
2792 					/* states are equal */
2793 					indgo[i] = n;
2794 					if(adb > 1)
2795 						Bprint(ftable,
2796 						"State %d: entry at %d equals state %d\n",
2797 						i, n, j);
2798 					return;
2799 				}
2800 
2801 				/* we have some disagreement */
2802 				goto nextn;
2803 			}
2804 		}
2805 
2806 		for(r = q1; r < q2; r += 2) {
2807 			if((s = *r+n+amem) >= &amem[ACTSIZE])
2808 				error("out of space in optimizer a array");
2809 			if(s > maxa)
2810 				maxa = s;
2811 			if(*s != 0 && *s != r[1])
2812 				error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2813 			*s = r[1];
2814 		}
2815 		indgo[i] = n;
2816 		if(adb > 1)
2817 			Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2818 		return;
2819 	nextn:;
2820 	}
2821 	error("Error; failure to place state %d\n", i);
2822 }
2823 
2824 /*
2825  * finds the next i
2826  */
2827 int
2828 nxti(void)
2829 {
2830 	int i, max, maxi;
2831 
2832 	max = 0;
2833 	maxi = 0;
2834 	for(i = 1; i <= nnonter; i++)
2835 		if(ggreed[i] >= max) {
2836 			max = ggreed[i];
2837 			maxi = -i;
2838 		}
2839 	for(i = 0; i < nstate; ++i)
2840 		if(tystate[i] >= max) {
2841 			max = tystate[i];
2842 			maxi = i;
2843 		}
2844 	if(nxdb)
2845 		Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2846 	if(max == 0)
2847 		return NOMORE;
2848 	return maxi;
2849 }
2850 
2851 /*
2852  * write summary
2853  */
2854 void
2855 osummary(void)
2856 {
2857 
2858 	int i, *p;
2859 
2860 	if(foutput == 0)
2861 		return;
2862 	i = 0;
2863 	for(p = maxa; p >= amem; p--)
2864 		if(*p == 0)
2865 			i++;
2866 
2867 	Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2868 		(int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2869 	Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2870 	Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2871 }
2872 
2873 /*
2874  * this version is for C
2875  * write out the optimized parser
2876  */
2877 void
2878 aoutput(void)
2879 {
2880 	Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2881 	arout("yyact", amem, (maxa-amem)+1);
2882 	arout("yypact", indgo, nstate);
2883 	arout("yypgo", pgo, nnonter+1);
2884 }
2885 
2886 void
2887 arout(char *s, int *v, int n)
2888 {
2889 	int i;
2890 
2891 	Bprint(ftable, "short	%s[] =\n{", s);
2892 	for(i = 0; i < n;) {
2893 		if(i%10 == 0)
2894 			Bprint(ftable, "\n");
2895 		Bprint(ftable, "%4d", v[i]);
2896 		i++;
2897 		if(i == n)
2898 			Bprint(ftable, "\n};\n");
2899 		else
2900 			Bprint(ftable, ",");
2901 	}
2902 }
2903 
2904 /*
2905  * read and convert an integer from the standard input
2906  * return the terminating character
2907  * blanks, tabs, and newlines are ignored
2908  */
2909 int
2910 gtnm(void)
2911 {
2912 	int sign, val, c;
2913 
2914 	sign = 0;
2915 	val = 0;
2916 	while((c=Bgetrune(finput)) != Beof) {
2917 		if(isdigit(c)) {
2918 			val = val*10 + c-'0';
2919 			continue;
2920 		}
2921 		if(c == '-') {
2922 			sign = 1;
2923 			continue;
2924 		}
2925 		break;
2926 	}
2927 	if(sign)
2928 		val = -val;
2929 	*pmem++ = val;
2930 	if(pmem >= &mem0[MEMSIZE])
2931 		error("out of space");
2932 	return c;
2933 }
2934