xref: /plan9-contrib/sys/src/cmd/yacc.c (revision e612cfbde65d90a6d1f45b9c8a893c248b3fa24d)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 
6 #define	Bungetrune	Bungetc		/* ok for now. */
7 
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET		((32+NTERMS)/32)	/* BOTCH?? +31 */
12 #define BIT(a,i)	((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)	((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)	(((n)+32)/32)
15 
16 #define PARSER		"/sys/lib/yaccpar"
17 #define PARSERS		"/sys/lib/yaccpars"
18 #define TEMPNAME	"y.tmp.XXXXXX"
19 #define ACTNAME		"y.acts.XXXXXX"
20 #define OFILE		"tab.c"
21 #define FILEU		"output"
22 #define FILED		"tab.h"
23 #define FILEDEBUG	"debug"
24 
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31 	ACTSIZE		= 40000,
32 	MEMSIZE		= 40000,
33 	NSTATES		= 2000,
34 	NTERMS		= 511,
35 	NPROD		= 1600,
36 	NNONTERM	= 600,
37 	TEMPSIZE	= 2000,
38 	CNAMSZ		= 10000,
39 	LSETSIZE	= 2400,
40 	WSETSIZE	= 350,
41 
42 	NAMESIZE	= 50,
43 	NTYPES		= 63,
44 	ISIZE		= 400,
45 
46 	PRIVATE		= 0xE000,	/* unicode private use */
47 
48 	/* relationships which must hold:
49 		TBITSET ints must hold NTERMS+1 bits...
50 		WSETSIZE >= NNONTERM
51 		LSETSIZE >= NNONTERM
52 		TEMPSIZE >= NTERMS + NNONTERM + 1
53 		TEMPSIZE >= NSTATES
54 	*/
55 
56 	NTBASE		= 010000,
57 	ERRCODE		= 8190,
58 	ACCEPTCODE	= 8191,
59 
60 	NOASC		= 0,	/* no assoc. */
61 	LASC		= 1,	/* left assoc. */
62 	RASC		= 2,	/* right assoc. */
63 	BASC		= 3,	/* binary assoc. */
64 
65 	/* flags for state generation */
66 
67 	DONE		= 0,
68 	MUSTDO		= 1,
69 	MUSTLOOKAHEAD	= 2,
70 
71 	/* flags for a rule having an action, and being reduced */
72 
73 	ACTFLAG		= 04,
74 	REDFLAG		= 010,
75 
76 	/* output parser flags */
77 	YYFLAG1		= -1000,
78 
79 	/* parse tokens */
80 	IDENTIFIER	= PRIVATE,
81 	MARK,
82 	TERM,
83 	LEFT,
84 	RIGHT,
85 	BINARY,
86 	PREC,
87 	LCURLY,
88 	IDENTCOLON,
89 	NUMBER,
90 	START,
91 	TYPEDEF,
92 	TYPENAME,
93 	UNION,
94 
95 	ENDFILE		= 0,
96 
97 	EMPTY		= 1,
98 	WHOKNOWS	= 0,
99 	OK		= 1,
100 	NOMORE		= -1000,
101 };
102 
103 	/* macros for getting associativity and precedence levels */
104 
105 #define ASSOC(i)	((i)&03)
106 #define PLEVEL(i)	(((i)>>4)&077)
107 #define TYPE(i)		(((i)>>10)&077)
108 
109 	/* macros for setting associativity and precedence levels */
110 
111 #define SETASC(i,j)	i |= j
112 #define SETPLEV(i,j)	i |= (j<<4)
113 #define SETTYPE(i,j)	i |= (j<<10)
114 
115 	/* looping macros */
116 
117 #define TLOOP(i)	for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)	for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)	for(i=s; i<nprod; i++)
120 #define SLOOP(i)	for(i=0; i<nstate; i++)
121 #define WSBUMP(x)	x++
122 #define WSLOOP(s,j)	for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)	for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)	for(i=0; i<tbitset; i++)
125 
126 	/* command to clobber tempfiles after use */
127 
128 #define	ZAPFILE(x)	if(x) remove(x)
129 
130 	/* I/O descriptors */
131 
132 Biobuf*	faction;	/* file for saving actions */
133 Biobuf*	fdefine;	/* file for #defines */
134 Biobuf*	fdebug;		/* y.debug for strings for debugging */
135 Biobuf*	ftable;		/* y.tab.c file */
136 Biobuf*	ftemp;		/* tempfile to pass 2 */
137 Biobuf*	finput;		/* input file */
138 Biobuf*	foutput;	/* y.output file */
139 
140 	/* communication variables between various I/O routines */
141 
142 char*	infile;			/* input file name */
143 int	numbval;		/* value of an input number */
144 char	tokname[NAMESIZE+UTFmax+1]; /* input token name, slop for runes and 0 */
145 
146 	/* structure declarations */
147 
148 typedef
149 struct
150 {
151 	int	lset[TBITSET];
152 } Lkset;
153 
154 typedef
155 struct
156 {
157 	int*	pitem;
158 	Lkset*	look;
159 } Item;
160 
161 typedef
162 struct
163 {
164 	char*	name;
165 	int	value;
166 } Symb;
167 
168 typedef
169 struct
170 {
171 	int*	pitem;
172 	int	flag;
173 	Lkset	ws;
174 } Wset;
175 
176 	/* storage of names */
177 
178 char	cnames[CNAMSZ];		/* place where token and nonterminal names are stored */
179 int	cnamsz = CNAMSZ;	/* size of cnames */
180 char*	cnamp = cnames;		/* place where next name is to be put in */
181 int	ndefout = 4;		/* number of defined symbols output */
182 char*	tempname;
183 char*	actname;
184 char	ttempname[] = TEMPNAME;
185 char	tactname[] = ACTNAME;
186 char*	parser = PARSER;
187 char*	yydebug;
188 
189 	/* storage of types */
190 int	ntypes;			/* number of types defined */
191 char*	typeset[NTYPES];	/* pointers to type tags */
192 
193 	/* token information */
194 
195 int	ntokens = 0 ;		/* number of tokens */
196 Symb	tokset[NTERMS];
197 int	toklev[NTERMS];		/* vector with the precedence of the terminals */
198 
199 	/* nonterminal information */
200 
201 int	nnonter = -1;		/* the number of nonterminals */
202 Symb	nontrst[NNONTERM];
203 int	start;			/* start symbol */
204 
205 	/* assigned token type values */
206 int	extval = 0;
207 
208 char*	ytabc = OFILE;	/* name of y.tab.c */
209 
210 	/* grammar rule information */
211 
212 int	mem0[MEMSIZE] ;		/* production storage */
213 int*	mem = mem0;
214 int	nprod = 1;		/* number of productions */
215 int*	prdptr[NPROD];		/* pointers to descriptions of productions */
216 int	levprd[NPROD];		/* precedence levels for the productions */
217 int	rlines[NPROD];		/* line number for this rule */
218 
219 	/* state information */
220 
221 int	nstate = 0;		/* number of states */
222 Item*	pstate[NSTATES+2];	/* pointers to the descriptions of the states */
223 int	tystate[NSTATES];	/* contains type information about the states */
224 int	defact[NSTATES];	/* the default actions of states */
225 int	tstates[NTERMS];	/* states generated by terminal gotos */
226 int	ntstates[NNONTERM]; 	/* states generated by nonterminal gotos */
227 int	mstates[NSTATES];	/* chain of overflows of term/nonterm generation lists  */
228 int	lastred; 		/* the number of the last reduction of a state */
229 
230 	/* lookahead set information */
231 
232 Lkset	lkst[LSETSIZE];
233 int	nolook;			/* flag to turn off lookahead computations */
234 int	tbitset;		/* size of lookahead sets */
235 int	nlset = 0;		/* next lookahead set index */
236 int	nolook = 0;		/* flag to suppress lookahead computations */
237 Lkset	clset;  		/* temporary storage for lookahead computations */
238 
239 	/* working set information */
240 
241 Wset	wsets[WSETSIZE];
242 Wset*	cwp;
243 
244 	/* storage for action table */
245 
246 int	amem[ACTSIZE];		/* action table storage */
247 int*	memp = amem;		/* next free action table position */
248 int	indgo[NSTATES];		/* index to the stored goto table */
249 
250 	/* temporary vector, indexable by states, terms, or ntokens */
251 
252 int	temp1[TEMPSIZE];	/* temporary storage, indexed by terms + ntokens or states */
253 int	lineno = 1;		/* current input line number */
254 int	fatfl = 1;  		/* if on, error is fatal */
255 int	nerrors = 0;		/* number of errors */
256 
257 	/* statistics collection variables */
258 
259 int	zzgoent;
260 int	zzgobest;
261 int	zzacent;
262 int	zzexcp;
263 int	zzclose;
264 int	zzrrconf;
265 int	zzsrconf;
266 
267 int*	ggreed = lkst[0].lset;
268 int*	pgo = wsets[0].ws.lset;
269 int*	yypgo = &nontrst[0].value;
270 
271 int	maxspr = 0;  		/* maximum spread of any entry */
272 int	maxoff = 0;  		/* maximum offset into a array */
273 int*	pmem = mem0;
274 int*	maxa;
275 int	nxdb = 0;
276 int	adb = 0;
277 
278 
279 	/* storage for information about the nonterminals */
280 
281 int**	pres[NNONTERM+2];  	/* vector of pointers to productions yielding each nonterminal */
282 Lkset*	pfirst[NNONTERM+2];	/* vector of pointers to first sets for each nonterminal */
283 int	pempty[NNONTERM+1];	/* vector of nonterminals nontrivially deriving e */
284 
285 	/* random stuff picked out from between functions */
286 
287 int	indebug = 0;
288 Wset*	zzcwp = wsets;
289 int	zzgoent = 0;
290 int	zzgobest = 0;
291 int	zzacent = 0;
292 int	zzexcp = 0;
293 int	zzclose = 0;
294 int	zzsrconf = 0;
295 int*	zzmemsz = mem0;
296 int	zzrrconf = 0;
297 int	pidebug = 0;		/* debugging flag for putitem */
298 int	gsdebug = 0;
299 int	cldebug = 0;		/* debugging flag for closure */
300 int	pkdebug = 0;
301 int	g2debug = 0;
302 
303 struct
304 {
305 	char*	name;
306 	long	value;
307 } resrv[] =
308 {
309 	"binary",	BINARY,
310 	"left",		LEFT,
311 	"nonassoc",	BINARY,
312 	"prec",		PREC,
313 	"right",	RIGHT,
314 	"start",	START,
315 	"term",		TERM,
316 	"token",	TERM,
317 	"type",		TYPEDEF,
318 	"union",	UNION,
319 	0,
320 };
321 
322 	/* define functions */
323 
324 void	main(int, char**);
325 void	others(void);
326 char*	chcopy(char*, char*);
327 char*	writem(int*);
328 char*	symnam(int);
329 void	summary(void);
330 void	error(char*, ...);
331 void	aryfil(int*, int, int);
332 int	setunion(int*, int*);
333 void	prlook(Lkset*);
334 void	cpres(void);
335 void	cpfir(void);
336 int	state(int);
337 void	putitem(int*, Lkset*);
338 void	cempty(void);
339 void	stagen(void);
340 void	closure(int);
341 Lkset*	flset(Lkset*);
342 void	cleantmp(void);
343 void	intr(void);
344 void	setup(int, char**);
345 void	finact(void);
346 int	defin(int, char*);
347 void	defout(int);
348 char*	cstash(char*);
349 long	gettok(void);
350 int	fdtype(int);
351 int	chfind(int, char*);
352 void	cpyunion(void);
353 void	cpycode(void);
354 int	skipcom(void);
355 void	cpyact(int);
356 void	openup(char*, int, int, int, char*);
357 void	output(void);
358 int	apack(int*, int);
359 void	go2out(void);
360 void	go2gen(int);
361 void	precftn(int, int, int);
362 void	wract(int);
363 void	wrstate(int);
364 void	warray(char*, int*, int);
365 void	hideprod(void);
366 void	callopt(void);
367 void	gin(int);
368 void	stin(int);
369 int	nxti(void);
370 void	osummary(void);
371 void	aoutput(void);
372 void	arout(char*, int*, int);
373 int	gtnm(void);
374 
375 void
main(int argc,char * argv[])376 main(int argc, char *argv[])
377 {
378 
379 	setup(argc, argv);	/* initialize and read productions */
380 	tbitset = NWORDS(ntokens);
381 	cpres();		/* make table of which productions yield a given nonterminal */
382 	cempty();		/* make a table of which nonterminals can match the empty string */
383 	cpfir();		/* make a table of firsts of nonterminals */
384 	stagen();		/* generate the states */
385 	output();		/* write the states and the tables */
386 	go2out();
387 	hideprod();
388 	summary();
389 	callopt();
390 	others();
391 	exits(0);
392 }
393 
394 /*
395  * put out other arrays, copy the parsers
396  */
397 void
others(void)398 others(void)
399 {
400 	int c, i, j;
401 
402 	finput = Bopen(parser, OREAD);
403 	if(finput == 0)
404 		error("cannot find parser %s", parser);
405 	warray("yyr1", levprd, nprod);
406 	aryfil(temp1, nprod, 0);
407 	PLOOP(1, i)
408 		temp1[i] = prdptr[i+1]-prdptr[i]-2;
409 	warray("yyr2", temp1, nprod);
410 
411 	aryfil(temp1, nstate, -1000);
412 	TLOOP(i)
413 		for(j=tstates[i]; j!=0; j=mstates[j])
414 			temp1[j] = i;
415 	NTLOOP(i)
416 		for(j=ntstates[i]; j!=0; j=mstates[j])
417 			temp1[j] = -i;
418 	warray("yychk", temp1, nstate);
419 	warray("yydef", defact, nstate);
420 
421 	/* put out token translation tables */
422 	/* table 1 has 0-256 */
423 	aryfil(temp1, 256, 0);
424 	c = 0;
425 	TLOOP(i) {
426 		j = tokset[i].value;
427 		if(j >= 0 && j < 256) {
428 			if(temp1[j]) {
429 				print("yacc bug -- cant have 2 different Ts with same value\n");
430 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431 				nerrors++;
432 			}
433 			temp1[j] = i;
434 			if(j > c)
435 				c = j;
436 		}
437 	}
438 	warray("yytok1", temp1, c+1);
439 
440 	/* table 2 has PRIVATE-PRIVATE+256 */
441 	aryfil(temp1, 256, 0);
442 	c = 0;
443 	TLOOP(i) {
444 		j = tokset[i].value - PRIVATE;
445 		if(j >= 0 && j < 256) {
446 			if(temp1[j]) {
447 				print("yacc bug -- cant have 2 different Ts with same value\n");
448 				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449 				nerrors++;
450 			}
451 			temp1[j] = i;
452 			if(j > c)
453 				c = j;
454 		}
455 	}
456 	warray("yytok2", temp1, c+1);
457 
458 	/* table 3 has everything else */
459 	Bprint(ftable, "long	yytok3[] =\n{\n");
460 	c = 0;
461 	TLOOP(i) {
462 		j = tokset[i].value;
463 		if(j >= 0 && j < 256)
464 			continue;
465 		if(j >= PRIVATE && j < 256+PRIVATE)
466 			continue;
467 
468 		Bprint(ftable, "%4d,%4d,", j, i);
469 		c++;
470 		if(c%5 == 0)
471 			Bprint(ftable, "\n");
472 	}
473 	Bprint(ftable, "%4d\n};\n", 0);
474 
475 	/* copy parser text */
476 	while((c=Bgetrune(finput)) != Beof) {
477 		if(c == '$') {
478 			if((c = Bgetrune(finput)) != 'A')
479 				Bputrune(ftable, '$');
480 			else { /* copy actions */
481 				faction = Bopen(actname, OREAD);
482 				if(faction == 0)
483 					error("cannot reopen action tempfile");
484 				while((c=Bgetrune(faction)) != Beof)
485 					Bputrune(ftable, c);
486 				Bterm(faction);
487 				ZAPFILE(actname);
488 				c = Bgetrune(finput);
489 			}
490 		}
491 		Bputrune(ftable, c);
492 	}
493 	Bterm(ftable);
494 }
495 
496 /*
497  * copies string q into p, returning next free char ptr
498  */
499 char*
chcopy(char * p,char * q)500 chcopy(char* p, char* q)
501 {
502 	int c;
503 
504 	while(c = *q) {
505 		if(c == '"')
506 			*p++ = '\\';
507 		*p++ = c;
508 		q++;
509 	}
510 	*p = 0;
511 	return p;
512 }
513 
514 /*
515  * creates output string for item pointed to by pp
516  */
517 char*
writem(int * pp)518 writem(int *pp)
519 {
520 	int i,*p;
521 	static char sarr[ISIZE];
522 	char* q;
523 
524 	for(p=pp; *p>0; p++)
525 		;
526 	p = prdptr[-*p];
527 	q = chcopy(sarr, nontrst[*p-NTBASE].name);
528 	q = chcopy(q, ": ");
529 	for(;;) {
530 		*q = ' ';
531 		p++;
532 		if(p == pp)
533 			*q = '.';
534 		q++;
535 		*q = '\0';
536 		i = *p;
537 		if(i <= 0)
538 			break;
539 		q = chcopy(q, symnam(i));
540 		if(q > &sarr[ISIZE-30])
541 			error("item too big");
542 	}
543 
544 	/* an item calling for a reduction */
545 	i = *pp;
546 	if(i < 0 ) {
547 		q = chcopy(q, "    (");
548 		sprint(q, "%d)", -i);
549 	}
550 	return sarr;
551 }
552 
553 /*
554  * return a pointer to the name of symbol i
555  */
556 char*
symnam(int i)557 symnam(int i)
558 {
559 	char* cp;
560 
561 	cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562 	if(*cp == ' ')
563 		cp++;
564 	return cp;
565 }
566 
567 /*
568  * output the summary on y.output
569  */
570 void
summary(void)571 summary(void)
572 {
573 
574 	if(foutput != 0) {
575 		Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576 			ntokens, NTERMS, nnonter, NNONTERM);
577 		Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578 			nprod, NPROD, nstate, NSTATES);
579 		Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580 			zzsrconf, zzrrconf);
581 		Bprint(foutput, "%d/%d working sets used\n",
582 			(int)(zzcwp-wsets), WSETSIZE);
583 		Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584 			(int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
585 		Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586 		Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587 		Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588 		Bprint(foutput, "%d goto entries\n", zzgoent);
589 		Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
590 	}
591 	if(zzsrconf != 0 || zzrrconf != 0) {
592 		print("\nconflicts: ");
593 		if(zzsrconf)
594 			print("%d shift/reduce", zzsrconf);
595 		if(zzsrconf && zzrrconf)
596 			print(", ");
597 		if(zzrrconf)
598 			print("%d reduce/reduce", zzrrconf);
599 		print("\n");
600 	}
601 	if(ftemp != 0) {
602 		Bterm(ftemp);
603 		ftemp = 0;
604 	}
605 	if(fdefine != 0) {
606 		Bterm(fdefine);
607 		fdefine = 0;
608 	}
609 }
610 
611 /*
612  * write out error comment -- NEEDS WORK
613  */
614 void
error(char * s,...)615 error(char *s, ...)
616 {
617 
618 	nerrors++;
619 	fprint(2, "\n fatal error:");
620 	fprint(2, s, (&s)[1]);
621 	fprint(2, ", %s:%d\n", infile, lineno);
622 	if(!fatfl)
623 		return;
624 	summary();
625 	cleantmp();
626 	exits("error");
627 }
628 
629 /*
630  * set elements 0 through n-1 to c
631  */
632 void
aryfil(int * v,int n,int c)633 aryfil(int *v, int n, int c)
634 {
635 	int i;
636 
637 	for(i=0; i<n; i++)
638 		v[i] = c;
639 }
640 
641 /*
642  * set a to the union of a and b
643  * return 1 if b is not a subset of a, 0 otherwise
644  */
645 int
setunion(int * a,int * b)646 setunion(int *a, int *b)
647 {
648 	int i, x, sub;
649 
650 	sub = 0;
651 	SETLOOP(i) {
652 		x = *a;
653 		*a |= *b;
654 		if(*a != x)
655 			sub = 1;
656 		a++;
657 		b++;
658 	}
659 	return sub;
660 }
661 
662 void
prlook(Lkset * p)663 prlook(Lkset* p)
664 {
665 	int j, *pp;
666 
667 	pp = p->lset;
668 	if(pp == 0)
669 		Bprint(foutput, "\tNULL");
670 	else {
671 		Bprint(foutput, " { ");
672 		TLOOP(j)
673 			if(BIT(pp,j))
674 				Bprint(foutput, "%s ", symnam(j));
675 		Bprint(foutput, "}");
676 	}
677 }
678 
679 /*
680  * compute an array with the beginnings of  productions yielding given nonterminals
681  * The array pres points to these lists
682  * the array pyield has the lists: the total size is only NPROD+1
683  */
684 void
cpres(void)685 cpres(void)
686 {
687 	int c, j, i, **pmem;
688 	static int *pyield[NPROD];
689 
690 	pmem = pyield;
691 	NTLOOP(i) {
692 		c = i+NTBASE;
693 		pres[i] = pmem;
694 		fatfl = 0;  	/* make undefined  symbols  nonfatal */
695 		PLOOP(0, j)
696 			if(*prdptr[j] == c)
697 				*pmem++ =  prdptr[j]+1;
698 		if(pres[i] == pmem)
699 			error("nonterminal %s not defined!", nontrst[i].name);
700 	}
701 	pres[i] = pmem;
702 	fatfl = 1;
703 	if(nerrors) {
704 		summary();
705 		cleantmp();
706 		exits("error");
707 	}
708 	if(pmem != &pyield[nprod])
709 		error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
710 }
711 
712 /*
713  * compute an array with the first of nonterminals
714  */
715 void
cpfir(void)716 cpfir(void)
717 {
718 	int *p, **s, i, **t, ch, changes;
719 
720 	zzcwp = &wsets[nnonter];
721 	NTLOOP(i) {
722 		aryfil(wsets[i].ws.lset, tbitset, 0);
723 		t = pres[i+1];
724 		/* initially fill the sets */
725 		for(s=pres[i]; s<t; ++s)
726 			for(p = *s; (ch = *p) > 0; ++p) {
727 				if(ch < NTBASE) {
728 					SETBIT(wsets[i].ws.lset, ch);
729 					break;
730 				}
731 				if(!pempty[ch-NTBASE])
732 					break;
733 			}
734 	}
735 
736 	/* now, reflect transitivity */
737 	changes = 1;
738 	while(changes) {
739 		changes = 0;
740 		NTLOOP(i) {
741 			t = pres[i+1];
742 			for(s = pres[i]; s < t; ++s)
743 				for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
744 					changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
745 					if(!pempty[ch])
746 						break;
747 				}
748 		}
749 	}
750 
751 	NTLOOP(i)
752 		pfirst[i] = flset(&wsets[i].ws);
753 	if(!indebug)
754 		return;
755 	if(foutput != 0)
756 		NTLOOP(i) {
757 			Bprint(foutput, "\n%s: ", nontrst[i].name);
758 			prlook(pfirst[i]);
759 			Bprint(foutput, " %d\n", pempty[i]);
760 		}
761 }
762 
763 /*
764  * sorts last state,and sees if it equals earlier ones. returns state number
765  */
766 int
state(int c)767 state(int c)
768 {
769 	Item *p1, *p2, *k, *l, *q1, *q2;
770 	int size1, size2, i;
771 
772 	p1 = pstate[nstate];
773 	p2 = pstate[nstate+1];
774 	if(p1 == p2)
775 		return 0;	/* null state */
776 	/* sort the items */
777 	for(k = p2-1; k > p1; k--)	/* make k the biggest */
778 		for(l = k-1; l >= p1; --l)
779 			if(l->pitem > k->pitem) {
780 				int *s;
781 				Lkset *ss;
782 
783 				s = k->pitem;
784 				k->pitem = l->pitem;
785 				l->pitem = s;
786 				ss = k->look;
787 				k->look = l->look;
788 				l->look = ss;
789 			}
790 	size1 = p2 - p1;	/* size of state */
791 
792 	for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
793 		/* get ith state */
794 		q1 = pstate[i];
795 		q2 = pstate[i+1];
796 		size2 = q2 - q1;
797 		if(size1 != size2)
798 			continue;
799 		k = p1;
800 		for(l = q1; l < q2; l++) {
801 			if(l->pitem != k->pitem)
802 				break;
803 			k++;
804 		}
805 		if(l != q2)
806 			continue;
807 		/* found it */
808 		pstate[nstate+1] = pstate[nstate];	/* delete last state */
809 		/* fix up lookaheads */
810 		if(nolook)
811 			return i;
812 		for(l = q1, k = p1; l < q2; ++l, ++k ) {
813 			int s;
814 
815 			SETLOOP(s)
816 				clset.lset[s] = l->look->lset[s];
817 			if(setunion(clset.lset, k->look->lset)) {
818 				tystate[i] = MUSTDO;
819 				/* register the new set */
820 				l->look = flset( &clset );
821 			}
822 		}
823 		return i;
824 	}
825 	/* state is new */
826 	if(nolook)
827 		error("yacc state/nolook error");
828 	pstate[nstate+2] = p2;
829 	if(nstate+1 >= NSTATES)
830 		error("too many states");
831 	if(c >= NTBASE) {
832 		mstates[nstate] = ntstates[c-NTBASE];
833 		ntstates[c-NTBASE] = nstate;
834 	} else {
835 		mstates[nstate] = tstates[c];
836 		tstates[c] = nstate;
837 	}
838 	tystate[nstate] = MUSTDO;
839 	return nstate++;
840 }
841 
842 void
putitem(int * ptr,Lkset * lptr)843 putitem(int *ptr, Lkset *lptr)
844 {
845 	Item *j;
846 
847 	if(pidebug && foutput != 0)
848 		Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
849 	j = pstate[nstate+1];
850 	j->pitem = ptr;
851 	if(!nolook)
852 		j->look = flset(lptr);
853 	pstate[nstate+1] = ++j;
854 	if((int*)j > zzmemsz) {
855 		zzmemsz = (int*)j;
856 		if(zzmemsz >=  &mem0[MEMSIZE])
857 			error("out of state space");
858 	}
859 }
860 
861 /*
862  * mark nonterminals which derive the empty string
863  * also, look for nonterminals which don't derive any token strings
864  */
865 void
cempty(void)866 cempty(void)
867 {
868 
869 	int i, *p;
870 
871 	/* first, use the array pempty to detect productions that can never be reduced */
872 	/* set pempty to WHONOWS */
873 	aryfil(pempty, nnonter+1, WHOKNOWS);
874 
875 	/* now, look at productions, marking nonterminals which derive something */
876 more:
877 	PLOOP(0, i) {
878 		if(pempty[*prdptr[i] - NTBASE])
879 			continue;
880 		for(p = prdptr[i]+1; *p >= 0; ++p)
881 			if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
882 				break;
883 		/* production can be derived */
884 		if(*p < 0) {
885 			pempty[*prdptr[i]-NTBASE] = OK;
886 			goto more;
887 		}
888 	}
889 
890 	/* now, look at the nonterminals, to see if they are all OK */
891 	NTLOOP(i) {
892 		/* the added production rises or falls as the start symbol ... */
893 		if(i == 0)
894 			continue;
895 		if(pempty[i] != OK) {
896 			fatfl = 0;
897 			error("nonterminal %s never derives any token string", nontrst[i].name);
898 		}
899 	}
900 
901 	if(nerrors) {
902 		summary();
903 		cleantmp();
904 		exits("error");
905 	}
906 
907 	/* now, compute the pempty array, to see which nonterminals derive the empty string */
908 	/* set pempty to WHOKNOWS */
909 	aryfil( pempty, nnonter+1, WHOKNOWS);
910 
911 	/* loop as long as we keep finding empty nonterminals */
912 
913 again:
914 	PLOOP(1, i) {
915 		/* not known to be empty */
916 		if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
917 			for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
918 				;
919 			/* we have a nontrivially empty nonterminal */
920 			if(*p < 0) {
921 				pempty[*prdptr[i]-NTBASE] = EMPTY;
922 				/* got one ... try for another */
923 				goto again;
924 			}
925 		}
926 	}
927 }
928 
929 /*
930  * generate the states
931  */
932 void
stagen(void)933 stagen(void)
934 {
935 
936 	int c, i, j, more;
937 	Wset *p, *q;
938 
939 	/* initialize */
940 	nstate = 0;
941 
942 	/* THIS IS FUNNY from the standpoint of portability
943 	 * it represents the magic moment when the mem0 array, which has
944 	 * been holding the productions, starts to hold item pointers, of a
945 	 * different type...
946 	 * someday, alloc should be used to allocate all this stuff... for now, we
947 	 * accept that if pointers don't fit in integers, there is a problem...
948 	 */
949 
950 	pstate[0] = pstate[1] = (Item*)mem;
951 	aryfil(clset.lset, tbitset, 0);
952 	putitem(prdptr[0]+1, &clset);
953 	tystate[0] = MUSTDO;
954 	nstate = 1;
955 	pstate[2] = pstate[1];
956 
957 	aryfil(amem, ACTSIZE, 0);
958 
959 	/* now, the main state generation loop */
960 	for(more=1; more;) {
961 		more = 0;
962 		SLOOP(i) {
963 			if(tystate[i] != MUSTDO)
964 				continue;
965 			tystate[i] = DONE;
966 			aryfil(temp1, nnonter+1, 0);
967 			/* take state i, close it, and do gotos */
968 			closure(i);
969 			/* generate goto's */
970 			WSLOOP(wsets, p) {
971 				if(p->flag)
972 					continue;
973 				p->flag = 1;
974 				c = *(p->pitem);
975 				if(c <= 1) {
976 					if(pstate[i+1]-pstate[i] <= p-wsets)
977 						tystate[i] = MUSTLOOKAHEAD;
978 					continue;
979 				}
980 				/* do a goto on c */
981 				WSLOOP(p, q)
982 					/* this item contributes to the goto */
983 					if(c == *(q->pitem)) {
984 						putitem(q->pitem+1, &q->ws);
985 						q->flag = 1;
986 					}
987 				if(c < NTBASE)
988 					state(c);	/* register new state */
989 				else
990 					temp1[c-NTBASE] = state(c);
991 			}
992 			if(gsdebug && foutput != 0) {
993 				Bprint(foutput, "%d: ", i);
994 				NTLOOP(j)
995 					if(temp1[j])
996 						Bprint(foutput, "%s %d, ",
997 						nontrst[j].name, temp1[j]);
998 				Bprint(foutput, "\n");
999 			}
1000 			indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1001 			/* do some more */
1002 			more = 1;
1003 		}
1004 	}
1005 }
1006 
1007 /*
1008  * generate the closure of state i
1009  */
1010 void
closure(int i)1011 closure(int i)
1012 {
1013 
1014 	Wset *u, *v;
1015 	Item *p, *q;
1016 	int c, ch, work, k, *pi, **s, **t;
1017 
1018 	zzclose++;
1019 
1020 	/* first, copy kernel of state i to wsets */
1021 	cwp = wsets;
1022 	ITMLOOP(i, p, q) {
1023 		cwp->pitem = p->pitem;
1024 		cwp->flag = 1;			/* this item must get closed */
1025 		SETLOOP(k)
1026 			cwp->ws.lset[k] = p->look->lset[k];
1027 		WSBUMP(cwp);
1028 	}
1029 
1030 	/* now, go through the loop, closing each item */
1031 	work = 1;
1032 	while(work) {
1033 		work = 0;
1034 		WSLOOP(wsets, u) {
1035 			if(u->flag == 0)
1036 				continue;
1037 			/* dot is before c */
1038 			c = *(u->pitem);
1039 			if(c < NTBASE) {
1040 				u->flag = 0;
1041 				/* only interesting case is where . is before nonterminal */
1042 				continue;
1043 			}
1044 
1045 			/* compute the lookahead */
1046 			aryfil(clset.lset, tbitset, 0);
1047 
1048 			/* find items involving c */
1049 			WSLOOP(u, v)
1050 				if(v->flag == 1 && *(pi=v->pitem) == c) {
1051 					v->flag = 0;
1052 					if(nolook)
1053 						continue;
1054 					while((ch = *++pi) > 0) {
1055 						/* terminal symbol */
1056 						if(ch < NTBASE) {
1057 							SETBIT(clset.lset, ch);
1058 							break;
1059 						}
1060 						/* nonterminal symbol */
1061 						setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1062 						if(!pempty[ch-NTBASE])
1063 							break;
1064 					}
1065 					if(ch <= 0)
1066 						setunion(clset.lset, v->ws.lset);
1067 				}
1068 
1069 			/*
1070 			 * now loop over productions derived from c
1071 			 * c is now nonterminal number
1072 			 */
1073 			c -= NTBASE;
1074 			t = pres[c+1];
1075 			for(s = pres[c]; s < t; ++s) {
1076 				/*
1077 				 * put these items into the closure
1078 				 * is the item there
1079 				 */
1080 				WSLOOP(wsets, v)
1081 					/* yes, it is there */
1082 					if(v->pitem == *s) {
1083 						if(nolook)
1084 							goto nexts;
1085 						if(setunion(v->ws.lset, clset.lset))
1086 							v->flag = work = 1;
1087 						goto nexts;
1088 					}
1089 
1090 				/*  not there; make a new entry */
1091 				if(cwp-wsets+1 >= WSETSIZE)
1092 					error( "working set overflow");
1093 				cwp->pitem = *s;
1094 				cwp->flag = 1;
1095 				if(!nolook) {
1096 					work = 1;
1097 					SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1098 				}
1099 				WSBUMP(cwp);
1100 
1101 			nexts:;
1102 			}
1103 		}
1104 	}
1105 
1106 	/* have computed closure; flags are reset; return */
1107 	if(cwp > zzcwp)
1108 		zzcwp = cwp;
1109 	if(cldebug && foutput != 0) {
1110 		Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1111 		WSLOOP(wsets, u) {
1112 			if(u->flag)
1113 				Bprint(foutput, "flag set!\n");
1114 			u->flag = 0;
1115 			Bprint(foutput, "\t%s", writem(u->pitem));
1116 			prlook(&u->ws);
1117 			Bprint(foutput, "\n");
1118 		}
1119 	}
1120 }
1121 
1122 /*
1123  * decide if the lookahead set pointed to by p is known
1124  * return pointer to a perminent location for the set
1125  */
1126 Lkset*
flset(Lkset * p)1127 flset(Lkset *p)
1128 {
1129 	Lkset *q;
1130 	int *u, *v, *w, j;
1131 
1132 	for(q = &lkst[nlset]; q-- > lkst;) {
1133 		u = p->lset;
1134 		v = q->lset;
1135 		w = &v[tbitset];
1136 		while(v < w)
1137 			if(*u++ != *v++)
1138 				goto more;
1139 		/* we have matched */
1140 		return q;
1141 	more:;
1142 	}
1143 	/* add a new one */
1144 	q = &lkst[nlset++];
1145 	if(nlset >= LSETSIZE)
1146 		error("too many lookahead sets");
1147 	SETLOOP(j)
1148 		q->lset[j] = p->lset[j];
1149 	return q;
1150 }
1151 
1152 void
cleantmp(void)1153 cleantmp(void)
1154 {
1155 	ZAPFILE(actname);
1156 	ZAPFILE(tempname);
1157 }
1158 
1159 void
intr(void)1160 intr(void)
1161 {
1162 	cleantmp();
1163 	exits("interrupted");
1164 }
1165 
1166 void
usage(void)1167 usage(void)
1168 {
1169 	fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1170 	exits("usage");
1171 }
1172 
1173 void
setup(int argc,char * argv[])1174 setup(int argc, char *argv[])
1175 {
1176 	long c, t;
1177 	int i, j, lev, ty, ytab, *p;
1178 	int vflag, dflag, stem;
1179 	char actnm[8], *stemc, *s, dirbuf[128];
1180 
1181 	ytab = 0;
1182 	vflag = 0;
1183 	dflag = 0;
1184 	stem = 0;
1185 	stemc = "y";
1186 	foutput = 0;
1187 	fdefine = 0;
1188 	fdebug = 0;
1189 	ARGBEGIN{
1190 	case 'v':
1191 	case 'V':
1192 		vflag++;
1193 		break;
1194 	case 'D':
1195 		yydebug = EARGF(usage());
1196 		break;
1197 	case 'd':
1198 		dflag++;
1199 		break;
1200 	case 'o':
1201 		ytab++;
1202 		ytabc = EARGF(usage());
1203 		break;
1204 	case 's':
1205 		stem++;
1206 		stemc = ARGF();
1207 		break;
1208 	case 'S':
1209 		parser = PARSERS;
1210 		break;
1211 	default:
1212 		error("illegal option: %c", ARGC());
1213 	}ARGEND
1214 	openup(stemc, dflag, vflag, ytab, ytabc);
1215 
1216 	ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1217 	faction = Bopen(actname = mktemp(tactname), OWRITE);
1218 	if(ftemp == 0 || faction == 0)
1219 		error("cannot open temp file");
1220 	if(argc < 1)
1221 		error("no input file");
1222 	infile = argv[0];
1223 	if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1224 		i = strlen(infile)+1+strlen(dirbuf)+1+10;
1225 		s = malloc(i);
1226 		if(s != nil){
1227 			snprint(s, i, "%s/%s", dirbuf, infile);
1228 			cleanname(s);
1229 			infile = s;
1230 		}
1231 	}
1232 	finput = Bopen(infile, OREAD);
1233 	if(finput == 0)
1234 		error("cannot open '%s'", argv[0]);
1235 	cnamp = cnames;
1236 
1237 	defin(0, "$end");
1238 	extval = PRIVATE;	/* tokens start in unicode 'private use' */
1239 	defin(0, "error");
1240 	defin(1, "$accept");
1241 	defin(0, "$unk");
1242 	mem = mem0;
1243 	i = 0;
1244 
1245 	for(t = gettok(); t != MARK && t != ENDFILE;)
1246 	switch(t) {
1247 	case ';':
1248 		t = gettok();
1249 		break;
1250 
1251 	case START:
1252 		if(gettok() != IDENTIFIER)
1253 			error("bad %%start construction");
1254 		start = chfind(1, tokname);
1255 		t = gettok();
1256 		continue;
1257 
1258 	case TYPEDEF:
1259 		if(gettok() != TYPENAME)
1260 			error("bad syntax in %%type");
1261 		ty = numbval;
1262 		for(;;) {
1263 			t = gettok();
1264 			switch(t) {
1265 			case IDENTIFIER:
1266 				if((t=chfind(1, tokname)) < NTBASE) {
1267 					j = TYPE(toklev[t]);
1268 					if(j != 0 && j != ty)
1269 						error("type redeclaration of token %s",
1270 							tokset[t].name);
1271 					else
1272 						SETTYPE(toklev[t], ty);
1273 				} else {
1274 					j = nontrst[t-NTBASE].value;
1275 					if(j != 0 && j != ty)
1276 						error("type redeclaration of nonterminal %s",
1277 							nontrst[t-NTBASE].name );
1278 					else
1279 						nontrst[t-NTBASE].value = ty;
1280 				}
1281 			case ',':
1282 				continue;
1283 			case ';':
1284 				t = gettok();
1285 			default:
1286 				break;
1287 			}
1288 			break;
1289 		}
1290 		continue;
1291 
1292 	case UNION:
1293 		/* copy the union declaration to the output */
1294 		cpyunion();
1295 		t = gettok();
1296 		continue;
1297 
1298 	case LEFT:
1299 	case BINARY:
1300 	case RIGHT:
1301 		i++;
1302 
1303 	case TERM:
1304 		/* nonzero means new prec. and assoc. */
1305 		lev = t-TERM;
1306 		ty = 0;
1307 
1308 		/* get identifiers so defined */
1309 		t = gettok();
1310 
1311 		/* there is a type defined */
1312 		if(t == TYPENAME) {
1313 			ty = numbval;
1314 			t = gettok();
1315 		}
1316 		for(;;) {
1317 			switch(t) {
1318 			case ',':
1319 				t = gettok();
1320 				continue;
1321 
1322 			case ';':
1323 				break;
1324 
1325 			case IDENTIFIER:
1326 				j = chfind(0, tokname);
1327 				if(j >= NTBASE)
1328 					error("%s defined earlier as nonterminal", tokname);
1329 				if(lev) {
1330 					if(ASSOC(toklev[j]))
1331 						error("redeclaration of precedence of %s", tokname);
1332 					SETASC(toklev[j], lev);
1333 					SETPLEV(toklev[j], i);
1334 				}
1335 				if(ty) {
1336 					if(TYPE(toklev[j]))
1337 						error("redeclaration of type of %s", tokname);
1338 					SETTYPE(toklev[j],ty);
1339 				}
1340 				t = gettok();
1341 				if(t == NUMBER) {
1342 					tokset[j].value = numbval;
1343 					if(j < ndefout && j > 3)
1344 						error("please define type number of %s earlier",
1345 							tokset[j].name);
1346 					t = gettok();
1347 				}
1348 				continue;
1349 			}
1350 			break;
1351 		}
1352 		continue;
1353 
1354 	case LCURLY:
1355 		defout(0);
1356 		cpycode();
1357 		t = gettok();
1358 		continue;
1359 
1360 	default:
1361 		error("syntax error");
1362 	}
1363 	if(t == ENDFILE)
1364 		error("unexpected EOF before %%");
1365 
1366 	/* t is MARK */
1367 	Bprint(ftable, "extern	int	yyerrflag;\n");
1368 	Bprint(ftable, "#ifndef	YYMAXDEPTH\n");
1369 	Bprint(ftable, "#define	YYMAXDEPTH	150\n");
1370 	Bprint(ftable, "#endif\n" );
1371 	if(!ntypes) {
1372 		Bprint(ftable, "#ifndef	YYSTYPE\n");
1373 		Bprint(ftable, "#define	YYSTYPE	int\n");
1374 		Bprint(ftable, "#endif\n");
1375 	}
1376 	Bprint(ftable, "YYSTYPE	yylval;\n");
1377 	Bprint(ftable, "YYSTYPE	yyval;\n");
1378 
1379 	prdptr[0] = mem;
1380 
1381 	/* added production */
1382 	*mem++ = NTBASE;
1383 
1384 	/* if start is 0, we will overwrite with the lhs of the first rule */
1385 	*mem++ = start;
1386 	*mem++ = 1;
1387 	*mem++ = 0;
1388 	prdptr[1] = mem;
1389 	while((t=gettok()) == LCURLY)
1390 		cpycode();
1391 	if(t != IDENTCOLON)
1392 		error("bad syntax on first rule");
1393 
1394 	if(!start)
1395 		prdptr[0][1] = chfind(1, tokname);
1396 
1397 	/* read rules */
1398 	while(t != MARK && t != ENDFILE) {
1399 		/* process a rule */
1400 		rlines[nprod] = lineno;
1401 		if(t == '|')
1402 			*mem++ = *prdptr[nprod-1];
1403 		else
1404 			if(t == IDENTCOLON) {
1405 				*mem = chfind(1, tokname);
1406 				if(*mem < NTBASE)
1407 					error("token illegal on LHS of grammar rule");
1408 				mem++;
1409 			} else
1410 				error("illegal rule: missing semicolon or | ?");
1411 		/* read rule body */
1412 		t = gettok();
1413 
1414 	more_rule:
1415 		while(t == IDENTIFIER) {
1416 			*mem = chfind(1, tokname);
1417 			if(*mem < NTBASE)
1418 				levprd[nprod] = toklev[*mem];
1419 			mem++;
1420 			t = gettok();
1421 		}
1422 		if(t == PREC) {
1423 			if(gettok() != IDENTIFIER)
1424 				error("illegal %%prec syntax");
1425 			j = chfind(2, tokname);
1426 			if(j >= NTBASE)
1427 				error("nonterminal %s illegal after %%prec",
1428 					nontrst[j-NTBASE].name);
1429 			levprd[nprod] = toklev[j];
1430 			t = gettok();
1431 		}
1432 		if(t == '=') {
1433 			levprd[nprod] |= ACTFLAG;
1434 			Bprint(faction, "\ncase %d:", nprod);
1435 			cpyact(mem-prdptr[nprod]-1);
1436 			Bprint(faction, " break;");
1437 			if((t=gettok()) == IDENTIFIER) {
1438 
1439 				/* action within rule... */
1440 				sprint(actnm, "$$%d", nprod);
1441 
1442 				/* make it a nonterminal */
1443 				j = chfind(1, actnm);
1444 
1445 				/*
1446 				 * the current rule will become rule number nprod+1
1447 				 * move the contents down, and make room for the null
1448 				 */
1449 				for(p = mem; p >= prdptr[nprod]; --p)
1450 					p[2] = *p;
1451 				mem += 2;
1452 
1453 				/* enter null production for action */
1454 				p = prdptr[nprod];
1455 				*p++ = j;
1456 				*p++ = -nprod;
1457 
1458 				/* update the production information */
1459 				levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1460 				levprd[nprod] = ACTFLAG;
1461 				if(++nprod >= NPROD)
1462 					error("more than %d rules", NPROD);
1463 				prdptr[nprod] = p;
1464 
1465 				/* make the action appear in the original rule */
1466 				*mem++ = j;
1467 
1468 				/* get some more of the rule */
1469 				goto more_rule;
1470 			}
1471 		}
1472 
1473 		while(t == ';')
1474 			t = gettok();
1475 		*mem++ = -nprod;
1476 
1477 		/* check that default action is reasonable */
1478 		if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1479 
1480 			/* no explicit action, LHS has value */
1481 			int tempty;
1482 
1483 			tempty = prdptr[nprod][1];
1484 			if(tempty < 0)
1485 				error("must return a value, since LHS has a type");
1486 			else
1487 				if(tempty >= NTBASE)
1488 					tempty = nontrst[tempty-NTBASE].value;
1489 				else
1490 					tempty = TYPE(toklev[tempty]);
1491 			if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1492 				error("default action causes potential type clash");
1493 		}
1494 		nprod++;
1495 		if(nprod >= NPROD)
1496 			error("more than %d rules", NPROD);
1497 		prdptr[nprod] = mem;
1498 		levprd[nprod] = 0;
1499 	}
1500 
1501 	/* end of all rules */
1502 	defout(1);
1503 
1504 	finact();
1505 	if(t == MARK) {
1506 		Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1507 		while((c=Bgetrune(finput)) != Beof)
1508 			Bputrune(ftable, c);
1509 	}
1510 	Bterm(finput);
1511 }
1512 
1513 /*
1514  * finish action routine
1515  */
1516 void
finact(void)1517 finact(void)
1518 {
1519 
1520 	Bterm(faction);
1521 	Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1522 	Bprint(ftable, "#define YYERRCODE %d\n", 2);
1523 }
1524 
1525 /*
1526  * define s to be a terminal if t=0
1527  * or a nonterminal if t=1
1528  */
1529 int
defin(int nt,char * s)1530 defin(int nt, char *s)
1531 {
1532 	int val;
1533 	Rune rune;
1534 
1535 	val = 0;
1536 	if(nt) {
1537 		nnonter++;
1538 		if(nnonter >= NNONTERM)
1539 			error("too many nonterminals, limit %d",NNONTERM);
1540 		nontrst[nnonter].name = cstash(s);
1541 		return NTBASE + nnonter;
1542 	}
1543 
1544 	/* must be a token */
1545 	ntokens++;
1546 	if(ntokens >= NTERMS)
1547 		error("too many terminals, limit %d", NTERMS);
1548 	tokset[ntokens].name = cstash(s);
1549 
1550 	/* establish value for token */
1551 	/* single character literal */
1552 	if(s[0] == ' ') {
1553 		val = chartorune(&rune, &s[1]);
1554 		if(s[val+1] == 0) {
1555 			val = rune;
1556 			goto out;
1557 		}
1558 	}
1559 
1560 	/* escape sequence */
1561 	if(s[0] == ' ' && s[1] == '\\') {
1562 		if(s[3] == 0) {
1563 			/* single character escape sequence */
1564 			switch(s[2]) {
1565 			case 'n':	val = '\n'; break;
1566 			case 'r':	val = '\r'; break;
1567 			case 'b':	val = '\b'; break;
1568 			case 't':	val = '\t'; break;
1569 			case 'f':	val = '\f'; break;
1570 			case '\'':	val = '\''; break;
1571 			case '"':	val = '"'; break;
1572 			case '\\':	val = '\\'; break;
1573 			default:	error("invalid escape");
1574 			}
1575 			goto out;
1576 		}
1577 
1578 		/* \nnn sequence */
1579 		if(s[2] >= '0' && s[2] <= '7') {
1580 			if(s[3] < '0' ||
1581 			   s[3] > '7' ||
1582 			   s[4] < '0' ||
1583 			   s[4] > '7' ||
1584 			   s[5] != 0)
1585 				error("illegal \\nnn construction");
1586 			val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1587 			if(val == 0)
1588 				error("'\\000' is illegal");
1589 			goto out;
1590 		}
1591 		error("unknown escape");
1592 	}
1593 	val = extval++;
1594 
1595 out:
1596 	tokset[ntokens].value = val;
1597 	toklev[ntokens] = 0;
1598 	return ntokens;
1599 }
1600 
1601 /*
1602  * write out the defines (at the end of the declaration section)
1603  */
1604 void
defout(int last)1605 defout(int last)
1606 {
1607 	int i, c;
1608 	char sar[NAMESIZE+10];
1609 
1610 	for(i=ndefout; i<=ntokens; i++) {
1611 		/* non-literals */
1612 		c = tokset[i].name[0];
1613 		if(c != ' ' && c != '$') {
1614 			Bprint(ftable, "#define	%s	%d\n",
1615 				tokset[i].name, tokset[i].value);
1616 			if(fdefine)
1617 				Bprint(fdefine, "#define\t%s\t%d\n",
1618 					tokset[i].name, tokset[i].value);
1619 		}
1620 	}
1621 	ndefout = ntokens+1;
1622 	if(last && fdebug) {
1623 		Bprint(fdebug, "char*	yytoknames[] =\n{\n");
1624 		TLOOP(i) {
1625 			if(tokset[i].name) {
1626 				chcopy(sar, tokset[i].name);
1627 				Bprint(fdebug, "\t\"%s\",\n", sar);
1628 				continue;
1629 			}
1630 			Bprint(fdebug, "\t0,\n");
1631 		}
1632 		Bprint(fdebug, "};\n");
1633 	}
1634 }
1635 
1636 char*
cstash(char * s)1637 cstash(char *s)
1638 {
1639 	char *temp;
1640 
1641 	temp = cnamp;
1642 	do {
1643 		if(cnamp >= &cnames[cnamsz])
1644 			error("too many characters in id's and literals");
1645 		else
1646 			*cnamp++ = *s;
1647 	} while(*s++);
1648 	return temp;
1649 }
1650 
1651 long
gettok(void)1652 gettok(void)
1653 {
1654 	long c;
1655 	Rune rune;
1656 	int i, base, match, reserve;
1657 	static int peekline;
1658 
1659 begin:
1660 	reserve = 0;
1661 	lineno += peekline;
1662 	peekline = 0;
1663 	c = Bgetrune(finput);
1664 	while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1665 		if(c == '\n')
1666 			lineno++;
1667 		c = Bgetrune(finput);
1668 	}
1669 
1670 	/* skip comment */
1671 	if(c == '/') {
1672 		lineno += skipcom();
1673 		goto begin;
1674 	}
1675 	switch(c) {
1676 	case Beof:
1677 		return ENDFILE;
1678 
1679 	case '{':
1680 		Bungetrune(finput);
1681 		return '=';
1682 
1683 	case '<':
1684 		/* get, and look up, a type name (union member name) */
1685 		i = 0;
1686 		while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1687 			rune = c;
1688 			c = runetochar(&tokname[i], &rune);
1689 			if(i < NAMESIZE)
1690 				i += c;
1691 		}
1692 		if(c != '>')
1693 			error("unterminated < ... > clause");
1694 		tokname[i] = 0;
1695 		for(i=1; i<=ntypes; i++)
1696 			if(!strcmp(typeset[i], tokname)) {
1697 				numbval = i;
1698 				return TYPENAME;
1699 			}
1700 		ntypes++;
1701 		numbval = ntypes;
1702 		typeset[numbval] = cstash(tokname);
1703 		return TYPENAME;
1704 
1705 	case '"':
1706 	case '\'':
1707 		match = c;
1708 		tokname[0] = ' ';
1709 		i = 1;
1710 		for(;;) {
1711 			c = Bgetrune(finput);
1712 			if(c == '\n' || c <= 0)
1713 				error("illegal or missing ' or \"" );
1714 			if(c == '\\') {
1715 				tokname[i] = '\\';
1716 				if(i < NAMESIZE)
1717 					i++;
1718 				c = Bgetrune(finput);
1719 			} else
1720 				if(c == match)
1721 					break;
1722 			rune = c;
1723 			c = runetochar(&tokname[i], &rune);
1724 			if(i < NAMESIZE)
1725 				i += c;
1726 		}
1727 		break;
1728 
1729 	case '%':
1730 	case '\\':
1731 		switch(c = Bgetrune(finput)) {
1732 		case '0':	return TERM;
1733 		case '<':	return LEFT;
1734 		case '2':	return BINARY;
1735 		case '>':	return RIGHT;
1736 		case '%':
1737 		case '\\':	return MARK;
1738 		case '=':	return PREC;
1739 		case '{':	return LCURLY;
1740 		default:	reserve = 1;
1741 		}
1742 
1743 	default:
1744 		/* number */
1745 		if(isdigit(c)) {
1746 			numbval = c-'0';
1747 			base = (c=='0')? 8: 10;
1748 			for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1749 				numbval = numbval*base + (c-'0');
1750 			Bungetrune(finput);
1751 			return NUMBER;
1752 		}
1753 		if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1754 			i = 0;
1755 			while(islower(c) || isupper(c) || isdigit(c) ||
1756 			    c == '-' || c=='_' || c=='.' || c=='$') {
1757 				if(reserve && isupper(c))
1758 					c += 'a'-'A';
1759 				rune = c;
1760 				c = runetochar(&tokname[i], &rune);
1761 				if(i < NAMESIZE)
1762 					i += c;
1763 				c = Bgetrune(finput);
1764 			}
1765 		} else
1766 			return c;
1767 		Bungetrune(finput);
1768 	}
1769 	tokname[i] = 0;
1770 
1771 	/* find a reserved word */
1772 	if(reserve) {
1773 		for(c=0; resrv[c].name; c++)
1774 			if(strcmp(tokname, resrv[c].name) == 0)
1775 				return resrv[c].value;
1776 		error("invalid escape, or illegal reserved word: %s", tokname);
1777 	}
1778 
1779 	/* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1780 	c = Bgetrune(finput);
1781 	while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1782 		if(c == '\n')
1783 			peekline++;
1784 		/* look for comments */
1785 		if(c == '/')
1786 			peekline += skipcom();
1787 		c = Bgetrune(finput);
1788 	}
1789 	if(c == ':')
1790 		return IDENTCOLON;
1791 	Bungetrune(finput);
1792 	return IDENTIFIER;
1793 }
1794 
1795 /*
1796  * determine the type of a symbol
1797  */
1798 int
fdtype(int t)1799 fdtype(int t)
1800 {
1801 	int v;
1802 
1803 	if(t >= NTBASE)
1804 		v = nontrst[t-NTBASE].value;
1805 	else
1806 		v = TYPE(toklev[t]);
1807 	if(v <= 0)
1808 		error("must specify type for %s", (t>=NTBASE)?
1809 			nontrst[t-NTBASE].name: tokset[t].name);
1810 	return v;
1811 }
1812 
1813 int
chfind(int t,char * s)1814 chfind(int t, char *s)
1815 {
1816 	int i;
1817 
1818 	if(s[0] == ' ')
1819 		t = 0;
1820 	TLOOP(i)
1821 		if(!strcmp(s, tokset[i].name))
1822 			return i;
1823 	NTLOOP(i)
1824 		if(!strcmp(s, nontrst[i].name))
1825 			return NTBASE+i;
1826 
1827 	/* cannot find name */
1828 	if(t > 1)
1829 		error("%s should have been defined earlier", s);
1830 	return defin(t, s);
1831 }
1832 
1833 /*
1834  * copy the union declaration to the output, and the define file if present
1835  */
1836 void
cpyunion(void)1837 cpyunion(void)
1838 {
1839 	long c;
1840 	int level;
1841 
1842 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1843 	Bprint(ftable, "typedef union ");
1844 	if(fdefine != 0)
1845 		Bprint(fdefine, "\ntypedef union ");
1846 
1847 	level = 0;
1848 	for(;;) {
1849 		if((c=Bgetrune(finput)) == Beof)
1850 			error("EOF encountered while processing %%union");
1851 		Bputrune(ftable, c);
1852 		if(fdefine != 0)
1853 			Bputrune(fdefine, c);
1854 		switch(c) {
1855 		case '\n':
1856 			lineno++;
1857 			break;
1858 		case '{':
1859 			level++;
1860 			break;
1861 		case '}':
1862 			level--;
1863 
1864 			/* we are finished copying */
1865 			if(level == 0) {
1866 				Bprint(ftable, " YYSTYPE;\n");
1867 				if(fdefine != 0)
1868 					Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1869 				return;
1870 			}
1871 		}
1872 	}
1873 }
1874 
1875 /*
1876  * copies code between \{ and \}
1877  */
1878 void
cpycode(void)1879 cpycode(void)
1880 {
1881 
1882 	long c;
1883 
1884 	c = Bgetrune(finput);
1885 	if(c == '\n') {
1886 		c = Bgetrune(finput);
1887 		lineno++;
1888 	}
1889 	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1890 	while(c != Beof) {
1891 		if(c == '\\') {
1892 			if((c=Bgetrune(finput)) == '}')
1893 				return;
1894 			Bputc(ftable, '\\');
1895 		}
1896 		if(c == '%') {
1897 			if((c=Bgetrune(finput)) == '}')
1898 				return;
1899 			Bputc(ftable, '%');
1900 		}
1901 		Bputrune(ftable, c);
1902 		if(c == '\n')
1903 			lineno++;
1904 		c = Bgetrune(finput);
1905 	}
1906 	error("eof before %%}");
1907 }
1908 
1909 /*
1910  * skip over comments
1911  * skipcom is called after reading a '/'
1912  */
1913 int
skipcom(void)1914 skipcom(void)
1915 {
1916 	long c;
1917 	int i;
1918 
1919 	/* i is the number of lines skipped */
1920 	i = 0;
1921 	c = Bgetrune(finput);
1922 	if(c == '/'){			/* C++ //: skip to end of line */
1923 		while((c = Bgetrune(finput)) != Beof)
1924 			if(c == '\n')
1925 				return 1;
1926 	}else if(c == '*'){		/* normal C comment */
1927 		while((c = Bgetrune(finput)) != Beof) {
1928 			while(c == '*')
1929 				if((c = Bgetrune(finput)) == '/')
1930 					return i;
1931 			if(c == '\n')
1932 				i++;
1933 		}
1934 	}else
1935 		error("illegal comment");
1936 
1937 	error("EOF inside comment");
1938 	return 0;
1939 }
1940 
1941 /*
1942  * copy C action to the next ; or closing }
1943  */
1944 void
cpyact(int offset)1945 cpyact(int offset)
1946 {
1947 	long c;
1948 	int brac, match, j, s, fnd, tok;
1949 
1950 	Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1951 	brac = 0;
1952 
1953 loop:
1954 	c = Bgetrune(finput);
1955 swt:
1956 	switch(c) {
1957 	case ';':
1958 		if(brac == 0) {
1959 			Bputrune(faction, c);
1960 			return;
1961 		}
1962 		goto lcopy;
1963 
1964 	case '{':
1965 		brac++;
1966 		goto lcopy;
1967 
1968 	case '$':
1969 		s = 1;
1970 		tok = -1;
1971 		c = Bgetrune(finput);
1972 
1973 		/* type description */
1974 		if(c == '<') {
1975 			Bungetrune(finput);
1976 			if(gettok() != TYPENAME)
1977 				error("bad syntax on $<ident> clause");
1978 			tok = numbval;
1979 			c = Bgetrune(finput);
1980 		}
1981 		if(c == '$') {
1982 			Bprint(faction, "yyval");
1983 
1984 			/* put out the proper tag... */
1985 			if(ntypes) {
1986 				if(tok < 0)
1987 					tok = fdtype(*prdptr[nprod]);
1988 				Bprint(faction, ".%s", typeset[tok]);
1989 			}
1990 			goto loop;
1991 		}
1992 		if(c == '-') {
1993 			s = -s;
1994 			c = Bgetrune(finput);
1995 		}
1996 		if(isdigit(c)) {
1997 			j = 0;
1998 			while(isdigit(c)) {
1999 				j = j*10 + (c-'0');
2000 				c = Bgetrune(finput);
2001 			}
2002 			Bungetrune(finput);
2003 			j = j*s - offset;
2004 			if(j > 0)
2005 				error("Illegal use of $%d", j+offset);
2006 
2007 		dollar:
2008 			Bprint(faction, "yypt[-%d].yyv", -j);
2009 
2010 			/* put out the proper tag */
2011 			if(ntypes) {
2012 				if(j+offset <= 0 && tok < 0)
2013 					error("must specify type of $%d", j+offset);
2014 				if(tok < 0)
2015 					tok = fdtype(prdptr[nprod][j+offset]);
2016 				Bprint(faction, ".%s", typeset[tok]);
2017 			}
2018 			goto loop;
2019 		}
2020 		if(isupper(c) || islower(c) || c == '_' || c == '.') {
2021 			int tok; /* tok used oustide for type info */
2022 
2023 			/* look for $name */
2024 			Bungetrune(finput);
2025 			if(gettok() != IDENTIFIER)
2026 				error("$ must be followed by an identifier");
2027 			tok = chfind(2, tokname);
2028 			if((c = Bgetrune(finput)) != '#') {
2029 				Bungetrune(finput);
2030 				fnd = -1;
2031 			} else
2032 				if(gettok() != NUMBER) {
2033 					error("# must be followed by number");
2034 					fnd = -1;
2035 				} else
2036 					fnd = numbval;
2037 			for(j=1; j<=offset; ++j)
2038 				if(tok == prdptr[nprod][j]) {
2039 					if(--fnd <= 0) {
2040 						j -= offset;
2041 						goto dollar;
2042 					}
2043 				}
2044 			error("$name or $name#number not found");
2045 		}
2046 		Bputc(faction, '$');
2047 		if(s < 0 )
2048 			Bputc(faction, '-');
2049 		goto swt;
2050 
2051 	case '}':
2052 		brac--;
2053 		if(brac)
2054 			goto lcopy;
2055 		Bputrune(faction, c);
2056 		return;
2057 
2058 	case '/':
2059 		/* look for comments */
2060 		Bputrune(faction, c);
2061 		c = Bgetrune(finput);
2062 		if(c != '*')
2063 			goto swt;
2064 
2065 		/* it really is a comment; copy it */
2066 		Bputrune(faction, c);
2067 		c = Bgetrune(finput);
2068 		while(c >= 0) {
2069 			while(c == '*') {
2070 				Bputrune(faction, c);
2071 				if((c=Bgetrune(finput)) == '/')
2072 					goto lcopy;
2073 			}
2074 			Bputrune(faction, c);
2075 			if(c == '\n')
2076 				lineno++;
2077 			c = Bgetrune(finput);
2078 		}
2079 		error("EOF inside comment");
2080 
2081 	case '\'':
2082 		/* character constant */
2083 		match = '\'';
2084 		goto string;
2085 
2086 	case '"':
2087 		/* character string */
2088 		match = '"';
2089 
2090 	string:
2091 		Bputrune(faction, c);
2092 		while(c = Bgetrune(finput)) {
2093 			if(c == '\\') {
2094 				Bputrune(faction, c);
2095 				c = Bgetrune(finput);
2096 				if(c == '\n')
2097 					lineno++;
2098 			} else {
2099 				if(c == match)
2100 					goto lcopy;
2101 				if(c == '\n')
2102 					error("newline in string or char. const.");
2103 			}
2104 			Bputrune(faction, c);
2105 		}
2106 		error("EOF in string or character constant");
2107 
2108 	case Beof:
2109 		error("action does not terminate");
2110 
2111 	case '\n':
2112 		lineno++;
2113 		goto lcopy;
2114 	}
2115 
2116 lcopy:
2117 	Bputrune(faction, c);
2118 	goto loop;
2119 }
2120 
2121 void
openup(char * stem,int dflag,int vflag,int ytab,char * ytabc)2122 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2123 {
2124 	char buf[256];
2125 
2126 	if(vflag) {
2127 		snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2128 		foutput = Bopen(buf, OWRITE);
2129 		if(foutput == 0)
2130 			error("cannot open %s", buf);
2131 	}
2132 	if(yydebug) {
2133 		snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2134 		if((fdebug = Bopen(buf, OWRITE)) == 0)
2135 			error("can't open %s", buf);
2136 	}
2137 	if(dflag) {
2138 		snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2139 		fdefine = Bopen(buf, OWRITE);
2140 		if(fdefine == 0)
2141 			error("can't create %s", buf);
2142 	}
2143 	if(ytab == 0)
2144 		snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2145 	else
2146 		strecpy(buf, buf+sizeof buf, ytabc);
2147 	ftable = Bopen(buf, OWRITE);
2148 	if(ftable == 0)
2149 		error("cannot open table file %s", buf);
2150 }
2151 
2152 /*
2153  * print the output for the states
2154  */
2155 void
output(void)2156 output(void)
2157 {
2158 	int i, k, c;
2159 	Wset *u, *v;
2160 
2161 	Bprint(ftable, "short	yyexca[] =\n{");
2162 	if(fdebug)
2163 		Bprint(fdebug, "char*	yystates[] =\n{\n");
2164 
2165 	/* output the stuff for state i */
2166 	SLOOP(i) {
2167 		nolook = tystate[i]!=MUSTLOOKAHEAD;
2168 		closure(i);
2169 
2170 		/* output actions */
2171 		nolook = 1;
2172 		aryfil(temp1, ntokens+nnonter+1, 0);
2173 		WSLOOP(wsets, u) {
2174 			c = *(u->pitem);
2175 			if(c > 1 && c < NTBASE && temp1[c] == 0) {
2176 				WSLOOP(u, v)
2177 					if(c == *(v->pitem))
2178 						putitem(v->pitem+1, (Lkset*)0);
2179 				temp1[c] = state(c);
2180 			} else
2181 				if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2182 					temp1[c+ntokens] = amem[indgo[i]+c];
2183 		}
2184 		if(i == 1)
2185 			temp1[1] = ACCEPTCODE;
2186 
2187 		/* now, we have the shifts; look at the reductions */
2188 		lastred = 0;
2189 		WSLOOP(wsets, u) {
2190 			c = *u->pitem;
2191 
2192 			/* reduction */
2193 			if(c <= 0) {
2194 				lastred = -c;
2195 				TLOOP(k)
2196 					if(BIT(u->ws.lset, k)) {
2197 						if(temp1[k] == 0)
2198 							temp1[k] = c;
2199 						else
2200 						if(temp1[k] < 0) { /* reduce/reduce conflict */
2201 							if(foutput)
2202 								Bprint(foutput,
2203 									"\n%d: reduce/reduce conflict"
2204 									" (red'ns %d and %d ) on %s",
2205 									i, -temp1[k], lastred,
2206 									symnam(k));
2207 							if(-temp1[k] > lastred)
2208 								temp1[k] = -lastred;
2209 							zzrrconf++;
2210 						} else
2211 							/* potential shift/reduce conflict */
2212 							precftn( lastred, k, i );
2213 					}
2214 				}
2215 		}
2216 		wract(i);
2217 	}
2218 
2219 	if(fdebug)
2220 		Bprint(fdebug, "};\n");
2221 	Bprint(ftable, "};\n");
2222 	Bprint(ftable, "#define	YYNPROD	%d\n", nprod);
2223 	Bprint(ftable, "#define	YYPRIVATE %d\n", PRIVATE);
2224 	if(yydebug)
2225 		Bprint(ftable, "#define	yydebug	%s\n", yydebug);
2226 }
2227 
2228 /*
2229  * pack state i from temp1 into amem
2230  */
2231 int
apack(int * p,int n)2232 apack(int *p, int n)
2233 {
2234 	int *pp, *qq, *rr, off, *q, *r;
2235 
2236 	/* we don't need to worry about checking because
2237 	 * we will only look at entries known to be there...
2238 	 * eliminate leading and trailing 0's
2239 	 */
2240 
2241 	q = p+n;
2242 	for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2243 		;
2244  	/* no actions */
2245 	if(pp > q)
2246 		return 0;
2247 	p = pp;
2248 
2249 	/* now, find a place for the elements from p to q, inclusive */
2250 	r = &amem[ACTSIZE-1];
2251 	for(rr = amem; rr <= r; rr++, off++) {
2252 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2253 			if(*pp != 0)
2254 				if(*pp != *qq && *qq != 0)
2255 					goto nextk;
2256 
2257 		/* we have found an acceptable k */
2258 		if(pkdebug && foutput != 0)
2259 			Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2260 		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2261 			if(*pp) {
2262 				if(qq > r)
2263 					error("action table overflow");
2264 				if(qq > memp)
2265 					memp = qq;
2266 				*qq = *pp;
2267 			}
2268 		if(pkdebug && foutput != 0)
2269 			for(pp = amem; pp <= memp; pp += 10) {
2270 				Bprint(foutput, "\t");
2271 				for(qq = pp; qq <= pp+9; qq++)
2272 					Bprint(foutput, "%d ", *qq);
2273 				Bprint(foutput, "\n");
2274 			}
2275 		return(off);
2276 	nextk:;
2277 	}
2278 	error("no space in action table");
2279 	return 0;
2280 }
2281 
2282 /*
2283  * output the gotos for the nontermninals
2284  */
2285 void
go2out(void)2286 go2out(void)
2287 {
2288 	int i, j, k, best, count, cbest, times;
2289 
2290 	/* mark begining of gotos */
2291 	Bprint(ftemp, "$\n");
2292 	for(i = 1; i <= nnonter; i++) {
2293 		go2gen(i);
2294 
2295 		/* find the best one to make default */
2296 		best = -1;
2297 		times = 0;
2298 
2299 		/* is j the most frequent */
2300 		for(j = 0; j <= nstate; j++) {
2301 			if(tystate[j] == 0)
2302 				continue;
2303 			if(tystate[j] == best)
2304 				continue;
2305 
2306 			/* is tystate[j] the most frequent */
2307 			count = 0;
2308 			cbest = tystate[j];
2309 			for(k = j; k <= nstate; k++)
2310 				if(tystate[k] == cbest)
2311 					count++;
2312 			if(count > times) {
2313 				best = cbest;
2314 				times = count;
2315 			}
2316 		}
2317 
2318 		/* best is now the default entry */
2319 		zzgobest += times-1;
2320 		for(j = 0; j <= nstate; j++)
2321 			if(tystate[j] != 0 && tystate[j] != best) {
2322 				Bprint(ftemp, "%d,%d,", j, tystate[j]);
2323 				zzgoent++;
2324 			}
2325 
2326 		/* now, the default */
2327 		if(best == -1)
2328 			best = 0;
2329 		zzgoent++;
2330 		Bprint(ftemp, "%d\n", best);
2331 	}
2332 }
2333 
2334 /*
2335  * output the gotos for nonterminal c
2336  */
2337 void
go2gen(int c)2338 go2gen(int c)
2339 {
2340 	int i, work, cc;
2341 	Item *p, *q;
2342 
2343 
2344 	/* first, find nonterminals with gotos on c */
2345 	aryfil(temp1, nnonter+1, 0);
2346 	temp1[c] = 1;
2347 	work = 1;
2348 	while(work) {
2349 		work = 0;
2350 		PLOOP(0, i)
2351 
2352 			/* cc is a nonterminal */
2353 			if((cc=prdptr[i][1]-NTBASE) >= 0)
2354 				/* cc has a goto on c */
2355 				if(temp1[cc] != 0) {
2356 
2357 					/* thus, the left side of production i does too */
2358 					cc = *prdptr[i]-NTBASE;
2359 					if(temp1[cc] == 0) {
2360 						  work = 1;
2361 						  temp1[cc] = 1;
2362 					}
2363 				}
2364 	}
2365 
2366 	/* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2367 	if(g2debug && foutput != 0) {
2368 		Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2369 		NTLOOP(i)
2370 			if(temp1[i])
2371 				Bprint(foutput, "%s ", nontrst[i].name);
2372 		Bprint(foutput, "\n");
2373 	}
2374 
2375 	/* now, go through and put gotos into tystate */
2376 	aryfil(tystate, nstate, 0);
2377 	SLOOP(i)
2378 		ITMLOOP(i, p, q)
2379 			if((cc = *p->pitem) >= NTBASE)
2380 				/* goto on c is possible */
2381 				if(temp1[cc-NTBASE]) {
2382 					tystate[i] = amem[indgo[i]+c];
2383 					break;
2384 				}
2385 }
2386 
2387 /*
2388  * decide a shift/reduce conflict by precedence.
2389  * r is a rule number, t a token number
2390  * the conflict is in state s
2391  * temp1[t] is changed to reflect the action
2392  */
2393 void
precftn(int r,int t,int s)2394 precftn(int r, int t, int s)
2395 {
2396 	int lp, lt, action;
2397 
2398 	lp = levprd[r];
2399 	lt = toklev[t];
2400 	if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2401 
2402 		/* conflict */
2403 		if(foutput != 0)
2404 			Bprint(foutput,
2405 				"\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2406 				s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2407 		zzsrconf++;
2408 		return;
2409 	}
2410 	if(PLEVEL(lt) == PLEVEL(lp))
2411 		action = ASSOC(lt);
2412 	else
2413 		if(PLEVEL(lt) > PLEVEL(lp))
2414 			action = RASC;  /* shift */
2415 		else
2416 			action = LASC;  /* reduce */
2417 	switch(action) {
2418 	case BASC:  /* error action */
2419 		temp1[t] = ERRCODE;
2420 		break;
2421 	case LASC:  /* reduce */
2422 		temp1[t] = -r;
2423 		break;
2424 	}
2425 }
2426 
2427 /*
2428  * output state i
2429  * temp1 has the actions, lastred the default
2430  */
2431 void
wract(int i)2432 wract(int i)
2433 {
2434 	int p, p0, p1, ntimes, tred, count, j, flag;
2435 
2436 	/* find the best choice for lastred */
2437 	lastred = 0;
2438 	ntimes = 0;
2439 	TLOOP(j) {
2440 		if(temp1[j] >= 0)
2441 			continue;
2442 		if(temp1[j]+lastred == 0)
2443 			continue;
2444 		/* count the number of appearances of temp1[j] */
2445 		count = 0;
2446 		tred = -temp1[j];
2447 		levprd[tred] |= REDFLAG;
2448 		TLOOP(p)
2449 			if(temp1[p]+tred == 0)
2450 				count++;
2451 		if(count > ntimes) {
2452 			lastred = tred;
2453 			ntimes = count;
2454 		}
2455 	}
2456 
2457 	/*
2458 	 * for error recovery, arrange that, if there is a shift on the
2459 	 * error recovery token, `error', that the default be the error action
2460 	 */
2461 	if(temp1[2] > 0)
2462 		lastred = 0;
2463 
2464 	/* clear out entries in temp1 which equal lastred */
2465 	TLOOP(p)
2466 		if(temp1[p]+lastred == 0)
2467 			temp1[p] = 0;
2468 
2469 	wrstate(i);
2470 	defact[i] = lastred;
2471 	flag = 0;
2472 	TLOOP(p0)
2473 		if((p1=temp1[p0]) != 0) {
2474 			if(p1 < 0) {
2475 				p1 = -p1;
2476 				goto exc;
2477 			}
2478 			if(p1 == ACCEPTCODE) {
2479 				p1 = -1;
2480 				goto exc;
2481 			}
2482 			if(p1 == ERRCODE) {
2483 				p1 = 0;
2484 			exc:
2485 				if(flag++ == 0)
2486 					Bprint(ftable, "-1, %d,\n", i);
2487 				Bprint(ftable, "\t%d, %d,\n", p0, p1);
2488 				zzexcp++;
2489 				continue;
2490 			}
2491 			Bprint(ftemp, "%d,%d,", p0, p1);
2492 			zzacent++;
2493 		}
2494 	if(flag) {
2495 		defact[i] = -2;
2496 		Bprint(ftable, "\t-2, %d,\n", lastred);
2497 	}
2498 	Bprint(ftemp, "\n");
2499 }
2500 
2501 /*
2502  * writes state i
2503  */
2504 void
wrstate(int i)2505 wrstate(int i)
2506 {
2507 	int j0, j1;
2508 	Item *pp, *qq;
2509 	Wset *u;
2510 
2511 	if(fdebug) {
2512 		if(lastred) {
2513 			Bprint(fdebug, "	0, /*%d*/\n", i);
2514 		} else {
2515 			Bprint(fdebug, "	\"");
2516 			ITMLOOP(i, pp, qq)
2517 				Bprint(fdebug, "%s\\n", writem(pp->pitem));
2518 			if(tystate[i] == MUSTLOOKAHEAD)
2519 				WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2520 					if(*u->pitem < 0)
2521 						Bprint(fdebug, "%s\\n", writem(u->pitem));
2522 			Bprint(fdebug, "\", /*%d*/\n", i);
2523 		}
2524 	}
2525 	if(foutput == 0)
2526 		return;
2527 	Bprint(foutput, "\nstate %d\n", i);
2528 	ITMLOOP(i, pp, qq)
2529 		Bprint(foutput, "\t%s\n", writem(pp->pitem));
2530 	if(tystate[i] == MUSTLOOKAHEAD)
2531 		/* print out empty productions in closure */
2532 		WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2533 			if(*u->pitem < 0)
2534 				Bprint(foutput, "\t%s\n", writem(u->pitem));
2535 
2536 	/* check for state equal to another */
2537 	TLOOP(j0)
2538 		if((j1=temp1[j0]) != 0) {
2539 			Bprint(foutput, "\n\t%s  ", symnam(j0));
2540 			/* shift, error, or accept */
2541 			if(j1 > 0) {
2542 				if(j1 == ACCEPTCODE)
2543 					Bprint(foutput,  "accept");
2544 				else
2545 					if(j1 == ERRCODE)
2546 						Bprint(foutput, "error");
2547 					else
2548 						Bprint(foutput, "shift %d", j1);
2549 			} else
2550 				Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2551 		}
2552 
2553 	/* output the final production */
2554 	if(lastred)
2555 		Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2556 			lastred, rlines[lastred]);
2557 	else
2558 		Bprint(foutput, "\n\t.  error\n\n");
2559 
2560 	/* now, output nonterminal actions */
2561 	j1 = ntokens;
2562 	for(j0 = 1; j0 <= nnonter; j0++) {
2563 		j1++;
2564 		if(temp1[j1])
2565 			Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2566 	}
2567 }
2568 
2569 void
warray(char * s,int * v,int n)2570 warray(char *s, int *v, int n)
2571 {
2572 	int i;
2573 
2574 	Bprint(ftable, "short	%s[] =\n{", s);
2575 	for(i=0;;) {
2576 		if(i%10 == 0)
2577 			Bprint(ftable, "\n");
2578 		Bprint(ftable, "%4d", v[i]);
2579 		i++;
2580 		if(i >= n) {
2581 			Bprint(ftable, "\n};\n");
2582 			break;
2583 		}
2584 		Bprint(ftable, ",");
2585 	}
2586 }
2587 
2588 /*
2589  * in order to free up the mem and amem arrays for the optimizer,
2590  * and still be able to output yyr1, etc., after the sizes of
2591  * the action array is known, we hide the nonterminals
2592  * derived by productions in levprd.
2593  */
2594 
2595 void
hideprod(void)2596 hideprod(void)
2597 {
2598 	int i, j;
2599 
2600 	j = 0;
2601 	levprd[0] = 0;
2602 	PLOOP(1, i) {
2603 		if(!(levprd[i] & REDFLAG)) {
2604 			j++;
2605 			if(foutput != 0)
2606 				Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2607 		}
2608 		levprd[i] = *prdptr[i] - NTBASE;
2609 	}
2610 	if(j)
2611 		print("%d rules never reduced\n", j);
2612 }
2613 
2614 void
callopt(void)2615 callopt(void)
2616 {
2617 	int i, *p, j, k, *q;
2618 
2619 	/* read the arrays from tempfile and set parameters */
2620 	finput = Bopen(tempname, OREAD);
2621 	if(finput == 0)
2622 		error("optimizer cannot open tempfile");
2623 
2624 	pgo[0] = 0;
2625 	temp1[0] = 0;
2626 	nstate = 0;
2627 	nnonter = 0;
2628 	for(;;) {
2629 		switch(gtnm()) {
2630 		case '\n':
2631 			nstate++;
2632 			pmem--;
2633 			temp1[nstate] = pmem - mem0;
2634 		case ',':
2635 			continue;
2636 		case '$':
2637 			break;
2638 		default:
2639 			error("bad tempfile");
2640 		}
2641 		break;
2642 	}
2643 
2644 	pmem--;
2645 	temp1[nstate] = yypgo[0] = pmem - mem0;
2646 	for(;;) {
2647 		switch(gtnm()) {
2648 		case '\n':
2649 			nnonter++;
2650 			yypgo[nnonter] = pmem-mem0;
2651 		case ',':
2652 			continue;
2653 		case -1:
2654 			break;
2655 		default:
2656 			error("bad tempfile");
2657 		}
2658 		break;
2659 	}
2660 	pmem--;
2661 	yypgo[nnonter--] = pmem - mem0;
2662 	for(i = 0; i < nstate; i++) {
2663 		k = 32000;
2664 		j = 0;
2665 		q = mem0 + temp1[i+1];
2666 		for(p = mem0 + temp1[i]; p < q ; p += 2) {
2667 			if(*p > j)
2668 				j = *p;
2669 			if(*p < k)
2670 				k = *p;
2671 		}
2672 		/* nontrivial situation */
2673 		if(k <= j) {
2674 			/* j is now the range */
2675 /*			j -= k;			/* call scj */
2676 			if(k > maxoff)
2677 				maxoff = k;
2678 		}
2679 		tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2680 		if(j > maxspr)
2681 			maxspr = j;
2682 	}
2683 
2684 	/* initialize ggreed table */
2685 	for(i = 1; i <= nnonter; i++) {
2686 		ggreed[i] = 1;
2687 		j = 0;
2688 
2689 		/* minimum entry index is always 0 */
2690 		q = mem0 + yypgo[i+1] - 1;
2691 		for(p = mem0+yypgo[i]; p < q ; p += 2) {
2692 			ggreed[i] += 2;
2693 			if(*p > j)
2694 				j = *p;
2695 		}
2696 		ggreed[i] = ggreed[i] + 2*j;
2697 		if(j > maxoff)
2698 			maxoff = j;
2699 	}
2700 
2701 	/* now, prepare to put the shift actions into the amem array */
2702 	for(i = 0; i < ACTSIZE; i++)
2703 		amem[i] = 0;
2704 	maxa = amem;
2705 	for(i = 0; i < nstate; i++) {
2706 		if(tystate[i] == 0 && adb > 1)
2707 			Bprint(ftable, "State %d: null\n", i);
2708 		indgo[i] = YYFLAG1;
2709 	}
2710 	while((i = nxti()) != NOMORE)
2711 		if(i >= 0)
2712 			stin(i);
2713 		else
2714 			gin(-i);
2715 
2716 	/* print amem array */
2717 	if(adb > 2 )
2718 		for(p = amem; p <= maxa; p += 10) {
2719 			Bprint(ftable, "%4d  ", (int)(p-amem));
2720 			for(i = 0; i < 10; ++i)
2721 				Bprint(ftable, "%4d  ", p[i]);
2722 			Bprint(ftable, "\n");
2723 		}
2724 
2725 	/* write out the output appropriate to the language */
2726 	aoutput();
2727 	osummary();
2728 	ZAPFILE(tempname);
2729 }
2730 
2731 void
gin(int i)2732 gin(int i)
2733 {
2734 	int *p, *r, *s, *q1, *q2;
2735 
2736 	/* enter gotos on nonterminal i into array amem */
2737 	ggreed[i] = 0;
2738 
2739 	q2 = mem0+ yypgo[i+1] - 1;
2740 	q1 = mem0 + yypgo[i];
2741 
2742 	/* now, find amem place for it */
2743 	for(p = amem; p < &amem[ACTSIZE]; p++) {
2744 		if(*p)
2745 			continue;
2746 		for(r = q1; r < q2; r += 2) {
2747 			s = p + *r + 1;
2748 			if(*s)
2749 				goto nextgp;
2750 			if(s > maxa)
2751 				if((maxa = s) > &amem[ACTSIZE])
2752 					error("a array overflow");
2753 		}
2754 		/* we have found amem spot */
2755 		*p = *q2;
2756 		if(p > maxa)
2757 			if((maxa = p) > &amem[ACTSIZE])
2758 				error("a array overflow");
2759 		for(r = q1; r < q2; r += 2) {
2760 			s = p + *r + 1;
2761 			*s = r[1];
2762 		}
2763 		pgo[i] = p-amem;
2764 		if(adb > 1)
2765 			Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2766 		return;
2767 
2768 	nextgp:;
2769 	}
2770 	error("cannot place goto %d\n", i);
2771 }
2772 
2773 void
stin(int i)2774 stin(int i)
2775 {
2776 	int *r, *s, n, flag, j, *q1, *q2;
2777 
2778 	tystate[i] = 0;
2779 
2780 	/* enter state i into the amem array */
2781 	q2 = mem0+temp1[i+1];
2782 	q1 = mem0+temp1[i];
2783 	/* find an acceptable place */
2784 	for(n = -maxoff; n < ACTSIZE; n++) {
2785 		flag = 0;
2786 		for(r = q1; r < q2; r += 2) {
2787 			if((s = *r + n + amem) < amem)
2788 				goto nextn;
2789 			if(*s == 0)
2790 				flag++;
2791 			else
2792 				if(*s != r[1])
2793 					goto nextn;
2794 		}
2795 
2796 		/* check that the position equals another only if the states are identical */
2797 		for(j=0; j<nstate; j++) {
2798 			if(indgo[j] == n) {
2799 
2800 				/* we have some disagreement */
2801 				if(flag)
2802 					goto nextn;
2803 				if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2804 
2805 					/* states are equal */
2806 					indgo[i] = n;
2807 					if(adb > 1)
2808 						Bprint(ftable,
2809 						"State %d: entry at %d equals state %d\n",
2810 						i, n, j);
2811 					return;
2812 				}
2813 
2814 				/* we have some disagreement */
2815 				goto nextn;
2816 			}
2817 		}
2818 
2819 		for(r = q1; r < q2; r += 2) {
2820 			if((s = *r+n+amem) >= &amem[ACTSIZE])
2821 				error("out of space in optimizer a array");
2822 			if(s > maxa)
2823 				maxa = s;
2824 			if(*s != 0 && *s != r[1])
2825 				error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2826 			*s = r[1];
2827 		}
2828 		indgo[i] = n;
2829 		if(adb > 1)
2830 			Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2831 		return;
2832 	nextn:;
2833 	}
2834 	error("Error; failure to place state %d\n", i);
2835 }
2836 
2837 /*
2838  * finds the next i
2839  */
2840 int
nxti(void)2841 nxti(void)
2842 {
2843 	int i, max, maxi;
2844 
2845 	max = 0;
2846 	maxi = 0;
2847 	for(i = 1; i <= nnonter; i++)
2848 		if(ggreed[i] >= max) {
2849 			max = ggreed[i];
2850 			maxi = -i;
2851 		}
2852 	for(i = 0; i < nstate; ++i)
2853 		if(tystate[i] >= max) {
2854 			max = tystate[i];
2855 			maxi = i;
2856 		}
2857 	if(nxdb)
2858 		Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2859 	if(max == 0)
2860 		return NOMORE;
2861 	return maxi;
2862 }
2863 
2864 /*
2865  * write summary
2866  */
2867 void
osummary(void)2868 osummary(void)
2869 {
2870 
2871 	int i, *p;
2872 
2873 	if(foutput == 0)
2874 		return;
2875 	i = 0;
2876 	for(p = maxa; p >= amem; p--)
2877 		if(*p == 0)
2878 			i++;
2879 
2880 	Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2881 		(int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2882 	Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2883 	Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2884 }
2885 
2886 /*
2887  * this version is for C
2888  * write out the optimized parser
2889  */
2890 void
aoutput(void)2891 aoutput(void)
2892 {
2893 	Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2894 	arout("yyact", amem, (maxa-amem)+1);
2895 	arout("yypact", indgo, nstate);
2896 	arout("yypgo", pgo, nnonter+1);
2897 }
2898 
2899 void
arout(char * s,int * v,int n)2900 arout(char *s, int *v, int n)
2901 {
2902 	int i;
2903 
2904 	Bprint(ftable, "short	%s[] =\n{", s);
2905 	for(i = 0; i < n;) {
2906 		if(i%10 == 0)
2907 			Bprint(ftable, "\n");
2908 		Bprint(ftable, "%4d", v[i]);
2909 		i++;
2910 		if(i == n)
2911 			Bprint(ftable, "\n};\n");
2912 		else
2913 			Bprint(ftable, ",");
2914 	}
2915 }
2916 
2917 /*
2918  * read and convert an integer from the standard input
2919  * return the terminating character
2920  * blanks, tabs, and newlines are ignored
2921  */
2922 int
gtnm(void)2923 gtnm(void)
2924 {
2925 	int sign, val, c;
2926 
2927 	sign = 0;
2928 	val = 0;
2929 	while((c=Bgetrune(finput)) != Beof) {
2930 		if(isdigit(c)) {
2931 			val = val*10 + c-'0';
2932 			continue;
2933 		}
2934 		if(c == '-') {
2935 			sign = 1;
2936 			continue;
2937 		}
2938 		break;
2939 	}
2940 	if(sign)
2941 		val = -val;
2942 	*pmem++ = val;
2943 	if(pmem >= &mem0[MEMSIZE])
2944 		error("out of space");
2945 	return c;
2946 }
2947